「非構造化データ領域(unstructured data region)」と言うと分かりにくいが、これは任意の場所から任意のデータを開始することができ、任意の場所で当該データ領域の終了を行うことができる領域のことである。7章で説明した Enter Data と Exit Data ディレクティブ によって囲まれた領域のことを指す。データ領域をいわゆる「プログラム言語上の構造ユニット」の最初と終わりによって定義する必要がないということである。このディレクティブは「実行文」的な機能を持つので、様々な場所で活用することにより、プログラミングの自由度が広がる。例えば、ホスト上のデータ割付生成と同時にデバイスデータの生成を組み合わせることによって、ダイナミックなデータ・メンバーを含む集合型データ構成を容易く生成することができる。また、C++ コンストラクタやデストラクタにおいて、ホスト上のクラスメンバーのライフタイムをデバイス上のライフタイムに合わせるようにプログラムすることができる。こうしたことは、クラス定義の中に当該データ管理方法を組み込むことで実現できる。以下に、この非構造化データ領域を活用したプログラム例を示すこととする。
enter data/exit data ディレクティブと update ディレクティブを使った非構造化データ領域を利用するプログラム例を見てみる。データをアロケーションした後に、enter data ディレクティブにより、デバイス上の当該配列データ領域の開始を行い、同時にデバイス側に配列領域を作成する。なお、必ず、ホスト側の配列データを allocation した後に、デバイス側への enter data ディレクティブを指示することが必要である。enter data ディレクティブの場合は、copyin あるいは create のみの使用が許される。一方、exit data ディレクティブの場合は、copyout あるいは delete のみの使用が許される。また、exit data ディレクティブで delete した後に、必要であればホスト側の配列の dealloaction を行うようにする。
update ディレクティブは、あたかも実行文のような機能を持つため、必要となる任意の場所でホスト側とデバイス側のデータの移動を行うことができる。
! Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
!
! Redistribution and use in source and binary forms, with or without modification,
! are permitted provided that the following conditions are met:
!
! *Redistributions of source code must retain the above copyright notice,
! this list of conditions and the following disclaimer.
! *Redistributions in binary form must reproduce the above copyright notice,
! this list of conditions and the following disclaimer in the documentation
! and/or other materials provided with the distribution.
! *Neither the name of NVIDIA CORPORATION nor the names of its contributors
! may be used to endorse or promote products derived from this software
! without specific prior written permission.
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
! EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
! DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
! FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
! SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
! OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
! USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef N
#define N 32
#endif
#ifndef M
#define M 32
#endif
module unstruct
integer, parameter :: dp = selected_real_kind(15, 307)
real(kind=dp), dimension(:,:), allocatable :: A, B
contains
subroutine allocateData (N1,M1) ! 配列をアロケーションするルーチン
integer(8) :: N1,M1
allocate(A(N1,M1),B(N1,M1)) ! ホスト側配列の割付、その後、デバイス側割付の指示
!$acc enter data create(A,B) ! デバイス側に領域の割付を指示する
end subroutine allocateData
subroutine deallocateData() ! 配列をデアロケーションするルーチン
!$acc exit data delete(A,B) ! デバイス側のデータをdeallocation
deallocate(A,B) ! ホスト側の deallocation
end subroutine deallocateData
end module unstruct
program unstruct_example
use unstruct
integer(8) :: N1, M1
integer(8) :: i,j
N1=N
M1=M
call allocateData(N1,M1) ! 実際の配列の割付
B=2.5_dp ! ホスト側 B 配列に 2.5d0 をセット
!$acc update device(B) ! B 配列の内容をデバイス側にコピーする
!$acc kernels present(A,B) ! present句ですでにデバイス側に存在していることを指示
do j=1,N1
do i=1,M1
A(j,i) = B(j,i) + ((j-1)*M1)+(i-1)
end do
end do
!$acc end kernels
!$acc update self(A) ! 計算された A 配列の内容をホスト側にコピーする
do j=2,N1,2
print *, j, A(j,1), A(j,M1)
enddo
call deallocateData()
end program unstruct_example
[kato@photon32 Chap5]$ pgfortran -acc -Minfo unstructured_data.F90
allocatedata:
43, Generating enter data create(b(:,:),a(:,:))
deallocatedata:
47, Generating exit data delete(b(:,:),a(:,:))
unstruct_example:
62, Memory set idiom, array assignment replaced by call to pgf90_mset8
63, Generating update device(b(:,:))
64, Generating present(a(:,:),b(:,:))
65, Loop is parallelizable
66, Loop is parallelizable
Accelerator kernel generated
Generating Tesla code
65, !$acc loop gang, vector(128) ! blockidx%x threadidx%x
66, !$acc loop gang ! blockidx%y
71, Generating update self(a(:,:))
[kato@photon32 Chap5]$ ./a.out
2 34.50000000000000 65.50000000000000
4 98.50000000000000 129.5000000000000
6 162.5000000000000 193.5000000000000
8 226.5000000000000 257.5000000000000
10 290.5000000000000 321.5000000000000
12 354.5000000000000 385.5000000000000
14 418.5000000000000 449.5000000000000
16 482.5000000000000 513.5000000000000
18 546.5000000000000 577.5000000000000
20 610.5000000000000 641.5000000000000
22 674.5000000000000 705.5000000000000
24 738.5000000000000 769.5000000000000
26 802.5000000000000 833.5000000000000
28 866.5000000000000 897.5000000000000
30 930.5000000000000 961.5000000000000
32 994.5000000000000 1025.500000000000
上記の Fortran の例と同じ構成の C プログラムである。任意の場所で enter data を行い、update ディレクティブにより、必要となる任意の場所でホスト側とデバイス側のデータの移動を行っている。
! Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
!
! Redistribution and use in source and binary forms, with or without modification,
! are permitted provided that the following conditions are met:
!
! *Redistributions of source code must retain the above copyright notice,
! this list of conditions and the following disclaimer.
! *Redistributions in binary form must reproduce the above copyright notice,
! this list of conditions and the following disclaimer in the documentation
! and/or other materials provided with the distribution.
! *Neither the name of NVIDIA CORPORATION nor the names of its contributors
! may be used to endorse or promote products derived from this software
! without specific prior written permission.
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
! EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
! DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
! FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
! SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
! OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
! USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdlib.h>
#include <stdio.h>
#ifndef N
#define N 1024
#endif
double * allocData(size_t size);
int deleteData(double * A);
int initData(double *A, size_t size, double val);
int printData(double *A, size_t size);
int main() {
double *A, *B;
size_t size, i;
size = N;
A=allocData(size);
B=allocData(size);
initData(B,size,2.5);
/* Perform the computation on the device */
#pragma acc parallel loop present(A,B) // present句ですでにデバイス側に存在していることを指示
for (i=0; i < size; ++i) {
A[i] = B[i] + (double) i;
}
/* Copy back the results */
#pragma acc update self(A[0:size]) // 計算された A をホスト側へコピー
printData(A, size);
deleteData(A);
deleteData(B);
exit(0);
}
double * allocData(size_t size) { // 配列をアロケーションするルーチン
double * tmp;
tmp = (double *) malloc(size*sizeof(double)); // ホスト側の割付、その後でデバイス側の割付指示
/* Create the array on device.
Order matters. The host copy must be allocated before
creating the device copy */
#pragma acc enter data create(tmp[0:size]) // デバイス側に領域の割付を指示する
return tmp;
}
int deleteData(double * A) {
/* Delete the host copy.
Order matters. The device copy must be deleted before
the host copy is freed. */
#pragma acc exit data delete(A) // デバイス側の deallocation
free(A); // ホスト側の deallocation
}
int initData(double *A, size_t size, double val) {
size_t i;
for (i=0; i < size; ++i) {
A[i] = val; // A 配列への assign (ホスト側)
}
/* Update the device with the initial values */
#pragma acc update device(A[0:size]) // デバイス側へ A 配列内容コピー
}
int printData(double *A, size_t size) {
size_t i;
printf("Values:\n");
for (i=0; i < 10; ++i) {
printf("A[%d]=%f\n",i,A[i]);
}
printf("....\n");
for (i=size-10; i < size; ++i) {
printf("A[%d]=%f\n",i,A[i]);
}
}
[kato@photon32]$ pgcc -acc -O2 -Minfo=accel unstructured_data.c -ta=tesla,cc60,cuda8.0
main:
23, Generating present(B[:],A[:])
Accelerator kernel generated
Generating Tesla code
24, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */
29, Generating update self(A[:size])
allocData:
41, Generating enter data create(tmp[:size])
deleteData:
49, Generating exit data delete(A[:1])
initData:
60, Generating update device(A[:size])
[kato@photon32]$ a.out
Values:
A[0]=2.500000
A[1]=3.500000
A[2]=4.500000
A[3]=5.500000
A[4]=6.500000
A[5]=7.500000
A[6]=8.500000
A[7]=9.500000
A[8]=10.500000
A[9]=11.500000
....
A[1014]=1016.500000
A[1015]=1017.500000
A[1016]=1018.500000
A[1017]=1019.500000
A[1018]=1020.500000
A[1019]=1021.500000
A[1020]=1022.500000
A[1021]=1023.500000
A[1022]=1024.500000
A[1023]=1025.500000
上記 C プログラムと同じものであるが、二次元配列を使用した場合の例である。
! Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
!
! Redistribution and use in source and binary forms, with or without modification,
! are permitted provided that the following conditions are met:
!
! *Redistributions of source code must retain the above copyright notice,
! this list of conditions and the following disclaimer.
! *Redistributions in binary form must reproduce the above copyright notice,
! this list of conditions and the following disclaimer in the documentation
! and/or other materials provided with the distribution.
! *Neither the name of NVIDIA CORPORATION nor the names of its contributors
! may be used to endorse or promote products derived from this software
! without specific prior written permission.
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
! EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
! DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
! FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
! SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
! OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
! USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdlib.h>
#include <stdio.h>
#ifndef N
#define N 32
#endif
#ifndef M
#define M 32
#endif
double ** allocData(size_t size1, size_t size2);
int deleteData(double ** A, size_t size1);
int initData(double **A, size_t size1, size_t size2, double val);
int printData(double **A, size_t size1, size_t size2);
int main() {
double **A, **B;
size_t size1,size2, i, j;
size1 = N;
size2 = M;
A=allocData(size1,size2);
B=allocData(size1,size2);
initData(B,size1,size2,2.5);
/* Perform the computation on the device */
#pragma acc parallel loop collapse(2) present(A,B) // present(A,B) 節の指定必要
for (j=0; j < size1; ++j) {
for (i=0; i< size2; ++i) {
A[j][i] = B[j][i] + (double) ((j*size2)+i);
}
}
/* Copy back the results */
#pragma acc update self(A[0:size1][0:size2])
printData(A,size1,size2);
deleteData(A,size1);
deleteData(B,size1);
exit(0);
}
double ** allocData(size_t size1, size_t size2) {
double ** tmp;
int i;
tmp = (double **) malloc(size1*sizeof(double*));
for (i=0; i < size1; ++i) {
tmp[i] = (double *) malloc(size2*sizeof(double));
}
#pragma acc enter data create(tmp[0:size1][0:size2]) // 二次元配列のサイズの指定を行う
return tmp;
}
int deleteData(double ** A, size_t size1) {
int i;
#pragma acc exit data delete(A)
for (i=0; i < size1; ++i) {
free(A[i]);
}
free(A);
}
int initData(double **A, size_t size1, size_t size2, double val) {
size_t i,j;
for (j=0; j < size1; ++j) {
for (i=0; i < size2; ++i) {
A[j][i] = val;
}
}
/* Update the device with the initial values */
#pragma acc update device(A[0:size1][0:size2])
}
int printData(double **A, size_t size1, size_t size2) {
size_t i,j,n;
n = size2-1;
printf("Values:\n");
for (i=0; i < 5; ++i) {
printf("A[%d][0]=%f A[%d][%d]=%f\n",i,A[i][0],i,n,A[i][n]);
}
printf("....\n");
for (i=size1-5; i < size1; ++i) {
printf("A[%d][0]=%f A[%d][%d]=%f\n",i,A[i][0],i,n,A[i][n]);
}
}
[kato@photon32]$ pgcc -acc -O2 -Minfo=accel unstructured_data2D.c -ta=tesla,cc60,cuda8.0
main:
27, Generating present(B[:][:],A[:][:])
Accelerator kernel generated
Generating Tesla code
28, #pragma acc loop gang, vector(128) collapse(2) /* blockIdx.x threadIdx.x */
29, /* blockIdx.x threadIdx.x collapsed */
35, Generating update self(A[:size1][:size2])
allocData:
48, Generating enter data create(tmp[:size1][:size2])
deleteData:
54, Generating exit data delete(A[:1][:1])
initData:
70, Generating update device(A[:size1][:size2])
[kato@photon32]$ a.out
Values:
A[0][0]=2.500000 A[0][31]=33.500000
A[1][0]=34.500000 A[1][31]=65.500000
A[2][0]=66.500000 A[2][31]=97.500000
A[3][0]=98.500000 A[3][31]=129.500000
A[4][0]=130.500000 A[4][31]=161.500000
....
A[27][0]=866.500000 A[27][31]=897.500000
A[28][0]=898.500000 A[28][31]=929.500000
A[29][0]=930.500000 A[29][31]=961.500000
A[30][0]=962.500000 A[30][31]=993.500000
A[31][0]=994.500000 A[31][31]=1025.500000
C の構造体を使用した例である。データの動的割付ルーチンに着目して欲しい。
! Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
!
! Redistribution and use in source and binary forms, with or without modification,
! are permitted provided that the following conditions are met:
!
! *Redistributions of source code must retain the above copyright notice,
! this list of conditions and the following disclaimer.
! *Redistributions in binary form must reproduce the above copyright notice,
! this list of conditions and the following disclaimer in the documentation
! and/or other materials provided with the distribution.
! *Neither the name of NVIDIA CORPORATION nor the names of its contributors
! may be used to endorse or promote products derived from this software
! without specific prior written permission.
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
! EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
! DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
! FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
! SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
! OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
! USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdlib.h>
#include <stdio.h>
#ifndef N
#define N 1024
#endif
typedef struct { // 構造体 float3 の定義
float x,y,z;
} float3;
float3 * allocData(size_t size);
int deleteData(float3 * A);
int initData(float3 *A, size_t size, float val);
int printData(float3 *A, size_t size);
int main() {
float3 *A, *B;
size_t size, i;
size = N;
A=allocData(size);
B=allocData(size);
initData(B,size,2.5);
/* Perform the computation on the device */
#pragma acc parallel loop present(A,B) // present(A,B) 節の指定必要
for (i=0; i < size; ++i) {
A[i].x = B[i].x + (float) i;
A[i].y = B[i].y + (float) i*2;
A[i].z = B[i].z + (float) i*3;
}
/* Copy back the results */
#pragma acc update self(A[0:size]) // 計算された A をホスト側へコピー
printData(A, size);
deleteData(A);
deleteData(B);
exit(0);
}
float3 * allocData(size_t size) { // データの動的割付
float3 * tmp;
tmp = (float3 *) malloc(size*sizeof(float3));
/* Create the array on device.
Order matters. The host copy must be allocated before
creating the device copy */
#pragma acc enter data create(tmp[0:size]) // デバイスへの tmp 構造体配列要素のコピー(fixed size)
return tmp;
}
int deleteData(float3 * A) {
/* Delete the host copy.
Order matters. The device copy must be deleted before
the host copy is freed. */
#pragma acc exit data delete(A) // デバイス側データの削除
free(A); // ホスト側データの削除
}
int initData(float3 *A, size_t size, float val) { // データの初期化
size_t i;
for (i=0; i < size; ++i) {
A[i].x = val;
A[i].y = val;
A[i].z = val;
}
/* Update the device with the initial values */
#pragma acc update device(A[0:size]) // 初期化したデータをデバイス側へコピー
}
int printData(float3 *A, size_t size) {
size_t i;
printf("Values:\n");
for (i=0; i < 10; ++i) {
printf("A[%d]=%f\n",i,A[i].x);
}
printf("....\n");
for (i=size-10; i < size; ++i) {
printf("A[%d]=%f\n",i,A[i].x);
}
}
[kato@photon32]$ pgcc -acc -O2 -Minfo=accel unstructured_data.struct.c -ta=tesla,cc60,cuda8.0
main:
28, Generating present(B[:],A[:])
Accelerator kernel generated
Generating Tesla code
29, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */
36, Generating update self(A[:size])
allocData:
48, Generating enter data create(tmp[:size])
deleteData:
56, Generating exit data delete(A[:1])
initData:
69, Generating update device(A[:size])
[kato@photon32]$ a.out
Values:
A[0]=2.500000
A[1]=3.500000
A[2]=4.500000
A[3]=5.500000
A[4]=6.500000
A[5]=7.500000
A[6]=8.500000
A[7]=9.500000
A[8]=10.500000
A[9]=11.500000
....
A[1014]=1016.500000
A[1015]=1017.500000
A[1016]=1018.500000
A[1017]=1019.500000
A[1018]=1020.500000
A[1019]=1021.500000
A[1020]=1022.500000
A[1021]=1023.500000
A[1022]=1024.500000
A[1023]=1025.5000000
C++ の List クラスデータに対する非構造化データ領域の利用例を 14 章で説明しているので、こちらを参照願いたい。