オリジナルソース:http://www.openmp.org/samples/jacobi.f
オリジナルのソースに一部改変を行っている。入力データを性能評価用に、特定の入力値をセットし、配列を動的割り当てするようにしている。このプログラムは、ベンチマークのため収束条件は考慮せず、常に100回のヤコビ法によるループを処理する。 また、時間計測のルーチンを追加している。
program main
************************************************************
* program to solve a finite difference
* discretization of Helmholtz equation :
* (d2/dx2)u + (d2/dy2)u - alpha u = f
* using Jacobi iterative method.
*
* Modified: Sanjiv Shah, Kuck and Associates, Inc. (KAI), 1998
* Author: Joseph Robicheaux, Kuck and Associates, Inc. (KAI), 1998
*
* Directives are used in this code to achieve paralleism.
* All do loops are parallized with default 'static' scheduling.
*
* Input : n - grid real*8 in x direction
* m - grid real*8 in y direction
* alpha - Helmholtz constant (always greater than 0.0)
* tol - error tolerance for iterative solver
* relax - Successice over relaxation parameter
* mits - Maximum iterations for iterative solver
*
* On output
* : u(n,m) - Dependent variable (solutions)
* : f(n,m) - Right hand side function
*************************************************************
implicit none
integer n,m,mits
real*8 tol,relax,alpha
real(8), allocatable,dimension(:,:):: u,f
common /idat/ n,m,mits
common /fdat/tol,alpha,relax
*
* Read info
*
write(*,*) "Input n,m - grid real*8 in x,y direction "
C read(5,*) n,m
n = 5120
m = 10000
print *, "N=",n,"M=",m
allocate(u(n,m))
allocate(f(n,m))
write(*,*) "Input alpha - Helmholts constant "
C read(5,*) alpha
alpha = 1.
write(*,*) "Input relax - Successive over-relaxation parameter"
C read(5,*) relax
relax=0.9
write(*,*) "Input tol - error tolerance for iterative solver"
C read(5,*) tol
tol = 1.d-13
write(*,*) "Input mits - Maximum iterations for solver"
C read(5,*) mits
mits=100
*
* Calls a driver routine
*
call driver (u,f)
stop
end
subroutine driver (u,f)
*************************************************************
* Subroutine driver ()
* This is where the arrays are allocated and initialzed.
*
* Working varaibles/arrays
* dx - grid spacing in x direction
* dy - grid spacing in y direction
*************************************************************
implicit none
integer n,m,mits,mtemp
real*8 tol,relax,alpha
common /idat/ n,m,mits,mtemp
common /fdat/tol,alpha,relax
real*8 u(n,m),f(n,m),dx,dy
* Elapsed time function initialized -- SofTek
integer ic, icr, icm
real*8 cpu0, cpu1, elapsed, t_ac
real*8 second
external second
call system_clock(ic,icr,icm)
t_ac= 1.0d0/real(icr,8)
print '(1x,a,e10.5)','Time measurement accuracy : ',t_ac
* Initialize data
cpu0 = second()
call initialize (n,m,alpha,dx,dy,u,f)
* Solve Helmholtz equation
call jacobi (n,m,dx,dy,alpha,relax,u,f,tol,mits)
* Check error between exact solution
call error_check (n,m,alpha,dx,dy,u,f)
cpu1 = second()
* Printout Elapsed time (SofTek)
elapsed = (cpu1 -cpu0) * t_ac
print '(/,1x,a,F10.3/)',
& 'Elpased Time (Initialize + Jacobi solver + Check) : ',elapsed
return
end
subroutine initialize (n,m,alpha,dx,dy,u,f)
******************************************************
* Initializes data
* Assumes exact solution is u(x,y) = (1-x^2)*(1-y^2)
*
******************************************************
implicit none
integer n,m
real*8 u(n,m),f(n,m),dx,dy,alpha
integer i,j, xx,yy
real*8 PI
parameter (PI=3.1415926)
dx = 2.0 / (n-1)
dy = 2.0 / (m-1)
* Initilize initial condition and RHS
!$omp parallel do private(xx,yy)
do j = 1,m
do i = 1,n
xx = -1.0 + dx * real(i-1) ! -1 < x < 1
yy = -1.0 + dy * real(j-1) ! -1 < y < 1
u(i,j) = 0.0
f(i,j) = -alpha *(1.0-xx*xx)*(1.0-yy*yy)
& - 2.0*(1.0-xx*xx)-2.0*(1.0-yy*yy)
enddo
enddo
!$omp end parallel do
return
end
subroutine jacobi (n,m,dx,dy,alpha,omega,u,f,tol,maxit)
******************************************************************
* Subroutine HelmholtzJ
* Solves poisson equation on rectangular grid assuming :
* (1) Uniform discretization in each direction, and
* (2) Dirichlect boundary conditions
*
* Jacobi method is used in this routine
*
* Input : n,m Number of grid points in the X/Y directions
* dx,dy Grid spacing in the X/Y directions
* alpha Helmholtz eqn. coefficient
* omega Relaxation factor
* f(n,m) Right hand side function
* u(n,m) Dependent variable/Solution
* tol Tolerance for iterative solver
* maxit Maximum number of iterations
*
* Output : u(n,m) - Solution
*****************************************************************
implicit none
integer n,m,maxit
real*8 dx,dy,f(n,m),u(n,m),alpha, tol,omega
*
* Local variables
*
integer i,j,k,k_local
real*8 error,resid,rsum,ax,ay,b
real*8 error_local
real(8), allocatable, dimension(:,:):: uold
real ta,tb,tc,td,te,ta1,ta2,tb1,tb2,tc1,tc2,td1,td2
real te1,te2
real second
external second
allocate(uold(n,m))
*
* Initialize coefficients
ax = 1.0/(dx*dx) ! X-direction coef
ay = 1.0/(dy*dy) ! Y-direction coef
b = -2.0/(dx*dx)-2.0/(dy*dy) - alpha ! Central coeff
error = 10.0 * tol
k = 1
do while (k.le.maxit .and. error.gt. tol)
error = 0.0
* Copy new solution into old
!$omp parallel default(shared)
!$omp do
do j=1,m
do i=1,n
uold(i,j) = u(i,j)
enddo
enddo
* Compute stencil, residual, & update
!$omp do private(resid) reduction(+:error)
do j = 2,m-1
do i = 2,n-1
* Evaluate residual
resid = (ax*(uold(i-1,j) + uold(i+1,j))
& + ay*(uold(i,j-1) + uold(i,j+1))
& + b * uold(i,j) - f(i,j))/b
* Update solution
u(i,j) = uold(i,j) - omega * resid
* Accumulate residual error
error = error + resid*resid
end do
enddo
!$omp enddo nowait
!$omp end parallel
* Error check
k = k + 1
error = sqrt(error)/dble(n*m)
*
enddo ! End iteration loop
*
print *, 'Total Number of Iterations ', k
print *, 'Residual ', error
return
end
subroutine error_check (n,m,alpha,dx,dy,u,f)
implicit none
************************************************************
* Checks error between numerical and exact solution
*
************************************************************
integer n,m
real*8 u(n,m),f(n,m),dx,dy,alpha
integer i,j
real*8 xx,yy,temp,error
dx = 2.0 / (n-1)
dy = 2.0 / (m-1)
error = 0.0
!$omp parallel do private(xx,yy,temp) reduction(+:error)
do j = 1,m
do i = 1,n
xx = -1.0e0 + dx * dble(i-1)
yy = -1.0e0 + dy * dble(j-1)
temp = u(i,j) - (1.0-xx*xx)*(1.0-yy*yy)
error = error + temp*temp
enddo
enddo
error = sqrt(error)/real(n*m)
print *, 'Solution Error : ',error
return
end
function second() result(rtime)
implicit none
!
integer :: ic,ir,im
real(8) :: rtime
!
call system_clock(ic,ir,im)
!
rtime= real(ic,8)
!
return
end
オリジナルのソースに一部改変を行っている。入力データを性能評価用に、特定の入力値をセットしている。このプログラムは、ベンチマークのため収束条件は考慮せず、常に100回のヤコビ法によるループを処理する。 また、時間計測のルーチンを追加している。
program main
************************************************************
* program to solve a finite difference
* discretization of Helmholtz equation :
* (d2/dx2)u + (d2/dy2)u - alpha u = f
* using Jacobi iterative method.
*
* Modified: Sanjiv Shah, Kuck and Associates, Inc. (KAI), 1998
* Author: Joseph Robicheaux, Kuck and Associates, Inc. (KAI), 1998
*
* Directives are used in this code to achieve paralleism.
* All do loops are parallized with default 'static' scheduling.
*
* Input : n - grid real*4 in x direction
* m - grid real*4 in y direction
* alpha - Helmholtz constant (always greater than 0.0)
* tol - error tolerance for iterative solver
* relax - Successice over relaxation parameter
* mits - Maximum iterations for iterative solver
*
* On output
* : u(n,m) - Dependent variable (solutions)
* : f(n,m) - Right hand side function
*************************************************************
implicit none
integer n,m,mits
real*4 tol,relax,alpha
real(4), allocatable,dimension(:,:):: u,f
common /idat/ n,m,mits
common /fdat/tol,alpha,relax
*
* Read info
*
write(*,*) "Input n,m - grid real*4 in x,y direction "
C read(5,*) n,m
n = 5120
m = 10000
print *, "N=",n,"M=",m
allocate(u(n,m))
allocate(f(n,m))
write(*,*) "Input alpha - Helmholts constant "
C read(5,*) alpha
alpha = 1.
write(*,*) "Input relax - Successive over-relaxation parameter"
C read(5,*) relax
relax=0.9
write(*,*) "Input tol - error tolerance for iterative solver"
C read(5,*) tol
tol = 1.e-13
write(*,*) "Input mits - Maximum iterations for solver"
C read(5,*) mits
mits=100
*
* Calls a driver routine
*
call driver(u,f)
stop
end
subroutine driver(u,f)
*************************************************************
* Subroutine driver ()
* This is where the arrays are allocated and initialzed.
*
* Working varaibles/arrays
* dx - grid spacing in x direction
* dy - grid spacing in y direction
*************************************************************
implicit none
integer n,m,mits,mtemp
real*4 tol,relax,alpha
common /idat/ n,m,mits,mtemp
common /fdat/tol,alpha,relax
real*4 u(n,m),f(n,m),dx,dy
* Elapsed time function initialized -- SofTek
integer ic, icr, icm
real*8 cpu0, cpu1, elapsed, t_ac
real*8 second
external second
call system_clock(ic,icr,icm)
t_ac= 1.0e0/real(icr,8)
print '(1x,a,e10.5)','Time measurement accuracy : ',t_ac
* Initialize data
cpu0 = second()
call initialize (n,m,alpha,dx,dy,u,f)
* Solve Helmholtz equation
call jacobi (n,m,dx,dy,alpha,relax,u,f,tol,mits)
* Check error between exact solution
call error_check (n,m,alpha,dx,dy,u,f)
cpu1 = second()
* Printout Elapsed time (SofTek)
elapsed = (cpu1 -cpu0) * t_ac
print '(/,1x,a,F10.3/)',
& 'Elpased Time (Initialize + Jacobi solver + Check) : ',elapsed
return
end
subroutine initialize (n,m,alpha,dx,dy,u,f)
******************************************************
* Initializes data
* Assumes exact solution is u(x,y) = (1-x^2)*(1-y^2)
*
******************************************************
implicit none
integer n,m
real*4 u(n,m),f(n,m),dx,dy,alpha
integer i,j, xx,yy
real*4 PI
parameter (PI=3.1415926)
dx = 2.0 / (n-1)
dy = 2.0 / (m-1)
* Initilize initial condition and RHS
!$omp parallel do private(xx,yy)
do j = 1,m
do i = 1,n
xx = -1.0 + dx * real(i-1) ! -1 < x < 1
yy = -1.0 + dy * real(j-1) ! -1 < y < 1
u(i,j) = 0.0
f(i,j) = -alpha *(1.0-xx*xx)*(1.0-yy*yy)
& - 2.0*(1.0-xx*xx)-2.0*(1.0-yy*yy)
enddo
enddo
!$omp end parallel do
return
end
subroutine jacobi (n,m,dx,dy,alpha,omega,u,f,tol,maxit)
******************************************************************
* Subroutine HelmholtzJ
* Solves poisson equation on rectangular grid assuming :
* (1) Uniform discretization in each direction, and
* (2) Dirichlect boundary conditions
*
* Jacobi method is used in this routine
*
* Input : n,m Number of grid points in the X/Y directions
* dx,dy Grid spacing in the X/Y directions
* alpha Helmholtz eqn. coefficient
* omega Relaxation factor
* f(n,m) Right hand side function
* u(n,m) Dependent variable/Solution
* tol Tolerance for iterative solver
* maxit Maximum number of iterations
*
* Output : u(n,m) - Solution
*****************************************************************
implicit none
integer n,m,maxit
real*4 dx,dy,f(n,m),u(n,m),alpha, tol,omega
*
* Local variables
*
integer i,j,k,k_local
real*4 error,resid,rsum,ax,ay,b
real*4 error_local
real(4), allocatable,dimension(:,:):: uold
real ta,tb,tc,td,te,ta1,ta2,tb1,tb2,tc1,tc2,td1,td2
real te1,te2
real second
external second
allocate(uold(n,m))
*
* Initialize coefficients
ax = 1.0/(dx*dx) ! X-direction coef
ay = 1.0/(dy*dy) ! Y-direction coef
b = -2.0/(dx*dx)-2.0/(dy*dy) - alpha ! Central coeff
error = 10.0 * tol
k = 1
do while (k.le.maxit .and. error.gt. tol)
error = 0.0
* Copy new solution into old
!$omp parallel default(shared)
!$omp do
do j=1,m
do i=1,n
uold(i,j) = u(i,j)
enddo
enddo
* Compute stencil, residual, & update
!$omp do private(resid) reduction(+:error)
do j = 2,m-1
do i = 2,n-1
* Evaluate residual
resid = (ax*(uold(i-1,j) + uold(i+1,j))
& + ay*(uold(i,j-1) + uold(i,j+1))
& + b * uold(i,j) - f(i,j))/b
* Update solution
u(i,j) = uold(i,j) - omega * resid
* Accumulate residual error
error = error + resid*resid
end do
enddo
!$omp enddo nowait
!$omp end parallel
* Error check
k = k + 1
error = sqrt(error)/real(n*m)
*
enddo ! End iteration loop
*
print *, 'Total Number of Iterations ', k
print *, 'Residual ', error
return
end
subroutine error_check (n,m,alpha,dx,dy,u,f)
implicit none
************************************************************
* Checks error between numerical and exact solution
*
************************************************************
integer n,m
real*4 u(n,m),f(n,m),dx,dy,alpha
integer i,j
real*4 xx,yy,temp,error
dx = 2.0 / (n-1)
dy = 2.0 / (m-1)
error = 0.0
!$omp parallel do private(xx,yy,temp) reduction(+:error)
do j = 1,m
do i = 1,n
xx = -1.0e0 + dx * real(i-1)
yy = -1.0e0 + dy * real(j-1)
temp = u(i,j) - (1.0-xx*xx)*(1.0-yy*yy)
error = error + temp*temp
enddo
enddo
error = sqrt(error)/real(n*m)
print *, 'Solution Error : ',error
return
end
function second() result(rtime)
implicit none
!
integer :: ic,ir,im
real(8) :: rtime
!
call system_clock(ic,ir,im)
!
rtime= real(ic,8)
!
return
end