オリジナルソース:http://www.openmp.org/samples/jacobi.f
オリジナルのソースに一部改変を行っている。入力データを性能評価用に、特定の入力値をセットし、配列を動的割り当てするようにしている。このプログラムは、ベンチマークのため収束条件は考慮せず、常に100回のヤコビ法によるループを処理する。 また、時間計測のルーチンを追加している。
program main ************************************************************ * program to solve a finite difference * discretization of Helmholtz equation : * (d2/dx2)u + (d2/dy2)u - alpha u = f * using Jacobi iterative method. * * Modified: Sanjiv Shah, Kuck and Associates, Inc. (KAI), 1998 * Author: Joseph Robicheaux, Kuck and Associates, Inc. (KAI), 1998 * * Directives are used in this code to achieve paralleism. * All do loops are parallized with default 'static' scheduling. * * Input : n - grid real*8 in x direction * m - grid real*8 in y direction * alpha - Helmholtz constant (always greater than 0.0) * tol - error tolerance for iterative solver * relax - Successice over relaxation parameter * mits - Maximum iterations for iterative solver * * On output * : u(n,m) - Dependent variable (solutions) * : f(n,m) - Right hand side function ************************************************************* implicit none integer n,m,mits real*8 tol,relax,alpha real(8), allocatable,dimension(:,:):: u,f common /idat/ n,m,mits common /fdat/tol,alpha,relax * * Read info * write(*,*) "Input n,m - grid real*8 in x,y direction " C read(5,*) n,m n = 5120 m = 10000 print *, "N=",n,"M=",m allocate(u(n,m)) allocate(f(n,m)) write(*,*) "Input alpha - Helmholts constant " C read(5,*) alpha alpha = 1. write(*,*) "Input relax - Successive over-relaxation parameter" C read(5,*) relax relax=0.9 write(*,*) "Input tol - error tolerance for iterative solver" C read(5,*) tol tol = 1.d-13 write(*,*) "Input mits - Maximum iterations for solver" C read(5,*) mits mits=100 * * Calls a driver routine * call driver (u,f) stop end subroutine driver (u,f) ************************************************************* * Subroutine driver () * This is where the arrays are allocated and initialzed. * * Working varaibles/arrays * dx - grid spacing in x direction * dy - grid spacing in y direction ************************************************************* implicit none integer n,m,mits,mtemp real*8 tol,relax,alpha common /idat/ n,m,mits,mtemp common /fdat/tol,alpha,relax real*8 u(n,m),f(n,m),dx,dy * Elapsed time function initialized -- SofTek integer ic, icr, icm real*8 cpu0, cpu1, elapsed, t_ac real*8 second external second call system_clock(ic,icr,icm) t_ac= 1.0d0/real(icr,8) print '(1x,a,e10.5)','Time measurement accuracy : ',t_ac * Initialize data cpu0 = second() call initialize (n,m,alpha,dx,dy,u,f) * Solve Helmholtz equation call jacobi (n,m,dx,dy,alpha,relax,u,f,tol,mits) * Check error between exact solution call error_check (n,m,alpha,dx,dy,u,f) cpu1 = second() * Printout Elapsed time (SofTek) elapsed = (cpu1 -cpu0) * t_ac print '(/,1x,a,F10.3/)', & 'Elpased Time (Initialize + Jacobi solver + Check) : ',elapsed return end subroutine initialize (n,m,alpha,dx,dy,u,f) ****************************************************** * Initializes data * Assumes exact solution is u(x,y) = (1-x^2)*(1-y^2) * ****************************************************** implicit none integer n,m real*8 u(n,m),f(n,m),dx,dy,alpha integer i,j, xx,yy real*8 PI parameter (PI=3.1415926) dx = 2.0 / (n-1) dy = 2.0 / (m-1) * Initilize initial condition and RHS !$omp parallel do private(xx,yy) do j = 1,m do i = 1,n xx = -1.0 + dx * real(i-1) ! -1 < x < 1 yy = -1.0 + dy * real(j-1) ! -1 < y < 1 u(i,j) = 0.0 f(i,j) = -alpha *(1.0-xx*xx)*(1.0-yy*yy) & - 2.0*(1.0-xx*xx)-2.0*(1.0-yy*yy) enddo enddo !$omp end parallel do return end subroutine jacobi (n,m,dx,dy,alpha,omega,u,f,tol,maxit) ****************************************************************** * Subroutine HelmholtzJ * Solves poisson equation on rectangular grid assuming : * (1) Uniform discretization in each direction, and * (2) Dirichlect boundary conditions * * Jacobi method is used in this routine * * Input : n,m Number of grid points in the X/Y directions * dx,dy Grid spacing in the X/Y directions * alpha Helmholtz eqn. coefficient * omega Relaxation factor * f(n,m) Right hand side function * u(n,m) Dependent variable/Solution * tol Tolerance for iterative solver * maxit Maximum number of iterations * * Output : u(n,m) - Solution ***************************************************************** implicit none integer n,m,maxit real*8 dx,dy,f(n,m),u(n,m),alpha, tol,omega * * Local variables * integer i,j,k,k_local real*8 error,resid,rsum,ax,ay,b real*8 error_local real(8), allocatable, dimension(:,:):: uold real ta,tb,tc,td,te,ta1,ta2,tb1,tb2,tc1,tc2,td1,td2 real te1,te2 real second external second allocate(uold(n,m)) * * Initialize coefficients ax = 1.0/(dx*dx) ! X-direction coef ay = 1.0/(dy*dy) ! Y-direction coef b = -2.0/(dx*dx)-2.0/(dy*dy) - alpha ! Central coeff error = 10.0 * tol k = 1 do while (k.le.maxit .and. error.gt. tol) error = 0.0 * Copy new solution into old !$omp parallel default(shared) !$omp do do j=1,m do i=1,n uold(i,j) = u(i,j) enddo enddo * Compute stencil, residual, & update !$omp do private(resid) reduction(+:error) do j = 2,m-1 do i = 2,n-1 * Evaluate residual resid = (ax*(uold(i-1,j) + uold(i+1,j)) & + ay*(uold(i,j-1) + uold(i,j+1)) & + b * uold(i,j) - f(i,j))/b * Update solution u(i,j) = uold(i,j) - omega * resid * Accumulate residual error error = error + resid*resid end do enddo !$omp enddo nowait !$omp end parallel * Error check k = k + 1 error = sqrt(error)/dble(n*m) * enddo ! End iteration loop * print *, 'Total Number of Iterations ', k print *, 'Residual ', error return end subroutine error_check (n,m,alpha,dx,dy,u,f) implicit none ************************************************************ * Checks error between numerical and exact solution * ************************************************************ integer n,m real*8 u(n,m),f(n,m),dx,dy,alpha integer i,j real*8 xx,yy,temp,error dx = 2.0 / (n-1) dy = 2.0 / (m-1) error = 0.0 !$omp parallel do private(xx,yy,temp) reduction(+:error) do j = 1,m do i = 1,n xx = -1.0e0 + dx * dble(i-1) yy = -1.0e0 + dy * dble(j-1) temp = u(i,j) - (1.0-xx*xx)*(1.0-yy*yy) error = error + temp*temp enddo enddo error = sqrt(error)/real(n*m) print *, 'Solution Error : ',error return end function second() result(rtime) implicit none ! integer :: ic,ir,im real(8) :: rtime ! call system_clock(ic,ir,im) ! rtime= real(ic,8) ! return end
オリジナルのソースに一部改変を行っている。入力データを性能評価用に、特定の入力値をセットしている。このプログラムは、ベンチマークのため収束条件は考慮せず、常に100回のヤコビ法によるループを処理する。 また、時間計測のルーチンを追加している。
program main ************************************************************ * program to solve a finite difference * discretization of Helmholtz equation : * (d2/dx2)u + (d2/dy2)u - alpha u = f * using Jacobi iterative method. * * Modified: Sanjiv Shah, Kuck and Associates, Inc. (KAI), 1998 * Author: Joseph Robicheaux, Kuck and Associates, Inc. (KAI), 1998 * * Directives are used in this code to achieve paralleism. * All do loops are parallized with default 'static' scheduling. * * Input : n - grid real*4 in x direction * m - grid real*4 in y direction * alpha - Helmholtz constant (always greater than 0.0) * tol - error tolerance for iterative solver * relax - Successice over relaxation parameter * mits - Maximum iterations for iterative solver * * On output * : u(n,m) - Dependent variable (solutions) * : f(n,m) - Right hand side function ************************************************************* implicit none integer n,m,mits real*4 tol,relax,alpha real(4), allocatable,dimension(:,:):: u,f common /idat/ n,m,mits common /fdat/tol,alpha,relax * * Read info * write(*,*) "Input n,m - grid real*4 in x,y direction " C read(5,*) n,m n = 5120 m = 10000 print *, "N=",n,"M=",m allocate(u(n,m)) allocate(f(n,m)) write(*,*) "Input alpha - Helmholts constant " C read(5,*) alpha alpha = 1. write(*,*) "Input relax - Successive over-relaxation parameter" C read(5,*) relax relax=0.9 write(*,*) "Input tol - error tolerance for iterative solver" C read(5,*) tol tol = 1.e-13 write(*,*) "Input mits - Maximum iterations for solver" C read(5,*) mits mits=100 * * Calls a driver routine * call driver(u,f) stop end subroutine driver(u,f) ************************************************************* * Subroutine driver () * This is where the arrays are allocated and initialzed. * * Working varaibles/arrays * dx - grid spacing in x direction * dy - grid spacing in y direction ************************************************************* implicit none integer n,m,mits,mtemp real*4 tol,relax,alpha common /idat/ n,m,mits,mtemp common /fdat/tol,alpha,relax real*4 u(n,m),f(n,m),dx,dy * Elapsed time function initialized -- SofTek integer ic, icr, icm real*8 cpu0, cpu1, elapsed, t_ac real*8 second external second call system_clock(ic,icr,icm) t_ac= 1.0e0/real(icr,8) print '(1x,a,e10.5)','Time measurement accuracy : ',t_ac * Initialize data cpu0 = second() call initialize (n,m,alpha,dx,dy,u,f) * Solve Helmholtz equation call jacobi (n,m,dx,dy,alpha,relax,u,f,tol,mits) * Check error between exact solution call error_check (n,m,alpha,dx,dy,u,f) cpu1 = second() * Printout Elapsed time (SofTek) elapsed = (cpu1 -cpu0) * t_ac print '(/,1x,a,F10.3/)', & 'Elpased Time (Initialize + Jacobi solver + Check) : ',elapsed return end subroutine initialize (n,m,alpha,dx,dy,u,f) ****************************************************** * Initializes data * Assumes exact solution is u(x,y) = (1-x^2)*(1-y^2) * ****************************************************** implicit none integer n,m real*4 u(n,m),f(n,m),dx,dy,alpha integer i,j, xx,yy real*4 PI parameter (PI=3.1415926) dx = 2.0 / (n-1) dy = 2.0 / (m-1) * Initilize initial condition and RHS !$omp parallel do private(xx,yy) do j = 1,m do i = 1,n xx = -1.0 + dx * real(i-1) ! -1 < x < 1 yy = -1.0 + dy * real(j-1) ! -1 < y < 1 u(i,j) = 0.0 f(i,j) = -alpha *(1.0-xx*xx)*(1.0-yy*yy) & - 2.0*(1.0-xx*xx)-2.0*(1.0-yy*yy) enddo enddo !$omp end parallel do return end subroutine jacobi (n,m,dx,dy,alpha,omega,u,f,tol,maxit) ****************************************************************** * Subroutine HelmholtzJ * Solves poisson equation on rectangular grid assuming : * (1) Uniform discretization in each direction, and * (2) Dirichlect boundary conditions * * Jacobi method is used in this routine * * Input : n,m Number of grid points in the X/Y directions * dx,dy Grid spacing in the X/Y directions * alpha Helmholtz eqn. coefficient * omega Relaxation factor * f(n,m) Right hand side function * u(n,m) Dependent variable/Solution * tol Tolerance for iterative solver * maxit Maximum number of iterations * * Output : u(n,m) - Solution ***************************************************************** implicit none integer n,m,maxit real*4 dx,dy,f(n,m),u(n,m),alpha, tol,omega * * Local variables * integer i,j,k,k_local real*4 error,resid,rsum,ax,ay,b real*4 error_local real(4), allocatable,dimension(:,:):: uold real ta,tb,tc,td,te,ta1,ta2,tb1,tb2,tc1,tc2,td1,td2 real te1,te2 real second external second allocate(uold(n,m)) * * Initialize coefficients ax = 1.0/(dx*dx) ! X-direction coef ay = 1.0/(dy*dy) ! Y-direction coef b = -2.0/(dx*dx)-2.0/(dy*dy) - alpha ! Central coeff error = 10.0 * tol k = 1 do while (k.le.maxit .and. error.gt. tol) error = 0.0 * Copy new solution into old !$omp parallel default(shared) !$omp do do j=1,m do i=1,n uold(i,j) = u(i,j) enddo enddo * Compute stencil, residual, & update !$omp do private(resid) reduction(+:error) do j = 2,m-1 do i = 2,n-1 * Evaluate residual resid = (ax*(uold(i-1,j) + uold(i+1,j)) & + ay*(uold(i,j-1) + uold(i,j+1)) & + b * uold(i,j) - f(i,j))/b * Update solution u(i,j) = uold(i,j) - omega * resid * Accumulate residual error error = error + resid*resid end do enddo !$omp enddo nowait !$omp end parallel * Error check k = k + 1 error = sqrt(error)/real(n*m) * enddo ! End iteration loop * print *, 'Total Number of Iterations ', k print *, 'Residual ', error return end subroutine error_check (n,m,alpha,dx,dy,u,f) implicit none ************************************************************ * Checks error between numerical and exact solution * ************************************************************ integer n,m real*4 u(n,m),f(n,m),dx,dy,alpha integer i,j real*4 xx,yy,temp,error dx = 2.0 / (n-1) dy = 2.0 / (m-1) error = 0.0 !$omp parallel do private(xx,yy,temp) reduction(+:error) do j = 1,m do i = 1,n xx = -1.0e0 + dx * real(i-1) yy = -1.0e0 + dy * real(j-1) temp = u(i,j) - (1.0-xx*xx)*(1.0-yy*yy) error = error + temp*temp enddo enddo error = sqrt(error)/real(n*m) print *, 'Solution Error : ',error return end function second() result(rtime) implicit none ! integer :: ic,ir,im real(8) :: rtime ! call system_clock(ic,ir,im) ! rtime= real(ic,8) ! return end