Fortran OpenMP プログラム

Fortran OpenMP Example Program (倍精度)

オリジナルソース:http://www.openmp.org/samples/jacobi.f

オリジナルのソースに一部改変を行っている。入力データを性能評価用に、特定の入力値をセットし、配列を動的割り当てするようにしている。このプログラムは、ベンチマークのため収束条件は考慮せず、常に100回のヤコビ法によるループを処理する。 また、時間計測のルーチンを追加している。

      program main
************************************************************
* program to solve a finite difference
* discretization of Helmholtz equation :
* (d2/dx2)u + (d2/dy2)u - alpha u = f
* using Jacobi iterative method.
*
* Modified: Sanjiv Shah,       Kuck and Associates, Inc. (KAI), 1998
* Author:   Joseph Robicheaux, Kuck and Associates, Inc. (KAI), 1998
*
* Directives are used in this code to achieve paralleism.
* All do loops are parallized with default 'static' scheduling.
*
* Input :  n - grid real*8 in x direction
*          m - grid real*8 in y direction
*          alpha - Helmholtz constant (always greater than 0.0)
*          tol   - error tolerance for iterative solver
*          relax - Successice over relaxation parameter
*          mits  - Maximum iterations for iterative solver
*
* On output
*       : u(n,m) - Dependent variable (solutions)
*       : f(n,m) - Right hand side function
*************************************************************
      implicit none

      integer n,m,mits
      real*8 tol,relax,alpha

      real(8), allocatable,dimension(:,:):: u,f

      common /idat/ n,m,mits
      common /fdat/tol,alpha,relax
*
* Read info
*
      write(*,*) "Input n,m - grid real*8 in x,y direction "
C     read(5,*) n,m
      n = 5120
      m = 10000
      print *, "N=",n,"M=",m
      allocate(u(n,m))
      allocate(f(n,m))

      write(*,*) "Input alpha - Helmholts constant "
C     read(5,*) alpha
      alpha = 1.
      write(*,*) "Input relax - Successive over-relaxation parameter"
C     read(5,*) relax
      relax=0.9
      write(*,*) "Input tol - error tolerance for iterative solver"
C     read(5,*) tol
      tol = 1.d-13
      write(*,*) "Input mits - Maximum iterations for solver"
C     read(5,*) mits
      mits=100

*
* Calls a driver routine
*
      call driver (u,f)

      stop
      end

      subroutine driver (u,f)
*************************************************************
* Subroutine driver ()
* This is where the arrays are allocated and initialzed.
*
* Working varaibles/arrays
*     dx  - grid spacing in x direction
*     dy  - grid spacing in y direction
*************************************************************
      implicit none

      integer n,m,mits,mtemp
      real*8 tol,relax,alpha

      common /idat/ n,m,mits,mtemp
      common /fdat/tol,alpha,relax

      real*8 u(n,m),f(n,m),dx,dy

* Elapsed time function initialized -- SofTek
      integer ic, icr, icm
      real*8 cpu0, cpu1, elapsed, t_ac
      real*8 second
      external second
      call system_clock(ic,icr,icm)
      t_ac= 1.0d0/real(icr,8)
      print '(1x,a,e10.5)','Time measurement accuracy : ',t_ac

* Initialize data

      cpu0 = second()
      call initialize (n,m,alpha,dx,dy,u,f)

* Solve Helmholtz equation

      call jacobi (n,m,dx,dy,alpha,relax,u,f,tol,mits)

* Check error between exact solution

      call  error_check (n,m,alpha,dx,dy,u,f)
      cpu1 = second()

* Printout Elapsed time (SofTek)

      elapsed = (cpu1 -cpu0) * t_ac
      print '(/,1x,a,F10.3/)',
     & 'Elpased Time (Initialize + Jacobi solver + Check) : ',elapsed


      return
      end

      subroutine initialize (n,m,alpha,dx,dy,u,f)
******************************************************
* Initializes data
* Assumes exact solution is u(x,y) = (1-x^2)*(1-y^2)
*
******************************************************
      implicit none

      integer n,m
      real*8 u(n,m),f(n,m),dx,dy,alpha

      integer i,j, xx,yy
      real*8 PI
      parameter (PI=3.1415926)

      dx = 2.0 / (n-1)
      dy = 2.0 / (m-1)

* Initilize initial condition and RHS

!$omp parallel do private(xx,yy)
      do j = 1,m
         do i = 1,n
            xx = -1.0 + dx * real(i-1)        ! -1 < x < 1
            yy = -1.0 + dy * real(j-1)        ! -1 < y < 1
            u(i,j) = 0.0
            f(i,j) = -alpha *(1.0-xx*xx)*(1.0-yy*yy)
     &           - 2.0*(1.0-xx*xx)-2.0*(1.0-yy*yy)
         enddo
      enddo
!$omp end parallel do

      return
      end

      subroutine jacobi (n,m,dx,dy,alpha,omega,u,f,tol,maxit)
******************************************************************
* Subroutine HelmholtzJ
* Solves poisson equation on rectangular grid assuming :
* (1) Uniform discretization in each direction, and
* (2) Dirichlect boundary conditions
*
* Jacobi method is used in this routine
*
* Input : n,m   Number of grid points in the X/Y directions
*         dx,dy Grid spacing in the X/Y directions
*         alpha Helmholtz eqn. coefficient
*         omega Relaxation factor
*         f(n,m) Right hand side function
*         u(n,m) Dependent variable/Solution
*         tol    Tolerance for iterative solver
*         maxit  Maximum number of iterations
*
* Output : u(n,m) - Solution
*****************************************************************
      implicit none
      integer n,m,maxit
      real*8 dx,dy,f(n,m),u(n,m),alpha, tol,omega
*
* Local variables
*
      integer i,j,k,k_local
      real*8 error,resid,rsum,ax,ay,b
      real*8 error_local
      real(8), allocatable, dimension(:,:):: uold

      real ta,tb,tc,td,te,ta1,ta2,tb1,tb2,tc1,tc2,td1,td2
      real te1,te2
      real second
      external second

      allocate(uold(n,m))
*
* Initialize coefficients
      ax = 1.0/(dx*dx) ! X-direction coef
      ay = 1.0/(dy*dy) ! Y-direction coef
      b  = -2.0/(dx*dx)-2.0/(dy*dy) - alpha ! Central coeff

      error = 10.0 * tol
      k = 1

      do while (k.le.maxit .and. error.gt. tol)

         error = 0.0

* Copy new solution into old
!$omp parallel default(shared)

!$omp do
         do j=1,m
            do i=1,n
               uold(i,j) = u(i,j)
            enddo
         enddo

* Compute stencil, residual, & update

!$omp do private(resid) reduction(+:error)
         do j = 2,m-1
            do i = 2,n-1
*     Evaluate residual
               resid = (ax*(uold(i-1,j) + uold(i+1,j))
     &                + ay*(uold(i,j-1) + uold(i,j+1))
     &                 + b * uold(i,j) - f(i,j))/b
* Update solution
               u(i,j) = uold(i,j) - omega * resid
* Accumulate residual error
                           error = error + resid*resid
            end do
         enddo
!$omp enddo nowait

!$omp end parallel

* Error check

         k = k + 1

         error = sqrt(error)/dble(n*m)
*
      enddo                     ! End iteration loop
*
      print *, 'Total Number of Iterations ', k
      print *, 'Residual                   ', error

      return
      end

      subroutine error_check (n,m,alpha,dx,dy,u,f)
      implicit none
************************************************************
* Checks error between numerical and exact solution
*
************************************************************

      integer n,m
      real*8 u(n,m),f(n,m),dx,dy,alpha

      integer i,j
      real*8 xx,yy,temp,error

      dx = 2.0 / (n-1)
      dy = 2.0 / (m-1)
      error = 0.0

!$omp parallel do private(xx,yy,temp) reduction(+:error)
      do j = 1,m
         do i = 1,n
            xx = -1.0e0 + dx * dble(i-1)
            yy = -1.0e0 + dy * dble(j-1)
            temp  = u(i,j) - (1.0-xx*xx)*(1.0-yy*yy)
            error = error + temp*temp
         enddo
      enddo

      error = sqrt(error)/real(n*m)

      print *, 'Solution Error : ',error

      return
      end
      function second() result(rtime)
      implicit none
!
      integer :: ic,ir,im
      real(8) :: rtime
!
      call system_clock(ic,ir,im)
!
      rtime= real(ic,8)
!
      return
      end

Fortran OpenMP Example Program (単精度single-jacobi.f)

オリジナルのソースに一部改変を行っている。入力データを性能評価用に、特定の入力値をセットしている。このプログラムは、ベンチマークのため収束条件は考慮せず、常に100回のヤコビ法によるループを処理する。 また、時間計測のルーチンを追加している。

      program main
************************************************************
* program to solve a finite difference
* discretization of Helmholtz equation :
* (d2/dx2)u + (d2/dy2)u - alpha u = f
* using Jacobi iterative method.
*
* Modified: Sanjiv Shah,       Kuck and Associates, Inc. (KAI), 1998
* Author:   Joseph Robicheaux, Kuck and Associates, Inc. (KAI), 1998
*
* Directives are used in this code to achieve paralleism.
* All do loops are parallized with default 'static' scheduling.
*
* Input :  n - grid real*4 in x direction
*          m - grid real*4 in y direction
*          alpha - Helmholtz constant (always greater than 0.0)
*          tol   - error tolerance for iterative solver
*          relax - Successice over relaxation parameter
*          mits  - Maximum iterations for iterative solver
*
* On output
*       : u(n,m) - Dependent variable (solutions)
*       : f(n,m) - Right hand side function
*************************************************************
      implicit none

      integer n,m,mits
      real*4 tol,relax,alpha
      real(4), allocatable,dimension(:,:):: u,f

      common /idat/ n,m,mits
      common /fdat/tol,alpha,relax
*
* Read info
*
      write(*,*) "Input n,m - grid real*4 in x,y direction "
C     read(5,*) n,m
      n = 5120
      m = 10000
      print *, "N=",n,"M=",m
      allocate(u(n,m))
      allocate(f(n,m))

      write(*,*) "Input alpha - Helmholts constant "
C     read(5,*) alpha
      alpha = 1.
      write(*,*) "Input relax - Successive over-relaxation parameter"
C     read(5,*) relax
      relax=0.9
      write(*,*) "Input tol - error tolerance for iterative solver"
C     read(5,*) tol
      tol = 1.e-13
      write(*,*) "Input mits - Maximum iterations for solver"
C     read(5,*) mits
      mits=100

*
* Calls a driver routine
*
      call driver(u,f)

      stop
      end

      subroutine driver(u,f)
*************************************************************
* Subroutine driver ()
* This is where the arrays are allocated and initialzed.
*
* Working varaibles/arrays
*     dx  - grid spacing in x direction
*     dy  - grid spacing in y direction
*************************************************************
      implicit none

      integer n,m,mits,mtemp
      real*4 tol,relax,alpha

      common /idat/ n,m,mits,mtemp
      common /fdat/tol,alpha,relax

      real*4 u(n,m),f(n,m),dx,dy

* Elapsed time function initialized -- SofTek
      integer ic, icr, icm
      real*8 cpu0, cpu1, elapsed, t_ac
      real*8 second
      external second
      call system_clock(ic,icr,icm)
      t_ac= 1.0e0/real(icr,8)
      print '(1x,a,e10.5)','Time measurement accuracy : ',t_ac

* Initialize data

      cpu0 = second()
      call initialize (n,m,alpha,dx,dy,u,f)

* Solve Helmholtz equation

      call jacobi (n,m,dx,dy,alpha,relax,u,f,tol,mits)

* Check error between exact solution

      call  error_check (n,m,alpha,dx,dy,u,f)
      cpu1 = second()

* Printout Elapsed time (SofTek)

      elapsed = (cpu1 -cpu0) * t_ac
      print '(/,1x,a,F10.3/)',
     & 'Elpased Time (Initialize + Jacobi solver + Check) : ',elapsed


      return
      end

      subroutine initialize (n,m,alpha,dx,dy,u,f)
******************************************************
* Initializes data
* Assumes exact solution is u(x,y) = (1-x^2)*(1-y^2)
*
******************************************************
      implicit none

      integer n,m
      real*4 u(n,m),f(n,m),dx,dy,alpha

      integer i,j, xx,yy
      real*4 PI
      parameter (PI=3.1415926)

      dx = 2.0 / (n-1)
      dy = 2.0 / (m-1)

* Initilize initial condition and RHS

!$omp parallel do private(xx,yy)
      do j = 1,m
         do i = 1,n
            xx = -1.0 + dx * real(i-1)        ! -1 < x < 1
            yy = -1.0 + dy * real(j-1)        ! -1 < y < 1
            u(i,j) = 0.0
            f(i,j) = -alpha *(1.0-xx*xx)*(1.0-yy*yy)
     &           - 2.0*(1.0-xx*xx)-2.0*(1.0-yy*yy)
         enddo
      enddo
!$omp end parallel do

      return
      end

      subroutine jacobi (n,m,dx,dy,alpha,omega,u,f,tol,maxit)
******************************************************************
* Subroutine HelmholtzJ
* Solves poisson equation on rectangular grid assuming :
* (1) Uniform discretization in each direction, and
* (2) Dirichlect boundary conditions
*
* Jacobi method is used in this routine
*
* Input : n,m   Number of grid points in the X/Y directions
*         dx,dy Grid spacing in the X/Y directions
*         alpha Helmholtz eqn. coefficient
*         omega Relaxation factor
*         f(n,m) Right hand side function
*         u(n,m) Dependent variable/Solution
*         tol    Tolerance for iterative solver
*         maxit  Maximum number of iterations
*
* Output : u(n,m) - Solution
*****************************************************************
      implicit none
      integer n,m,maxit
      real*4 dx,dy,f(n,m),u(n,m),alpha, tol,omega
*
* Local variables
*
      integer i,j,k,k_local
      real*4 error,resid,rsum,ax,ay,b
      real*4 error_local

      real(4), allocatable,dimension(:,:):: uold

      real ta,tb,tc,td,te,ta1,ta2,tb1,tb2,tc1,tc2,td1,td2
      real te1,te2
      real second
      external second

      allocate(uold(n,m))
*
* Initialize coefficients
      ax = 1.0/(dx*dx) ! X-direction coef
      ay = 1.0/(dy*dy) ! Y-direction coef
      b  = -2.0/(dx*dx)-2.0/(dy*dy) - alpha ! Central coeff

      error = 10.0 * tol
      k = 1

      do while (k.le.maxit .and. error.gt. tol)

         error = 0.0

* Copy new solution into old
!$omp parallel default(shared)

!$omp do
         do j=1,m
            do i=1,n
               uold(i,j) = u(i,j)
            enddo
         enddo

* Compute stencil, residual, & update

!$omp do private(resid) reduction(+:error)
         do j = 2,m-1
            do i = 2,n-1
*     Evaluate residual
               resid = (ax*(uold(i-1,j) + uold(i+1,j))
     &                + ay*(uold(i,j-1) + uold(i,j+1))
     &                 + b * uold(i,j) - f(i,j))/b
* Update solution
               u(i,j) = uold(i,j) - omega * resid
* Accumulate residual error
               error = error + resid*resid
            end do
         enddo
!$omp enddo nowait

!$omp end parallel

* Error check

         k = k + 1

         error = sqrt(error)/real(n*m)
*
      enddo                     ! End iteration loop
*
      print *, 'Total Number of Iterations ', k
      print *, 'Residual                   ', error

      return
      end

      subroutine error_check (n,m,alpha,dx,dy,u,f)
      implicit none
************************************************************
* Checks error between numerical and exact solution
*
************************************************************

      integer n,m
      real*4 u(n,m),f(n,m),dx,dy,alpha

      integer i,j
      real*4 xx,yy,temp,error

      dx = 2.0 / (n-1)
      dy = 2.0 / (m-1)
      error = 0.0

!$omp parallel do private(xx,yy,temp) reduction(+:error)
      do j = 1,m
         do i = 1,n
            xx = -1.0e0 + dx * real(i-1)
            yy = -1.0e0 + dy * real(j-1)
            temp  = u(i,j) - (1.0-xx*xx)*(1.0-yy*yy)
            error = error + temp*temp
         enddo
      enddo

      error = sqrt(error)/real(n*m)

      print *, 'Solution Error : ',error

      return
      end
      function second() result(rtime)
      implicit none
!
      integer :: ic,ir,im
      real(8) :: rtime
!
      call system_clock(ic,ir,im)
!
      rtime= real(ic,8)
!
      return
      end