Skip to content
Snippets Groups Projects
Commit 33a869db authored by Paul-Gerhard Reinhard's avatar Paul-Gerhard Reinhard
Browse files

eliminate guru mode in FFTW3

parent 154239ee
No related branches found
No related tags found
No related merge requests found
......@@ -29,6 +29,12 @@ Step 2.0:
- static.f90: Accelerated iteration by flexible step size.
'npsi' always set to even for consistent pairing.
- New makefiles 'Makefile.gfortran' and 'Makefile.gfortran.openmp'
which invoke more optimizations.
- Eliminate guru mode in FFTW3 to make the code compatible with
Intels MKL implementation of FFTW3.
......
COMPILER = gfortran
COMPILERFLAGS = -O3
LINKERFLAGS = -O3
COMPILERFLAGS = -O3 -msse4.2 -mfpmath=sse -ffast-math -finline-functions -funroll-loops
#COMPILERFLAGS = -g -fbacktrace
LINKERFLAGS = $(COMPILERFLAGS)
SOURCE_TRAILER = .f90
EXEC = sky3d.seq
......
COMPILER = gfortran
COMPILERFLAGS = -O3 -msse4.2 -mfpmath=sse -ffast-math -finline-functions -funroll-loops
#COMPILERFLAGS = -g -fbacktrace
LINKERFLAGS = $(COMPILERFLAGS)
SOURCE_TRAILER = .f90
EXEC = sky3d.omp
OBJS = params.o grids.o levels.o fourier.o forces.o sequential.o \
inout.o coulomb.o trivial.o densities.o fragments.o twobody.o \
energies.o static.o meanfield.o dynamic.o pairs.o moment.o \
main3d.o user.o external.o
SHELL = /bin/sh
#---------------------------------------------------------
all:$(EXEC)
$(EXEC):$(OBJS)
$(COMPILER) $(LINKERFLAGS) -o $@ $(OBJS) -lfftw3 -llapack
#
user.o : user.f90 params.o grids.o levels.o
external.o : external.f90 params.o sequential.o grids.o levels.o meanfield.o
coulomb.o : coulomb.f90 params.o grids.o densities.o
inout.o : inout.f90 params.o sequential.o grids.o moment.o densities.o \
levels.o coulomb.o forces.o meanfield.o forces.o
trivial.o : trivial.f90 params.o grids.o
forces.o : forces.f90 forces.data params.o
fourier.o : fourier.f90 params.o grids.o
params.o : params.f90
moment.o : moment.f90 params.o grids.o densities.o
sequential.o: sequential.f90 levels.o params.o grids.o densities.o
levels.o : levels.f90 params.o grids.o forces.o fourier.o trivial.o
grids.o : grids.f90 params.o forces.o
densities.o : densities.f90 params.o grids.o levels.o trivial.o
fragments.o : fragments.f90 params.o grids.o forces.o levels.o twobody.o \
sequential.o
twobody.o : twobody.f90 params.o grids.o densities.o moment.o forces.o
energies.o : energies.f90 params.o forces.o densities.o levels.o grids.o \
trivial.o pairs.o
meanfield.o : meanfield.f90 params.o densities.o forces.o grids.o coulomb.o \
trivial.o levels.o
pairs.o : pairs.f90 forces.o grids.o levels.o densities.o
static.o : static.f90 params.o densities.o grids.o \
levels.o moment.o energies.o inout.o pairs.o meanfield.o
dynamic.o : dynamic.f90 params.o densities.o grids.o trivial.o \
levels.o moment.o energies.o inout.o meanfield.o sequential.o \
twobody.o external.o
main3d.o : main3d.f90 params.o fourier.o forces.o densities.o meanfield.o levels.o grids.o fragments.o sequential.o dynamic.o static.o coulomb.o user.o
.SUFFIXES: .f90 .f .o
%.o : %.mod
.f90.o:
$(COMPILER) $(COMPILERFLAGS) -c $<
.f.o:
$(COMPILER) $(COMPILERFLAGS) -c $<
......@@ -9,33 +9,31 @@ CONTAINS
SUBROUTINE init_fft
INCLUDE 'fftw3.f'
COMPLEX(db),ALLOCATABLE :: p(:,:,:,:,:)
INTEGER,SAVE :: FFTW_planflag ! set option here
FFTW_planflag=FFTW_EXHAUSTIVE
ALLOCATE(p(nx,ny,nz,2,2))
CALL dfftw_plan_dft_3d(pforward,nx,ny,nz,p(:,:,:,1,1),p(:,:,:,1,1), &
FFTW_FORWARD, FFTW_MEASURE+FFTW_UNALIGNED)
FFTW_FORWARD, FFTW_planflag)
CALL dfftw_plan_dft_3d(pbackward,nx,ny,nz,p(:,:,:,1,1),p(:,:,:,1,1), &
FFTW_BACKWARD, FFTW_MEASURE+FFTW_UNALIGNED)
FFTW_BACKWARD, FFTW_planflag)
CALL dfftw_plan_many_dft(xforward,1,(/nx/),2*ny*nz, &
p(:,:,:,:,1),0,1,nx,p(:,:,:,:,2),0,1,nx, &
FFTW_FORWARD, FFTW_MEASURE+FFTW_UNALIGNED)
FFTW_FORWARD, FFTW_planflag)
CALL dfftw_plan_many_dft(xbackward,1,(/nx/),2*ny*nz, &
p(:,:,:,:,1),0,1,nx,p(:,:,:,:,1),0,1,nx, &
FFTW_BACKWARD, FFTW_MEASURE+FFTW_UNALIGNED)
CALL dfftw_plan_guru_dft(yforward,1,ny,nx,nx, &
3,(/nx,nz,2/),(/1,nx*ny,nx*ny*nz/),(/1,nx*ny,nx*ny*nz/), &
p(:,:,:,:,1),p(:,:,:,:,2), &
FFTW_FORWARD, FFTW_MEASURE+FFTW_UNALIGNED)
CALL dfftw_plan_guru_dft(ybackward,1,ny,nx,nx, &
3,(/nx,nz,2/),(/1,nx*ny,nx*ny*nz/),(/1,nx*ny,nx*ny*nz/), &
p(:,:,:,:,1),p(:,:,:,:,1), &
FFTW_BACKWARD, FFTW_MEASURE+FFTW_UNALIGNED)
CALL dfftw_plan_guru_dft(zforward,1,nz,nx*ny,nx*ny, &
3,(/nx,ny,2/),(/1,nx,nx*ny*nz/),(/1,nx,nx*ny*nz/), &
p(:,:,:,:,1),p(:,:,:,:,2), &
FFTW_FORWARD, FFTW_MEASURE+FFTW_UNALIGNED)
CALL dfftw_plan_guru_dft(zbackward,1,nz,nx*ny,nx*ny, &
3,(/nx,ny,2/),(/1,nx,nx*ny*nz/),(/1,nx,nx*ny*nz/), &
p(:,:,:,:,1),p(:,:,:,:,1), &
FFTW_BACKWARD, FFTW_MEASURE+FFTW_UNALIGNED)
FFTW_BACKWARD, FFTW_planflag)
CALL dfftw_plan_many_dft(yforward,1,(/ny/),nx, &
p(:,:,:,:,1),0,nx,1,p(:,:,:,:,2),0,nx,1, &
FFTW_FORWARD, FFTW_planflag)
CALL dfftw_plan_many_dft(ybackward,1,(/ny/),nx, &
p(:,:,:,:,1),0,nx,1,p(:,:,:,:,1),0,nx,1, &
FFTW_BACKWARD, FFTW_planflag)
CALL dfftw_plan_many_dft(zforward,1,(/nz/),nx*ny, &
p(:,:,:,:,1),0,nx*ny,1,p(:,:,:,:,2),0,nx*ny,1, &
FFTW_FORWARD, FFTW_planflag)
CALL dfftw_plan_many_dft(zbackward,1,(/nz/),nx*ny, &
p(:,:,:,:,1),0,nx*ny,1,p(:,:,:,:,1),0,nx*ny,1, &
FFTW_BACKWARD, FFTW_planflag)
IF(wflag) WRITE(*,*) '***** FFTW3 plans established *****'
DEALLOCATE(p)
END SUBROUTINE init_fft
......
......@@ -56,15 +56,23 @@ CONTAINS
COMPLEX(db), INTENT(OUT):: d1psout(:,:,:,:)
COMPLEX(db), INTENT(OUT), OPTIONAL :: d2psout(:,:,:,:)
REAL(db) :: kfac
INTEGER :: iy
INTEGER :: iy,is,k
kfac=(PI+PI)/(dy*ny)
CALL dfftw_execute_dft(yforward,psin,d1psout)
DO is=1,2
DO k=1,nz
CALL dfftw_execute_dft(yforward,psin(:,:,k,is),d1psout(:,:,k,is))
END DO
END DO
IF(PRESENT(d2psout)) THEN
DO iy=1,ny/2
d2psout(:,iy,:,:)=-((iy-1)*kfac)**2*d1psout(:,iy,:,:)/REAL(ny)
d2psout(:,ny-iy+1,:,:)=-(iy*kfac)**2*d1psout(:,ny-iy+1,:,:)/REAL(ny)
ENDDO
CALL dfftw_execute_dft(ybackward,d2psout,d2psout)
DO is=1,2
DO k=1,nz
CALL dfftw_execute_dft(ybackward,d2psout(:,:,k,is),d2psout(:,:,k,is))
END DO
END DO
ENDIF
d1psout(:,1,:,:)=(0.D0,0.D0)
DO iy=2,ny/2
......@@ -75,7 +83,11 @@ CONTAINS
/REAL(ny)
ENDDO
d1psout(:,ny/2+1,:,:)=(0.D0,0.D0)
CALL dfftw_execute_dft(ybackward,d1psout,d1psout)
DO is=1,2
DO k=1,nz
CALL dfftw_execute_dft(ybackward,d1psout(:,:,k,is),d1psout(:,:,k,is))
END DO
END DO
END SUBROUTINE cdervy
!************************************************************
SUBROUTINE cdervz(psin,d1psout,d2psout)
......@@ -83,15 +95,19 @@ CONTAINS
COMPLEX(db), INTENT(OUT):: d1psout(:,:,:,:)
COMPLEX(db), INTENT(OUT), OPTIONAL :: d2psout(:,:,:,:)
REAL(db) :: kfac
INTEGER :: iz
INTEGER :: iz,is
kfac=(PI+PI)/(dz*nz)
CALL dfftw_execute_dft(zforward,psin,d1psout)
DO is=1,2
CALL dfftw_execute_dft(zforward,psin(:,:,:,is),d1psout(:,:,:,is))
END DO
IF(PRESENT(d2psout)) THEN
DO iz=1,nz/2
d2psout(:,:,iz,:)=-((iz-1)*kfac)**2*d1psout(:,:,iz,:)/REAL(nz)
d2psout(:,:,nz-iz+1,:)=-(iz*kfac)**2*d1psout(:,:,nz-iz+1,:)/REAL(nz)
ENDDO
CALL dfftw_execute_dft(zbackward,d2psout,d2psout)
DO is=1,2
CALL dfftw_execute_dft(zbackward,d2psout(:,:,:,is),d2psout(:,:,:,is))
END DO
ENDIF
d1psout(:,:,1,:)=(0.D0,0.D0)
DO iz=2,nz/2
......@@ -102,7 +118,9 @@ CONTAINS
/REAL(nz)
ENDDO
d1psout(:,:,nz/2+1,:)=(0.D0,0.D0)
CALL dfftw_execute_dft(zbackward,d1psout,d1psout)
DO is=1,2
CALL dfftw_execute_dft(zbackward,d1psout(:,:,:,is),d1psout(:,:,:,is))
END DO
END SUBROUTINE cdervz
!************************************************************
SUBROUTINE laplace(psin,psout,e0inv)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment