diff --git a/Code/Changes.txt b/Code/Changes.txt index 4262b8c2f579ddc1dee852ee87776f3ee4dd58ef..859c3bfcc31964bf6decf425da5fea1431bf13b3 100644 --- a/Code/Changes.txt +++ b/Code/Changes.txt @@ -29,6 +29,12 @@ Step 2.0: - static.f90: Accelerated iteration by flexible step size. 'npsi' always set to even for consistent pairing. +- New makefiles 'Makefile.gfortran' and 'Makefile.gfortran.openmp' + which invoke more optimizations. + +- Eliminate guru mode in FFTW3 to make the code compatible with + Intels MKL implementation of FFTW3. + diff --git a/Code/Makefile b/Code/Makefile index 20bf82874a060384dfa52314a9ded9972a00b9f9..3c1eb50ca6619475c50716e82f01c259c3ea370c 100644 --- a/Code/Makefile +++ b/Code/Makefile @@ -1,6 +1,7 @@ COMPILER = gfortran -COMPILERFLAGS = -O3 -LINKERFLAGS = -O3 +COMPILERFLAGS = -O3 -msse4.2 -mfpmath=sse -ffast-math -finline-functions -funroll-loops +#COMPILERFLAGS = -g -fbacktrace +LINKERFLAGS = $(COMPILERFLAGS) SOURCE_TRAILER = .f90 EXEC = sky3d.seq diff --git a/Code/Makefile.gfortran b/Code/Makefile.gfortran new file mode 100644 index 0000000000000000000000000000000000000000..15a3b973eebe7a4ab7df7f962a65302308a9b18e --- /dev/null +++ b/Code/Makefile.gfortran @@ -0,0 +1,78 @@ +COMPILER = gfortran +COMPILERFLAGS = -O3 -msse4.2 -mfpmath=sse -ffast-math -finline-functions -funroll-loops +#COMPILERFLAGS = -g -fbacktrace +LINKERFLAGS = $(COMPILERFLAGS) + +SOURCE_TRAILER = .f90 +EXEC = sky3d.omp +OBJS = params.o grids.o levels.o fourier.o forces.o sequential.o \ + inout.o coulomb.o trivial.o densities.o fragments.o twobody.o \ + energies.o static.o meanfield.o dynamic.o pairs.o moment.o \ + main3d.o user.o external.o + +SHELL = /bin/sh + +#--------------------------------------------------------- +all:$(EXEC) +$(EXEC):$(OBJS) + $(COMPILER) $(LINKERFLAGS) -o $@ $(OBJS) -lfftw3 -llapack +# + +user.o : user.f90 params.o grids.o levels.o + +external.o : external.f90 params.o sequential.o grids.o levels.o meanfield.o + +coulomb.o : coulomb.f90 params.o grids.o densities.o + +inout.o : inout.f90 params.o sequential.o grids.o moment.o densities.o \ + levels.o coulomb.o forces.o meanfield.o forces.o + +trivial.o : trivial.f90 params.o grids.o + +forces.o : forces.f90 forces.data params.o + +fourier.o : fourier.f90 params.o grids.o + +params.o : params.f90 + +moment.o : moment.f90 params.o grids.o densities.o + +sequential.o: sequential.f90 levels.o params.o grids.o densities.o + +levels.o : levels.f90 params.o grids.o forces.o fourier.o trivial.o + +grids.o : grids.f90 params.o forces.o + +densities.o : densities.f90 params.o grids.o levels.o trivial.o + +fragments.o : fragments.f90 params.o grids.o forces.o levels.o twobody.o \ + sequential.o + +twobody.o : twobody.f90 params.o grids.o densities.o moment.o forces.o + +energies.o : energies.f90 params.o forces.o densities.o levels.o grids.o \ + trivial.o pairs.o + +meanfield.o : meanfield.f90 params.o densities.o forces.o grids.o coulomb.o \ + trivial.o levels.o + +pairs.o : pairs.f90 forces.o grids.o levels.o densities.o + +static.o : static.f90 params.o densities.o grids.o \ + levels.o moment.o energies.o inout.o pairs.o meanfield.o + +dynamic.o : dynamic.f90 params.o densities.o grids.o trivial.o \ + levels.o moment.o energies.o inout.o meanfield.o sequential.o \ + twobody.o external.o + +main3d.o : main3d.f90 params.o fourier.o forces.o densities.o meanfield.o levels.o grids.o fragments.o sequential.o dynamic.o static.o coulomb.o user.o + +.SUFFIXES: .f90 .f .o + +%.o : %.mod + +.f90.o: + $(COMPILER) $(COMPILERFLAGS) -c $< + +.f.o: + $(COMPILER) $(COMPILERFLAGS) -c $< diff --git a/Code/fourier.f90 b/Code/fourier.f90 index d56e3feb82938fd1b353a673c90639c384689a3e..75bb3c8007be858df1df0913b4c83e4e4f7d6e87 100644 --- a/Code/fourier.f90 +++ b/Code/fourier.f90 @@ -9,33 +9,31 @@ CONTAINS SUBROUTINE init_fft INCLUDE 'fftw3.f' COMPLEX(db),ALLOCATABLE :: p(:,:,:,:,:) + INTEGER,SAVE :: FFTW_planflag ! set option here + FFTW_planflag=FFTW_EXHAUSTIVE ALLOCATE(p(nx,ny,nz,2,2)) CALL dfftw_plan_dft_3d(pforward,nx,ny,nz,p(:,:,:,1,1),p(:,:,:,1,1), & - FFTW_FORWARD, FFTW_MEASURE+FFTW_UNALIGNED) + FFTW_FORWARD, FFTW_planflag) CALL dfftw_plan_dft_3d(pbackward,nx,ny,nz,p(:,:,:,1,1),p(:,:,:,1,1), & - FFTW_BACKWARD, FFTW_MEASURE+FFTW_UNALIGNED) + FFTW_BACKWARD, FFTW_planflag) CALL dfftw_plan_many_dft(xforward,1,(/nx/),2*ny*nz, & p(:,:,:,:,1),0,1,nx,p(:,:,:,:,2),0,1,nx, & - FFTW_FORWARD, FFTW_MEASURE+FFTW_UNALIGNED) + FFTW_FORWARD, FFTW_planflag) CALL dfftw_plan_many_dft(xbackward,1,(/nx/),2*ny*nz, & p(:,:,:,:,1),0,1,nx,p(:,:,:,:,1),0,1,nx, & - FFTW_BACKWARD, FFTW_MEASURE+FFTW_UNALIGNED) - CALL dfftw_plan_guru_dft(yforward,1,ny,nx,nx, & - 3,(/nx,nz,2/),(/1,nx*ny,nx*ny*nz/),(/1,nx*ny,nx*ny*nz/), & - p(:,:,:,:,1),p(:,:,:,:,2), & - FFTW_FORWARD, FFTW_MEASURE+FFTW_UNALIGNED) - CALL dfftw_plan_guru_dft(ybackward,1,ny,nx,nx, & - 3,(/nx,nz,2/),(/1,nx*ny,nx*ny*nz/),(/1,nx*ny,nx*ny*nz/), & - p(:,:,:,:,1),p(:,:,:,:,1), & - FFTW_BACKWARD, FFTW_MEASURE+FFTW_UNALIGNED) - CALL dfftw_plan_guru_dft(zforward,1,nz,nx*ny,nx*ny, & - 3,(/nx,ny,2/),(/1,nx,nx*ny*nz/),(/1,nx,nx*ny*nz/), & - p(:,:,:,:,1),p(:,:,:,:,2), & - FFTW_FORWARD, FFTW_MEASURE+FFTW_UNALIGNED) - CALL dfftw_plan_guru_dft(zbackward,1,nz,nx*ny,nx*ny, & - 3,(/nx,ny,2/),(/1,nx,nx*ny*nz/),(/1,nx,nx*ny*nz/), & - p(:,:,:,:,1),p(:,:,:,:,1), & - FFTW_BACKWARD, FFTW_MEASURE+FFTW_UNALIGNED) + FFTW_BACKWARD, FFTW_planflag) + CALL dfftw_plan_many_dft(yforward,1,(/ny/),nx, & + p(:,:,:,:,1),0,nx,1,p(:,:,:,:,2),0,nx,1, & + FFTW_FORWARD, FFTW_planflag) + CALL dfftw_plan_many_dft(ybackward,1,(/ny/),nx, & + p(:,:,:,:,1),0,nx,1,p(:,:,:,:,1),0,nx,1, & + FFTW_BACKWARD, FFTW_planflag) + CALL dfftw_plan_many_dft(zforward,1,(/nz/),nx*ny, & + p(:,:,:,:,1),0,nx*ny,1,p(:,:,:,:,2),0,nx*ny,1, & + FFTW_FORWARD, FFTW_planflag) + CALL dfftw_plan_many_dft(zbackward,1,(/nz/),nx*ny, & + p(:,:,:,:,1),0,nx*ny,1,p(:,:,:,:,1),0,nx*ny,1, & + FFTW_BACKWARD, FFTW_planflag) IF(wflag) WRITE(*,*) '***** FFTW3 plans established *****' DEALLOCATE(p) END SUBROUTINE init_fft diff --git a/Code/levels.f90 b/Code/levels.f90 index eb63a65071b026cf8a378f1707b270a60df356d0..6d34dcb5796e08ef99066a219122d5dd76822480 100644 --- a/Code/levels.f90 +++ b/Code/levels.f90 @@ -56,15 +56,23 @@ CONTAINS COMPLEX(db), INTENT(OUT):: d1psout(:,:,:,:) COMPLEX(db), INTENT(OUT), OPTIONAL :: d2psout(:,:,:,:) REAL(db) :: kfac - INTEGER :: iy + INTEGER :: iy,is,k kfac=(PI+PI)/(dy*ny) - CALL dfftw_execute_dft(yforward,psin,d1psout) + DO is=1,2 + DO k=1,nz + CALL dfftw_execute_dft(yforward,psin(:,:,k,is),d1psout(:,:,k,is)) + END DO + END DO IF(PRESENT(d2psout)) THEN DO iy=1,ny/2 d2psout(:,iy,:,:)=-((iy-1)*kfac)**2*d1psout(:,iy,:,:)/REAL(ny) d2psout(:,ny-iy+1,:,:)=-(iy*kfac)**2*d1psout(:,ny-iy+1,:,:)/REAL(ny) ENDDO - CALL dfftw_execute_dft(ybackward,d2psout,d2psout) + DO is=1,2 + DO k=1,nz + CALL dfftw_execute_dft(ybackward,d2psout(:,:,k,is),d2psout(:,:,k,is)) + END DO + END DO ENDIF d1psout(:,1,:,:)=(0.D0,0.D0) DO iy=2,ny/2 @@ -75,7 +83,11 @@ CONTAINS /REAL(ny) ENDDO d1psout(:,ny/2+1,:,:)=(0.D0,0.D0) - CALL dfftw_execute_dft(ybackward,d1psout,d1psout) + DO is=1,2 + DO k=1,nz + CALL dfftw_execute_dft(ybackward,d1psout(:,:,k,is),d1psout(:,:,k,is)) + END DO + END DO END SUBROUTINE cdervy !************************************************************ SUBROUTINE cdervz(psin,d1psout,d2psout) @@ -83,15 +95,19 @@ CONTAINS COMPLEX(db), INTENT(OUT):: d1psout(:,:,:,:) COMPLEX(db), INTENT(OUT), OPTIONAL :: d2psout(:,:,:,:) REAL(db) :: kfac - INTEGER :: iz + INTEGER :: iz,is kfac=(PI+PI)/(dz*nz) - CALL dfftw_execute_dft(zforward,psin,d1psout) + DO is=1,2 + CALL dfftw_execute_dft(zforward,psin(:,:,:,is),d1psout(:,:,:,is)) + END DO IF(PRESENT(d2psout)) THEN DO iz=1,nz/2 d2psout(:,:,iz,:)=-((iz-1)*kfac)**2*d1psout(:,:,iz,:)/REAL(nz) d2psout(:,:,nz-iz+1,:)=-(iz*kfac)**2*d1psout(:,:,nz-iz+1,:)/REAL(nz) ENDDO - CALL dfftw_execute_dft(zbackward,d2psout,d2psout) + DO is=1,2 + CALL dfftw_execute_dft(zbackward,d2psout(:,:,:,is),d2psout(:,:,:,is)) + END DO ENDIF d1psout(:,:,1,:)=(0.D0,0.D0) DO iz=2,nz/2 @@ -102,7 +118,9 @@ CONTAINS /REAL(nz) ENDDO d1psout(:,:,nz/2+1,:)=(0.D0,0.D0) - CALL dfftw_execute_dft(zbackward,d1psout,d1psout) + DO is=1,2 + CALL dfftw_execute_dft(zbackward,d1psout(:,:,:,is),d1psout(:,:,:,is)) + END DO END SUBROUTINE cdervz !************************************************************ SUBROUTINE laplace(psin,psout,e0inv)