diff --git a/Code/linalg_scalapack.f90 b/Code/linalg_scalapack.f90 new file mode 100755 index 0000000000000000000000000000000000000000..f0b69f4278f0553add53ee06de97adce1d2ec5c8 --- /dev/null +++ b/Code/linalg_scalapack.f90 @@ -0,0 +1,95 @@ +MODULE LINALG + USE Params, ONLY: db,cmplxzero,cmplxone + USE Levels + USE Parallel, ONLY: nb,mb,contxt,nstloc_x,nstloc_y,globalindex_x,globalindex_y,nb_psi +! + IMPLICIT NONE + INTEGER :: nlin(2) + INTEGER :: desca(2,10),descz(2,10),descc(2,10),desc_t(2,10)& + , work_t_size(2),iwork_t_size(2),rwork_t_size(2) +! + REAL(db) ,ALLOCATABLE :: rwork_t(:),evals(:) + COMPLEX(db),ALLOCATABLE :: work_t(:),matr_lin(:,:),unitary(:,:) + INTEGER ,ALLOCATABLE :: iwork_t(:) +! + CONTAINS + !************************************************************ + SUBROUTINE init_linalg + INTEGER :: iq,infoconv + DO iq=1,2 + nlin(iq)=npsi(iq)-npmin(iq)+1 + CALL DESCINIT(DESCA(iq,1:10),npsi(iq)-npmin(iq)+1,npsi(iq)-npmin(iq)+1,& + NB,MB,0,0,CONTXT,nstloc_x(iq),infoconv) + CALL DESCINIT(DESCZ(iq,1:10),npsi(iq)-npmin(iq)+1,npsi(iq)-npmin(iq)+1,& + NB,MB,0,0,CONTXT,nstloc_x(iq),infoconv) + CALL DESCINIT(DESCC(iq,1:10),npsi(iq)-npmin(iq)+1,npsi(iq)-npmin(iq)+1,& + NB,MB,0,0,CONTXT,nstloc_x(iq),infoconv) + CALL DESCINIT(DESC_T(iq,1:10),nx*ny*nz*2,npsi(iq)-npmin(iq)+1,nx*ny*nz*2,& + nb_psi,0,0,CONTXT,nx*ny*nz*2,infoconv) + work_t_size(iq) = -1 + iwork_t_size(iq) = -1 + rwork_t_size(iq) = -1 + ALLOCATE(work_t(1),iwork_t(1),rwork_t(1),matr_lin(nstloc_x(iq),nstloc_y(iq)),& + unitary(nstloc_x(iq),nstloc_y(iq)),evals(nlin(iq))) + CALL PZHEEVD('V','L',nlin(iq),matr_lin,1,1,DESCA(iq,1:10),evals,& + unitary,1,1,DESCZ(iq,1:10),work_t,work_t_size(iq),rwork_t,& + rwork_t_size(iq),iwork_t,iwork_t_size(iq),infoconv) + work_t_size(iq) = INT(ABS(work_t(1))) + iwork_t_size(iq) = INT(ABS(iwork_t(1))) + rwork_t_size(iq) = INT(ABS(rwork_t(1))) + DEALLOCATE(work_t,iwork_t,rwork_t,matr_lin,unitary,evals) + END DO + END SUBROUTINE init_linalg + !************************************************************ + SUBROUTINE eigenvecs(matr_in,evecs,evals_out,iq) + INTEGER, INTENT(IN) :: iq + COMPLEX(db), INTENT(IN) :: matr_in(:,:) + COMPLEX(db), INTENT(OUT) :: evecs(:,:) + REAL(db), INTENT(OUT),OPTIONAL :: evals_out(:) + INTEGER :: infoconv + ALLOCATE(work_t(work_t_size(iq)),rwork_t(rwork_t_size(iq)),& + iwork_t(iwork_t_size(iq)),evals(nlin(iq))) + CALL PZHEEVD('V','L',nlin(iq),matr_in,1,1,DESCA(iq,1:10),evals,evecs,1,1,DESCZ(iq,1:10),& + work_t,work_t_size(iq),rwork_t,rwork_t_size(iq),iwork_t,iwork_t_size(iq),infoconv) + IF (PRESENT(evals_out))evals_out=evals + DEALLOCATE(evals,work_t,iwork_t,rwork_t) + END SUBROUTINE eigenvecs + !************************************************************ + SUBROUTINE loewdin(imatr,smatr,iq) + INTEGER, INTENT(IN) :: iq + COMPLEX(db),INTENT(IN) :: imatr(:,:) + COMPLEX(db),INTENT(OUT) :: smatr(:,:) + COMPLEX(db),ALLOCATABLE :: tmatr(:,:) + REAL(db) ,ALLOCATABLE :: eigen_h(:) + INTEGER :: i,it,jt,iy + ALLOCATE(tmatr(nstloc_x(iq),nstloc_y(iq)),eigen_h(nlin(iq))) + CALL eigenvecs(imatr,tmatr,eigen_h,iq) + eigen_h=1.0d0/sqrt(eigen_h) + DO it = 1,nstloc_x(iq) + DO jt = 1,nstloc_y(iq) + iy = globalindex_y(jt,iq)-npmin(iq)+1 + tmatr(it,jt) = tmatr(it,jt)*sqrt(eigen_h(iy)) + ENDDO + ENDDO + CALL PZGEMM('N','C',nlin(iq),nlin(iq),nlin(iq),cmplxone,tmatr,1,1,DESCA(iq,1:10),& + tmatr,1,1,DESCZ(iq,1:10),cmplxzero,smatr,1,1,DESCC(iq,1:10)) + DEALLOCATE(tmatr,eigen_h) + END SUBROUTINE + !************************************************************ + SUBROUTINE comb_orthodiag(unitary_1,unitary_2,unitary,iq) + INTEGER, INTENT(IN) :: iq + COMPLEX(db),INTENT(IN) :: unitary_1(:,:),unitary_2(:,:) + COMPLEX(db),INTENT(OUT) :: unitary(:,:) + CALL PZGEMM('T','T',nlin(iq),nlin(iq),nlin(iq),cmplxone,unitary_1,1,1,DESCA(iq,1:10),unitary_2,& + 1,1,DESCZ(iq,1:10),cmplxzero,unitary,1,1,DESCC(iq,1:10)) + END SUBROUTINE + !************************************************************ + SUBROUTINE recombine(matrix,psi_in,psi_out,iq) + INTEGER, INTENT(IN) :: iq + COMPLEX(db),INTENT(IN) :: psi_in(:),matrix(:,:) + COMPLEX(db),INTENT(OUT) :: psi_out(:) + CALL zgemv('N',nstloc_x(iq),nstloc_y(iq),cmplxone,matrix,nstloc_x(iq),psi_in,1,cmplxzero,& + psi_out,1) + + END SUBROUTINE +END MODULE LINALG diff --git a/Code/parallel.f90 b/Code/parallel.f90 index 9038219d659b3a39195ddcb3a93198e51be15d72..03253907809dab87d616c2dac75ce2fe38a8d08f 100644 --- a/Code/parallel.f90 +++ b/Code/parallel.f90 @@ -5,13 +5,24 @@ MODULE Parallel IMPLICIT NONE INCLUDE 'mpif.h' SAVE - LOGICAL,PARAMETER :: tmpi=.TRUE. - INTEGER, ALLOCATABLE :: node(:),localindex(:),globalindex(:) - INTEGER :: mpi_nprocs,mpi_ierror,mpi_myproc + INTEGER, PARAMETER :: NB=32,MB=32,NB_psi = 32 + LOGICAL, PARAMETER :: tmpi=.TRUE.,ttabc=.FALSE. + INTEGER, ALLOCATABLE :: node(:),localindex(:),globalindex(:),& + node_x(:),node_y(:),localindex_x(:),localindex_y(:),& + globalindex_x(:,:),globalindex_y(:,:) + INTEGER, ALLOCATABLE :: recvcounts(:,:),displs(:,:) + INTEGER, ALLOCATABLE :: node_2dto1d(:,:),node_1dto2d_x(:),node_1dto2d_y(:) + INTEGER :: mpi_nprocs,mpi_ierror,mpi_myproc + INTEGER :: comm2d,mpi_dims(2),mpi_mycoords(2),nstloc_x(2),nstloc_y(2),first(2) + INTEGER :: comm2d_x,comm2d_y,mpi_size_x,mpi_size_y,mpi_rank_x,mpi_rank_y + INTEGER :: NPROCS,NPROW,NPCOL,MYPROW,MYPCOL,CONTXT,IAM + INTEGER, EXTERNAL :: NUMROC,INDXL2G,INDXG2L,INDXG2P + REAL(db) :: timer(20) CONTAINS !*********************************************************************** SUBROUTINE alloc_nodes - ALLOCATE(node(nstmax),localindex(nstmax),globalindex(nstmax)) + ALLOCATE(node(nstmax),localindex(nstmax),globalindex(nstmax),& + localindex_x(nstmax),localindex_y(nstmax)) END SUBROUTINE alloc_nodes !*********************************************************************** SUBROUTINE init_all_mpi @@ -22,15 +33,123 @@ CONTAINS IF(wflag) WRITE(*,'(a,i5)') ' number of nodes=',mpi_nprocs CALL mpi_barrier (mpi_comm_world, mpi_ierror) END SUBROUTINE init_all_mpi + !************************************************************************ + SUBROUTINE mpi_init_filename + CONTINUE + END SUBROUTINE mpi_init_filename + !************************************************************************ + FUNCTION tabc_av(val) + REAL(db),INTENT(IN) :: val + REAL(db) :: tabc_av + tabc_av=val + END FUNCTION tabc_av + !************************************************************************ + FUNCTION tabc_dens(density) + REAL(db),INTENT(IN) :: density(nx,ny,nz,2) + REAL(db) :: tabc_dens(nx,ny,nz,2) + tabc_dens=density + END FUNCTION tabc_dens + !************************************************************************ + FUNCTION tabc_vec_dens(density) + REAL(db),INTENT(IN) :: density(nx,ny,nz,3,2) + REAL(db) :: tabc_vec_dens(nx,ny,nz,3,2) + tabc_vec_dens=density + END FUNCTION tabc_vec_dens + !************************************************************************ + FUNCTION tabc_filename(filename) + CHARACTER(64),INTENT(IN) :: filename + CHARACTER(64) :: tabc_filename + tabc_filename=filename + END FUNCTION tabc_filename + !*********************************************************************** + SUBROUTINE init_mpi_2d + !*********************************************************************** + ! calculates 2d wave function distribution over nodes + !*********************************************************************** + LOGICAL :: isperiodic(2),reorder + INTEGER :: i,is,noffset + isperiodic=.FALSE. + reorder=.FALSE. + + !Calculate best dimensions for the given number of processes + CALL mpi_dims_create(mpi_nprocs,2,mpi_dims,mpi_ierror) + + !Create 2-dimensional grid of processes to calculate the matrices in diagstep + CALL mpi_cart_create(mpi_comm_world,2,mpi_dims,isperiodic,reorder,comm2d,mpi_ierror) + + !get my coordinates on the grid + CALL mpi_cart_get(comm2d,2,mpi_dims,isperiodic,mpi_mycoords,mpi_ierror) + IF(wflag)WRITE(*,*)'Initialized 2d-grid with dimensions ',mpi_dims(1),' and',mpi_dims(2) + + !Create communicators for x- and y-directions + CALL mpi_comm_split(comm2d,mpi_mycoords(2),mpi_mycoords(1),comm2d_x,mpi_ierror) + CALL mpi_comm_split(comm2d,mpi_mycoords(1),mpi_mycoords(2),comm2d_y,mpi_ierror) + + !determine their sizes and ranks (is already known, just to make sure) + CALL mpi_comm_size(comm2d_x,mpi_size_x,mpi_ierror) + CALL mpi_comm_rank(comm2d_x,mpi_rank_x,mpi_ierror) + CALL mpi_comm_size(comm2d_y,mpi_size_y,mpi_ierror) + CALL mpi_comm_rank(comm2d_y,mpi_rank_y,mpi_ierror) + + CALL init_blacs + + DO is=1,2 + nstloc_x(is) = NUMROC(npsi(is)-npmin(is)+1,NB,MYPROW,0,NPROW) + nstloc_y(is) = NUMROC(npsi(is)-npmin(is)+1,MB,MYPCOL,0,NPCOL) + END DO + + ALLOCATE(node_x(nstmax),node_y(nstmax),globalindex_x(nstmax,2),globalindex_y(nstmax,2)) + + globalindex_x=0 + globalindex_y=0 + + DO is=1,2 + noffset=npmin(is)-1 + DO i=npmin(is),npsi(is) + localindex_x(i) = INDXG2L(i-noffset, NB, MYPROW, 0, NPROW) + localindex_y(i) = INDXG2L(i-noffset, MB, MYPCOL, 0, NPCOL) + node_x(i) = INDXG2P(i-noffset, NB, MYPROW, 0, NPROW) + node_y(i) = INDXG2P(i-noffset, MB, MYPCOL, 0, NPCOL) + IF(node_x(i)==mpi_rank_x) globalindex_x(localindex_x(i),is)=i + IF(node_y(i)==mpi_rank_y) globalindex_y(localindex_y(i),is)=i + END DO + END DO + + END SUBROUTINE init_mpi_2d +!*************************************************************************** + SUBROUTINE init_blacs + CALL BLACS_PINFO(IAM,NPROCS) + IF (NPROCS.LT.1) THEN + CALL BLACS_SETUP(IAM,NPROCS) + END IF + NPROW=mpi_dims(1) + NPCOL=mpi_dims(2) + CALL BLACS_GET(0,0,CONTXT) + CALL BLACS_GRIDINIT(CONTXT,'Row',NPROW,NPCOL) + CALL BLACS_GRIDINFO(CONTXT,NPROW,NPCOL,MYPROW,MYPCOL) + IF(mpi_rank_x/=MYPROW.OR.mpi_rank_y/=MYPCOL) STOP 'BLACS and MPI init is different' + END SUBROUTINE init_blacs !*********************************************************************** SUBROUTINE associate_nodes - INTEGER :: ncount,nst,ip,iloc + !*********************************************************************** + ! calculates 1d wave function distribution over nodes + !*********************************************************************** + INTEGER :: ncount,nst,ip,iloc,iq,mpi_ierror ncount=0 - DO nst=1,nstmax - ncount=MOD(ncount,mpi_nprocs) - node(nst)=ncount - ncount=ncount+1 - ENDDO + globalindex=0 + node=0 +! DO iq=1,2 +! DO nst=1,nstloc_x(iq) +! IF(INT(REAL(nst-1)/nstloc_x(iq)*mpi_size_y)==mpi_rank_y) & +! node(globalindex_x(nst,iq))=mpi_myproc +! END DO +! END DO +! CALL mpi_allreduce(MPI_IN_PLACE,node,nstmax,mpi_integer,mpi_sum,mpi_comm_world,mpi_ierror) + DO iq=1,2 + DO nst=npmin(iq),npsi(iq) + node(nst)=MOD((nst-npmin(iq))/nb_psi,mpi_nprocs) + END DO + END DO nstloc=0 DO nst=1,nstmax IF(node(nst)==mpi_myproc) THEN @@ -39,25 +158,35 @@ CONTAINS ENDIF ENDDO DO ip=0,mpi_nprocs-1 - iloc=0 - DO nst=1,nstmax - IF(node(nst)==ip) THEN - iloc=iloc+1 - localindex(nst)=iloc - ENDIF - ENDDO + iloc=0 + DO nst=1,nstmax + IF(node(nst)==ip) THEN + iloc=iloc+1 + localindex(nst)=iloc + ENDIF + ENDDO ENDDO - IF(wflag) THEN - WRITE(*,'(A/(1X,20I4))') & - ' sorting of wfs on nodes:',node(1:nstmax) - ENDIF CALL mpi_barrier (mpi_comm_world, mpi_ierror) + CALL mpi_relate_comm END SUBROUTINE associate_nodes +!*********************************************************************** + SUBROUTINE mpi_relate_comm + ALLOCATE(node_1dto2d_x(0:mpi_nprocs-1),node_1dto2d_y(0:mpi_nprocs-1),& + node_2dto1d(0:mpi_size_x-1,0:mpi_size_y-1)) + node_1dto2d_x=0 + node_1dto2d_y=0 + node_2dto1d=0 + node_1dto2d_x(mpi_myproc)=mpi_rank_x + node_1dto2d_y(mpi_myproc)=mpi_rank_y + node_2dto1d(mpi_rank_x,mpi_rank_y)=mpi_myproc + CALL mpi_allreduce(MPI_IN_PLACE,node_1dto2d_x,mpi_nprocs,mpi_integer,mpi_sum,mpi_comm_world,mpi_ierror) + CALL mpi_allreduce(MPI_IN_PLACE,node_1dto2d_y,mpi_nprocs,mpi_integer,mpi_sum,mpi_comm_world,mpi_ierror) + CALL mpi_allreduce(MPI_IN_PLACE,node_2dto1d,mpi_nprocs,mpi_integer,mpi_sum,mpi_comm_world,mpi_ierror) + END SUBROUTINE mpi_relate_comm !*********************************************************************** SUBROUTINE collect_densities USE Densities, ONLY : rho,tau,current,sodens,sdens REAL(db) :: tmp_rho(nx,ny,nz,2),tmp_current(nx,ny,nz,3,2) - REAL(db) :: rsum CALL mpi_barrier (mpi_comm_world, mpi_ierror) CALL mpi_allreduce(rho,tmp_rho,2*nx*ny*nz, & mpi_double_precision,mpi_sum,mpi_comm_world,mpi_ierror) @@ -108,9 +237,137 @@ CONTAINS sp_parity=tmpgat CALL mpi_barrier (mpi_comm_world,mpi_ierror) END SUBROUTINE collect_sp_properties +!*********************************************************************** + SUBROUTINE collect_energies(delesum,sumflu) + !*********************************************************************** + ! collects s.p. energies, fluctuation measure and energy differences + !*********************************************************************** + REAL(db),INTENT(INOUT) :: delesum,sumflu + REAL(db) :: tmpgat(nstmax),tmpgat3(3,nstmax) + INTEGER :: nst + CALL mpi_barrier (mpi_comm_world,mpi_ierror) + DO nst=1,nstmax + IF(node(nst)/=mpi_myproc) THEN + sp_energy(nst)=0.0d0 + sp_efluct1(nst)=0.0d0 + sp_efluct2(nst)=0.0d0 + END IF + ENDDO + CALL mpi_allreduce(sp_energy,tmpgat,nstmax,mpi_double_precision,mpi_sum, & + mpi_comm_world,mpi_ierror) + sp_energy=tmpgat + CALL mpi_allreduce(sumflu,tmpgat(1),1,mpi_double_precision,mpi_sum, & + mpi_comm_world,mpi_ierror) + sumflu=tmpgat(1) + CALL mpi_allreduce(delesum,tmpgat(1),1,mpi_double_precision,mpi_sum, & + mpi_comm_world,mpi_ierror) + delesum=tmpgat(1) + CALL mpi_barrier (mpi_comm_world,mpi_ierror) + END SUBROUTINE collect_energies +!***********************************************************************i + SUBROUTINE mpi_wf_1d2x(psi,psi_x,iq) + USE Trivial + COMPLEX(db),INTENT(IN) :: psi(:,:,:,:,:) + COMPLEX(db),INTENT(OUT) :: psi_x(:,:) + INTEGER, INTENT(IN) :: iq + INTEGER :: nst,ierr,is + IF(.NOT.ALLOCATED(recvcounts)) THEN + ALLOCATE(recvcounts(0:mpi_size_y-1,2),displs(0:mpi_size_y-1,2)) + recvcounts=0 + displs=0 + first(1)=1 + DO is=1,2 + DO nst=1,nstloc_x(is) + recvcounts(node_1dto2d_y(node(globalindex_x(nst,is))),is)=& + recvcounts(node_1dto2d_y(node(globalindex_x(nst,is))),is)+1 + END DO + END DO + first(2)=recvcounts(mpi_rank_y,1)+1 + recvcounts=recvcounts*size(psi(:,:,:,:,1)) + DO is=1,2 + DO nst=1,mpi_size_y-1 + displs(nst,is)=SUM(recvcounts(0:nst-1,is)) + END DO + END DO + END IF + psi_x=0.0d0 + CALL mpi_allgatherv(psi(:,:,:,:,first(iq)),recvcounts(mpi_rank_y,iq),mpi_double_complex,& + psi_x,recvcounts(:,iq),displs(:,iq),mpi_double_complex,comm2d_y,ierr) + END SUBROUTINE mpi_wf_1d2x +!*********************************************************************** + SUBROUTINE mpi_wf_x2y(psi_x,psi_y,iq) + COMPLEX(db),INTENT(IN) :: psi_x(:,:) + COMPLEX(db),INTENT(OUT) :: psi_y(:,:) + INTEGER, INTENT(IN) :: iq + INTEGER :: nst,ierr,is,lastnode,ip,first,last + INTEGER,ALLOCATABLE,SAVE :: rootnode(:,:),firstwf(:,:),nwf(:,:) + IF(.NOT.ALLOCATED(rootnode)) THEN + nst=MAX(nstloc_y(1)+1,nstloc_y(2)+1) + ALLOCATE(rootnode(nst,2),firstwf(nst,2),nwf(nst,2)) + rootnode=0 + firstwf=0 + nwf=0 + DO is=1,2 + lastnode=-1 + ip=0 + DO nst=1,nstloc_y(is) + IF(lastnode==node_x(globalindex_y(nst,is))) THEN + nwf(ip,is)=nwf(ip,is)+1 + ELSE + ip=ip+1 + firstwf(ip,is)=nst + nwf(ip,is)=1 + lastnode=node_x(globalindex_y(nst,is)) + rootnode(ip,is)=lastnode + END IF + END DO + END DO + END IF + psi_y=0.0d0 + ip=1 + DO WHILE (nwf(ip,iq)>0) + first=firstwf(ip,iq) + last=firstwf(ip,iq)+nwf(ip,iq)-1 + IF(rootnode(ip,iq)==mpi_rank_x) & + psi_y(:,first:last)=& + psi_x(:,localindex_x(globalindex_y(first,iq)):localindex_x(globalindex_y(last,iq))) + CALL mpi_bcast(psi_y(:,first:last),size(psi_y(:,first:last)),mpi_double_complex,& + rootnode(ip,iq),comm2d_x,ierr) + ip=ip+1 + END DO + END SUBROUTINE mpi_wf_x2y +!*********************************************************************** + SUBROUTINE collect_wf_1d_x(psi,psi_x,iq) + !********************************************************************************* + ! adds all |psi> together and copies them to 1d distribution over nodes. + ! Copies each wave function at a time + !********************************************************************************* + USE Trivial + INTEGER,INTENT(IN) :: iq + COMPLEX(db),INTENT(INOUT) :: psi_x(:,:) + COMPLEX(db),INTENT(OUT) :: psi(:,:,:,:,:) + INTEGER :: ierr + CALL mpi_reduce_scatter(psi_x,psi(:,:,:,:,first(iq)),recvcounts(:,iq),mpi_double_complex,& + mpi_sum,comm2d_y,ierr) + END SUBROUTINE collect_wf_1d_x !*********************************************************************** SUBROUTINE finish_mpi INTEGER :: ierr CALL mpi_finalize(ierr) END SUBROUTINE finish_mpi +!*********************************************************************** + SUBROUTINE mpi_start_timer(index) + INTEGER,INTENT(IN) :: index + INTEGER :: ierr + CALL mpi_barrier (mpi_comm_world,ierr) + timer(index)=mpi_wtime() + END SUBROUTINE mpi_start_timer +!*********************************************************************** + SUBROUTINE mpi_stop_timer(index,textline) + INTEGER,INTENT(IN) :: index + CHARACTER(*),INTENT(IN) :: textline + INTEGER :: ierr + CALL mpi_barrier (mpi_comm_world,ierr) + IF(wflag)WRITE(*,'(A20,F10.4)')textline,mpi_wtime()-timer(index) + END SUBROUTINE mpi_stop_timer END MODULE Parallel diff --git a/Code/static.f90 b/Code/static.f90 index 6b6df56d85c1370f71b88022bd65eb8b08e38899..5100d6ab7fa19ff1147ce609283f4f71bda2f24d 100644 --- a/Code/static.f90 +++ b/Code/static.f90 @@ -6,21 +6,25 @@ MODULE Static USE Grids USE Moment USE Energies + USE Parallel USE Inout, ONLY: write_wavefunctions, write_densities, plot_density, & sp_properties,start_protocol - USE Pairs, ONLY: pair,epair + USE Pairs, ONLY: pair,epair,avdelt,avdeltv2,avg,eferm IMPLICIT NONE LOGICAL :: tdiag=.FALSE. LOGICAL :: tlarge=.FALSE. LOGICAL :: tvaryx_0=.FALSE. - INTEGER :: maxiter + LOGICAL :: ttime=.FALSE. + INTEGER :: maxiter,outerpot=0 REAL(db) :: radinx,radiny,radinz, & serr,delesum,x0dmp=0.2D0,e0dmp=100.D0,x0dmpmin=0.2d0 + CHARACTER(1) :: outertype='N' CONTAINS !************************************************************************* SUBROUTINE getin_static NAMELIST/static/ tdiag,tlarge,maxiter, & - radinx,radiny,radinz,serr,x0dmp,e0dmp,nneut,nprot,npsi,tvaryx_0 + radinx,radiny,radinz,serr,x0dmp,e0dmp,nneut,nprot,npsi,tvaryx_0,& + outerpot,outertype,ttime npsi=0 READ(5,static) IF(nof<=0) THEN @@ -46,29 +50,43 @@ CONTAINS charge_number=nprot mass_number=nneut+nprot x0dmpmin=x0dmp - WRITE(*,*) "x0dmpmin=", x0dmpmin END IF END SUBROUTINE getin_static !************************************************************************* SUBROUTINE init_static + !*********************************************************************** + !begins protocols, inits damping and calculates zpe correction + !*********************************************************************** IF(wflag) THEN + WRITE(*,*) WRITE(*,*) '***** Parameters for static calculation *****' WRITE(*,"(3(A,I4))") ' Neutrons:',nneut,' in ',npsi(1),' levels' WRITE(*,"(3(A,I4))") ' Protons :',nprot,' in ',npsi(2)-npsi(1),' levels' - WRITE(*,"(A,I6)") " Maximum number of iterations: ",maxiter + WRITE(*,"(A,I6)") " Maximum number of iterations: ",maxiter + IF(tvaryx_0) THEN + WRITE(*,"(2(A,G12.5))") ' Min. damping coefficient:',x0dmpmin, & + " Damping energy scale: ",e0dmp + ELSE WRITE(*,"(2(A,G12.5))") ' Damping coefficient:',x0dmp, & " Damping energy scale: ",e0dmp + END IF WRITE(*,"(A,1PE12.4)") " Convergence limit: ",serr ! initialize *.res files CALL start_protocol(converfile, & '# Iter Energy d_Energy h**2 h*h rms & - &beta2 gamma') + &beta2 gamma x_0') CALL start_protocol(dipolesfile, & '# Iter c.m. x-y-z Isovector& &dipoles x-y-z') CALL start_protocol(spinfile, & '# Iter Lx Ly Lz Sx Sy & &Sz Jx Jy Jz') + CALL start_protocol(energiesfile, & + '# Iter N(n) N(p) E(sum) E(integ) Ekin & + &E_Coul ehfCrho0 ehfCrho1 ehfCdrho0 ehfCdrh & + &ehfCtau0 ehfCtau1 ehfCdJ0 ehfCdJ1') + IF(tabc_nprocs>1.AND.tabc_myid==0) CALL start_protocol(tabcfile, & + '# Iter Energy E_kin E_Coul E_Skyrme ') ENDIF ! calculate damping matrices IF(e0dmp>0.0D0) CALL setup_damping(e0dmp) @@ -80,12 +98,28 @@ CONTAINS END SUBROUTINE init_static !************************************************************************* SUBROUTINE statichf - LOGICAL, PARAMETER :: taddnew=.TRUE. ! mix old and new densities - INTEGER :: iq,nst,firstiter - REAL(db) :: sumflu,denerg - REAL(db),PARAMETER :: addnew=0.2D0,addco=1.0D0-addnew + USE Linalg, ONLY: init_linalg + LOGICAL, PARAMETER :: taddnew=.TRUE. ! mix old and new densities + INTEGER :: iq,nst,firstiter,number_threads + REAL(db) :: sumflu,denerg + REAL(db) , PARAMETER :: addnew=0.2D0,addco=1.0D0-addnew + INTEGER, EXTERNAL :: omp_get_num_threads + !*********************************************************************** + ! + !performs static iterations + ! + !*********************************************************************** + ! + !*********************************************************************** ! Step 1: initialization - IF(tdiag) ALLOCATE(hmatr(nstmax,nstmax)) + !*********************************************************************** + number_threads=1 + !$OMP PARALLEL + !$ number_threads=OMP_GET_NUM_THREADS() + !$OMP END PARALLEL + IF(wflag)WRITE(*,*)'number of threads= ',number_threads + IF(wflag)WRITE(*,*) + CALL init_linalg IF(trestart) THEN firstiter=iter+1 ELSE @@ -96,66 +130,106 @@ CONTAINS sp_efluct2=0.D0 sp_norm=0.0D0 sumflu=0.D0 - CALL schmid + IF(wflag)WRITE(*,'(A29)',advance="no") 'Initial orthogonalization... ' + DO iq=1,2 + CALL diagstep(iq,.FALSE.) + END DO + IF(wflag)WRITE(*,*)'DONE' END IF + !**************************************************** ! Step 2: calculate densities and mean field + !**************************************************** rho=0.0D0 tau=0.0D0 current=0.0D0 sdens=0.0D0 sodens=0.0D0 - DO nst=1,nstmax - CALL add_density(isospin(nst),wocc(nst),psi(:,:,:,:,nst), & - rho,tau,current,sdens,sodens) + IF(wflag)WRITE(*,'(A25)',advance="no")'Initial add_density... ' + DO nst=1,nstloc + CALL add_density(isospin(globalindex(nst)),wocc(globalindex(nst)),& + psi(:,:,:,:,nst),rho,tau,current,sdens,sodens) ENDDO - CALL skyrme + IF(tmpi) CALL collect_densities!sum densities over all nodes + IF(wflag)WRITE(*,*) 'DONE' + IF(wflag)WRITE(*,'(A25)',advance="no")'Initial skyrme... ' + CALL skyrme(iter<=outerpot,outertype) + IF(wflag)WRITE(*,*) 'DONE' + !**************************************************** ! Step 3: initial gradient step + !**************************************************** delesum=0.0D0 sumflu=0.0D0 + IF(wflag)WRITE(*,'(A25)',advance="no")'Initial grstep... ' !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(nst,denerg) & - !$OMP SCHEDULE(STATIC) REDUCTION(+: sumflu , delesum) - DO nst=1,nstmax - CALL grstep(nst,isospin(nst),sp_energy(nst),denerg, & - psi(:,:,:,:,nst)) - sumflu=sumflu+wocc(nst)*sp_efluct1(nst) - delesum=delesum+wocc(nst)*denerg + !$OMP SCHEDULE(STATIC) REDUCTION(+: sumflu , delesum) + DO nst=1,nstloc + CALL grstep(globalindex(nst),isospin(globalindex(nst)),sp_energy(globalindex(nst)),denerg,psi(:,:,:,:,nst)) + sumflu=sumflu+wocc(globalindex(nst))*sp_efluct1(globalindex(nst)) + delesum=delesum+wocc(globalindex(nst))*denerg ENDDO !$OMP END PARALLEL DO + IF(tmpi) CALL collect_energies(delesum,sumflu)!collect fluctuations and change in energy + IF(wflag)WRITE(*,*) 'DONE' ! pairing and orthogonalization - IF(ipair/=0) CALL pair - CALL schmid + IF(ipair/=0) THEN + IF(tmpi) STOP 'PAIRING does NOT work yet with MPI'! to be fixed... + CALL pair + END IF + IF(wflag)WRITE(*,'(A25)',advance="no") 'Initial ortho2... ' + DO iq=1,2 + CALL diagstep(iq,.FALSE.) + END DO + IF(wflag)WRITE(*,*) 'DONE' ! produce and print detailed information CALL sp_properties - CALL sinfo + IF(tmpi) THEN + DO nst=1,nstmax + IF(node(nst)/=mpi_myproc) sp_energy(nst)=0.0d0 + END DO + CALL collect_sp_properties!collect single particle properties + END IF + CALL sinfo(wflag) !set x0dmp to 3* its value to get faster convergence IF(tvaryx_0) x0dmp=3.0d0*x0dmp + !**************************************************** ! step 4: start static iteration loop - Iteration: DO iter=firstiter,maxiter - WRITE(*,'(a,i6,a,F12.4)') ' Static Iteration No.',iter,' x0dmp= ',x0dmp + !**************************************************** + Iteration: DO iter=firstiter,maxiter + IF(tmpi) CALL mpi_start_timer(1) + IF(wflag)WRITE(*,'(a,i6)') ' Static Iteration No.',iter + !**************************************************** ! Step 5: gradient step + !**************************************************** delesum=0.0D0 sumflu=0.0D0 + IF(ttime.AND.tmpi) CALL mpi_start_timer(2) !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(nst,denerg) & - !$OMP SCHEDULE(STATIC) REDUCTION(+: sumflu , delesum) - DO nst=1,nstmax - CALL grstep(nst,isospin(nst),sp_energy(nst),denerg, & - psi(:,:,:,:,nst)) - sumflu=sumflu+wocc(nst)*sp_efluct1(nst) - delesum=delesum+wocc(nst)*denerg + !$OMP SCHEDULE(STATIC) REDUCTION(+: sumflu , delesum) + DO nst=1,nstloc + CALL grstep(globalindex(nst),isospin(globalindex(nst)),sp_energy(globalindex(nst)),denerg,psi(:,:,:,:,nst)) + sumflu=sumflu+wocc(globalindex(nst))*sp_efluct1(globalindex(nst)) + delesum=delesum+wocc(globalindex(nst))*denerg ENDDO !$OMP END PARALLEL DO - ! Step 6: diagonalize if desired - IF(tdiag.AND.iter>20) THEN - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(iq) SCHEDULE(STATIC) - DO iq=1,2 - CALL diagstep(iq,npsi(iq)-npmin(iq)+1) - ENDDO - !$OMP END PARALLEL DO - ENDIF - ! Step 7: do pairing and orthogonalization - IF(ipair/=0) CALL pair - CALL schmid + IF(tmpi) CALL collect_energies(delesum,sumflu)!collect fluctuation and change in energy + IF(ttime.AND.tmpi) CALL mpi_stop_timer(2,'grstep: ') + !**************************************************** + ! Step 6: diagonalize and orthonormalize + !**************************************************** + sp_norm=0.0d0 + DO iq=1,2 + CALL diagstep(iq,tdiag) + ENDDO + !**************************************************** + ! Step 7: do pairing + !**************************************************** + IF(ipair/=0) THEN + IF(tmpi) STOP 'PAIRING does NOT work yet with MPI' + CALL pair + END IF + !**************************************************** ! Step 8: get new densities and fields with relaxation + !**************************************************** IF(taddnew) THEN upot=rho bmass=tau @@ -165,35 +239,47 @@ CONTAINS current=0.0D0 sdens=0.0D0 sodens=0.0D0 + IF(ttime.AND.tmpi) CALL mpi_start_timer(2) !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(nst) SCHEDULE(STATIC) & !$OMP REDUCTION(+:rho, tau, current, sdens, sodens) - DO nst=1,nstmax - CALL add_density(isospin(nst),wocc(nst),psi(:,:,:,:,nst), & - rho,tau,current,sdens,sodens) + DO nst=1,nstloc + CALL add_density(isospin(globalindex(nst)),wocc(globalindex(nst)),& + psi(:,:,:,:,nst),rho,tau,current,sdens,sodens) ENDDO !$OMP END PARALLEL DO + IF(tmpi) CALL collect_densities!collect densities from all nodes IF(taddnew) THEN rho=addnew*rho+addco*upot tau=addnew*tau+addco*bmass ENDIF - CALL skyrme + IF(ttime.AND.tmpi) CALL mpi_stop_timer(2,'add density: ') + IF(ttime.AND.tmpi) CALL mpi_start_timer(2) + CALL skyrme(iter<=outerpot,outertype) + IF(ttime.AND.tmpi) CALL mpi_stop_timer(2,'skyrme: ') ! calculate and print information - IF(mprint>0.AND.MOD(iter,mprint)==0) THEN - CALL sp_properties - CALL sinfo - ELSEIF(tvaryx_0) THEN - CALL sp_properties - CALL sum_energy - ENDIF + IF(ttime.AND.tmpi)CALL mpi_start_timer(2) + CALL sp_properties + IF(tmpi) THEN + DO nst=1,nstmax + IF(node(nst)/=mpi_myproc) sp_energy(nst)=0.0d0 + END DO + CALL collect_sp_properties!collect single particle properties + END IF + IF(ttime.AND.tmpi) CALL mpi_stop_timer(2,'sp properties: ') + CALL sinfo(mprint>0.AND.MOD(iter,mprint)==0.AND.wflag) + !**************************************************** ! Step 9: check for convergence, saving wave functions - IF(sumflu/nstmax<serr.AND.iter>1) THEN + !**************************************************** + IF(sumflu/nstmax<serr.AND.iter>1.AND..NOT.ttabc) THEN CALL write_wavefunctions EXIT Iteration END IF IF(MOD(iter,mrest)==0) THEN CALL write_wavefunctions ENDIF - ! Step 10: update step size for the next iteration + !*********************************************************************** + ! Step 10: calculate new step size + !*********************************************************************** IF(tvaryx_0) THEN IF(ehf<ehfprev .OR. efluct1<(efluct1prev*(1.0d0-1.0d-5)) & .OR. efluct2<(efluct2prev*(1.0d0-1.0d-5))) THEN @@ -207,7 +293,6 @@ CONTAINS ehfprev=ehf END IF END DO Iteration - IF(tdiag) DEALLOCATE(hmatr) END SUBROUTINE statichf !************************************************************************* SUBROUTINE grstep(nst,iq,spe,denerg,psin) @@ -226,28 +311,31 @@ CONTAINS INTENT(INOUT) :: spe,psin REAL(db) :: x0act,esf,enrold,xnorm,xnormb,exph2,varh2 COMPLEX(db) :: ps1(nx,ny,nz,2),ps2(nx,ny,nz,2) - INTEGER :: nst2 - ! Step 1:(h-esf) on psin yields ps1. * - esf=spe + !*********************************************************************** + ! Step 1:(h-esf) on psin yields ps1. + !*********************************************************************** + esf=spe CALL hpsi(iq,esf,psin,ps1) - ! Step 2: calculate matrix elements + !*********************************************************************** + ! Step 2: store ps1 in hampsi + !*********************************************************************** xnorm=rpsnorm(psin) - xnormb=overlap(psin,ps1) - DO nst2=1,nstmax - IF(tdiag.AND.isospin(nst2)==isospin(nst)) & - hmatr(nst2,nst)=overlap(psi(:,:,:,:,nst2),ps1) - ENDDO - IF(tdiag) hmatr(nst,nst)=hmatr(nst,nst)+spe + xnormb=REAL(overlap(psin,ps1)) + hampsi(:,:,:,:,localindex(nst))=ps1!store h|psi> in hampsi + !*********************************************************************** ! Step 3: calculate fluctuation, i.e. <h*h> and |h|**2 - IF((mprint>0.AND.MOD(iter,mprint)==0).OR.tvaryx_0) THEN + !*********************************************************************** + IF(mprint>0.AND.MOD(iter,mprint)==0) THEN CALL hpsi(iq,esf,ps1,ps2) - exph2=overlap(psin,ps2) + exph2=REAL(overlap(psin,ps2)) varh2=rpsnorm(ps1) sp_efluct1(nst)=SQRT(ABS(exph2/xnorm-(xnormb/xnorm)**2)) sp_efluct2(nst)=SQRT(ABS(varh2/xnorm-(xnormb/xnorm)**2)) ENDIF + !*********************************************************************** ! Step 4: the damping step - IF(e0dmp>0.0D0) THEN + !*********************************************************************** + IF(e0dmp>0.0D0) THEN ps1=ps1 - xnormb*psin x0act=x0dmp IF(TFFT) THEN @@ -262,89 +350,130 @@ CONTAINS ELSE psin=(1.0+x0dmp*xnormb)*psin-x0dmp*ps1 ENDIF + !*********************************************************************** ! Step 5: energy convergence criterion - enrold=spe + !*********************************************************************** + enrold=spe spe=xnormb+esf denerg=(enrold-spe)/ABS(spe) END SUBROUTINE grstep !************************************************************************* - SUBROUTINE diagstep(iq,nlin) + SUBROUTINE diagstep(iq,diagonalize) !*********************************************************************** ! * ! diagstep= diagonalize Hamiltonian matrix of active shells * + ! and do orthonomalization * ! * !*********************************************************************** - INTEGER,INTENT(IN) :: iq,nlin - INTEGER :: nst,nst2,noffset - INTEGER :: infoconv - REAL(db) :: eigen(nstmax) - COMPLEX(db) :: unitary(nstmax,nstmax) - COMPLEX(db), ALLOCATABLE :: psiw(:,:,:,:) ! work space - COMPLEX(db), ALLOCATABLE :: ps1(:,:,:,:,:) - COMPLEX(db) :: hmatr_lin(nlin+1,nlin) - COMPLEX(db) :: cwork(2*nlin*nlin) - REAL(db) :: rwork(2*nlin*nlin+5*nlin+1) - INTEGER :: iwork(5*nlin+3) - INTERFACE - SUBROUTINE zhbevd( jobz, uplo, n, kd, ab, ldab, w, z, ldz, work, & - lwork, rwork, lrwork, iwork, liwork, info ) - USE Params, ONLY: db - CHARACTER(1) :: jobz, uplo - INTEGER :: info, kd, ldab, ldz, liwork, lrwork, lwork, n, iwork(*) - DOUBLE PRECISION :: rwork( * ), w( * ) - COMPLEX(8) :: ab( ldab, * ), work( * ), z( ldz, * ) - INTENT(IN) :: jobz,uplo,n,kd,ldab,ldz,lwork,lrwork,liwork - INTENT(INOUT) :: ab - INTENT(OUT) :: w,z,work,rwork,iwork,info - END SUBROUTINE zhbevd - END INTERFACE - ! Step 1: copy matrix into symmetric storage mode, then diagonalize + USE Trivial, ONLY: overlap,rpsnorm + USE Linalg, ONLY: eigenvecs,loewdin,comb_orthodiag,recombine,desc_t,desca + + INTEGER,INTENT(IN) :: iq + LOGICAL,INTENT(IN) :: diagonalize + INTEGER :: nst,nst2,noffset,i,ix,iy,iz,is,infoconv + COMPLEX(db), POINTER :: psi_x(:,:),psi_y(:,:),hampsi_x(:,:) + COMPLEX(db),ALLOCATABLE :: unitary(:,:),hmatr_lin(:,:),unitary_h(:,:), rhomatr_lin(:,:),& + rhomatr_lin_eigen(:,:), unitary_rho(:,:) + COMPLEX(db),ALLOCATABLE :: buf(:,:,:),buf2(:,:,:) + INTEGER :: big_dim,number_threads,tid,blocksize,blksz,it,jt,kt,tt + COMPLEX(db) :: sum,sum2 + EXTERNAL :: zgemv,zheevd,zgemm,zheev + INTEGER,EXTERNAL :: omp_get_num_threads,omp_get_thread_num + !*********************************************************************** + ! Step 1: Copy |psi> and h|psi> to 2d storage mode + !*********************************************************************** noffset=npmin(iq)-1 - DO nst=npmin(iq),npsi(iq) - DO nst2=nst,npsi(iq) - hmatr_lin(1+nst2-nst,nst-noffset)=& - 0.5D0*(CONJG(hmatr(nst,nst2))+hmatr(nst2,nst)) - ENDDO - ENDDO - CALL ZHBEVD('V','L',nlin,nlin,hmatr_lin,nlin+1,eigen,unitary,nstmax, & - cwork,nlin*nlin*2,rwork,2*nlin*nlin+5*nlin+1, & - iwork,5*nlin+3,infoconv) - ! Step 2: transform states, replace original ones - IF(tlarge) THEN - OPEN(scratch,status='scratch',form='unformatted') - ALLOCATE(psiw(nx,ny,nz,2)) - noffset=npmin(iq)-1 - DO nst=npmin(iq),npsi(iq) - psiw=CMPLX(0.0D0,0.0D0,db) - DO nst2=npmin(iq),npsi(iq) - psiw(:,:,:,:)=psiw(:,:,:,:) & - + unitary(nst2-noffset,nst-noffset)*psi(:,:,:,:,nst2) - ENDDO - WRITE(scratch) psiw - ENDDO - REWIND(scratch) - DO nst=npmin(iq),npsi(iq) - READ(scratch) psi(:,:,:,:,nst) - ENDDO - DEALLOCATE(psiw) - CLOSE(scratch) + ALLOCATE(unitary(nstloc_x(iq),nstloc_y(iq)), hmatr_lin(nstloc_x(iq),nstloc_y(iq)),& + unitary_h(nstloc_x(iq),nstloc_y(iq)), rhomatr_lin(nstloc_x(iq),nstloc_y(iq)),& + rhomatr_lin_eigen(nstloc_x(iq),nstloc_y(iq)), unitary_rho(nstloc_x(iq),nstloc_y(iq))) + big_dim = nx*ny*nz*2 + ALLOCATE(buf(nstloc_x(iq),nstloc_y(iq),24),buf2(nstloc_x(iq),nstloc_y(iq),24)) + + + unitary_h=0.0d0 + hmatr_lin=0.0d0 + rhomatr_lin=0.0d0 + IF(tmpi.AND.ttime) CALL mpi_start_timer(2) + + CALL PZGEMM('C','N',npsi(iq)-npmin(iq)+1,npsi(iq)-npmin(iq)+1,big_dim,cmplxone,psi,1,1,& + desc_t(iq,1:10),psi,1,1,desc_t(iq,1:10),cmplxone,rhomatr_lin,1,1,desca(iq,1:10)) + + CALL PZGEMM('C','N',npsi(iq)-npmin(iq)+1,npsi(iq)-npmin(iq)+1,big_dim,cmplxone,psi,1,1,& + desc_t(iq,1:10),hampsi,1,1,desc_t(iq,1:10),cmplxone,hmatr_lin,1,1,desca(iq,1:10)) + + !*********************************************************************** + ! Step 2: Calculate lower tringular of h-matrix and overlaps. + !*********************************************************************** + unitary_h=0.0d0 + + DO nst=1,nstloc_x(iq) + DO nst2=1,nstloc_y(iq) + ix=globalindex_x(nst,iq) + iy=globalindex_y(nst2,iq) + IF(ix==iy) THEN + sp_norm(ix)=REAL(rhomatr_lin(nst,nst2)) + IF(diagonalize) THEN + hmatr_lin(nst,nst2)=sp_energy(ix)!account for hampsi=(h-spe)|psi> + ELSE + unitary_h(nst,nst2)=CMPLX(1.0d0,0.0d0) + END IF + END IF + ENDDO !for nst2 + ENDDO !for nst + IF(tmpi.AND.ttime) CALL mpi_stop_timer(2,'CalcMatrix+Comm1d->2d: ') + !*********************************************************************** + ! Step 3: Calculate eigenvectors of h if wanted + !*********************************************************************** + IF(tmpi.AND.ttime) CALL mpi_start_timer(2) + IF(diagonalize) THEN + CALL eigenvecs(hmatr_lin,unitary_h,iq=iq) + END IF + IF(tmpi.AND.ttime) CALL mpi_stop_timer(2,'Diag matrix: ') + !*********************************************************************** + ! Step 4: Calculate matrix for Loewdin + !*********************************************************************** + IF(tmpi.AND.ttime) CALL mpi_start_timer(2) + CALL loewdin(rhomatr_lin,unitary_rho,iq) + IF(tmpi.AND.ttime) CALL mpi_stop_timer(2,'Ortho matrix: ') + !*********************************************************************** + ! Step 5: Combine h and diagonalization matrix and transpose them + !*********************************************************************** + IF(tmpi.AND.ttime) CALL mpi_start_timer(2) + CALL comb_orthodiag(unitary_h,unitary_rho,unitary,iq) + IF(tmpi.AND.ttime) CALL mpi_stop_timer(2,'Combine: ') + !*********************************************************************** + ! Step 6: Recombine |psi> and write them into 1d storage mode + !*********************************************************************** + IF(tmpi.AND.ttime) CALL mpi_start_timer(2) +! !$OMP PARALLEL DO DEFAULT(SHARED) SCHEDULE(STATIC) +! DO i=1,nx*ny*nz*2 +! CALL recombine(unitary,psi_y(i,:),psi_x(i,:),iq) +! END DO +! !$OMP END PARALLEL DO +! DO i=1,nstloc_y(iq) +! WRITE(*,*)iq,i,SUM(REAL(psi_y(:,i))**2+AIMAG(psi_y(:,i))**2),SUM(REAL(psi_y(:,i))+AIMAG(psi_y(:,i))) +! END DO + ! + CALL PZGEMM('N','T',big_dim,npsi(iq)-npmin(iq)+1,npsi(iq)-npmin(iq)+1,cmplxone,psi,1,1,desc_t(iq,1:10),& + unitary,1,1,desca(iq,1:10),cmplxone,hampsi,1,1,desc_t(iq,1:10)) + psi = hampsi + IF(tmpi.AND.ttime) CALL mpi_stop_timer(2,'recombine: ') + DEALLOCATE(unitary,hmatr_lin,unitary_h,rhomatr_lin,& + rhomatr_lin_eigen,unitary_rho) + IF(tmpi.AND.ttime) CALL mpi_start_timer(2) + IF(tmpi) THEN +! CALL collect_wf_1d_x(psi,psi_x,iq) +! DEALLOCATE(psi_x,psi_y,hampsi_x) ELSE - ALLOCATE(ps1(nx,ny,nz,2,nlin)) - noffset=npmin(iq)-1 - ps1=0.0D0 - DO nst=npmin(iq),npsi(iq) - DO nst2=npmin(iq),npsi(iq) - ps1(:,:,:,:,nst-noffset)=ps1(:,:,:,:,nst-noffset) & - + unitary(nst2-noffset,nst-noffset)*psi(:,:,:,:,nst2) - ENDDO - ENDDO - psi(:,:,:,:,npmin(iq):npsi(iq))=ps1 - DEALLOCATE(ps1) - ENDIF +! NULLIFY(psi_x,psi_y,hampsi_x) + END IF + IF(tmpi.AND.ttime) CALL mpi_stop_timer(2,'Comm 2d->1d: ') END SUBROUTINE diagstep !************************************************************************* - SUBROUTINE sinfo + SUBROUTINE sinfo(printing) INTEGER :: il + LOGICAL :: printing + REAL(db):: tabc_energy, tabc_ekin, tabc_ecoul, tabc_eskyrme CHARACTER(*),PARAMETER :: & header=' # Par v**2 var_h1 var_h2 Norm Ekin Energy & & Lx Ly Lz Sx Sy Sz ' @@ -353,56 +482,97 @@ CONTAINS CALL integ_energy CALL sum_energy ! add information to summary files - OPEN(unit=scratch,file=converfile,POSITION='APPEND') - WRITE(scratch,'(1x,i5,f9.2,3(1pg11.3),2(0pf8.3),f6.1)') & - iter,ehf,delesum/pnrtot,efluct1,efluct2,rmstot,beta,gamma - CLOSE(scratch) - OPEN(unit=scratch,file=dipolesfile, POSITION='APPEND') - WRITE(scratch,'(1x,i5,6E14.4)') iter,cmtot,cm(:,2)-cm(:,1) - CLOSE(unit=scratch) - OPEN(unit=scratch,file=spinfile, POSITION='APPEND') - WRITE(scratch,'(1x,i5,9F10.4)') iter,orbital,spin,total_angmom - CLOSE(unit=scratch) - WRITE(*,'(/,A,I7,A/2(A,F12.4),A/(3(A,E12.5),A),3(A,E12.5))') & - ' ***** Iteration ',iter,' *****',' Total energy: ',ehf,' MeV Total kinetic energy: ', & - tke,' MeV',' de/e: ',delesum,' h**2 fluct.: ',efluct1, & - ' MeV, h*hfluc.: ',efluct2,' MeV', & - ' MeV. Rearrangement E: ',e3corr,' MeV. Coul.Rearr.: ', & - ecorc,' MeV x0dmp: ',x0dmp - ! detail printout - WRITE(*,'(/A)') ' Energies integrated from density functional:' - WRITE(*,'(4(A,1PE14.6),A/26X,3(A,1PE14.6),A)') & - ' Total:',ehfint,' MeV. t0 part:',ehf0,' MeV. t1 part:',ehf1, & - ' MeV. t2 part:',ehf2,' MeV',' t3 part:',ehf3,' MeV. t4 part:',ehfls, & - ' MeV. Coulomb:',ehfc,' MeV.' - IF(ipair/=0) WRITE(*,'(2(A,1PE14.6))') ' Pairing energy neutrons: ', & - epair(1),' protons: ',epair(2) - ! output densities - IF(mplot/=0) THEN - IF(MOD(iter,mplot)==0) THEN - CALL plot_density - CALL write_densities - ENDIF - ENDIF - IF(.NOT.wflag) RETURN - ! print details of s.p. levels - WRITE(*,'(A)') ' Neutron Single Particle States:',header - DO il=1,nstmax - IF(il==npmin(2)) THEN - WRITE(*,'(A)') ' Proton Single Particle States:',header + IF(printing) THEN + IF(tabc_nprocs>1) THEN + tabc_energy=tabc_av(ehf) + tabc_ekin=tabc_av(tke) + tabc_ecoul=tabc_av(ehfc) + tabc_eskyrme=tabc_energy-tabc_ekin-tabc_ecoul + IF(tabc_myid==0) THEN + OPEN(unit=scratch,file=tabcfile,POSITION='APPEND') + WRITE(scratch,'(1x,i5,4F15.7)') & + iter, tabc_energy, tabc_ekin, tabc_ecoul, tabc_eskyrme + CLOSE(unit=scratch) + END IF END IF - WRITE(*,'(1X,I3,F4.0,F8.5,2F9.5,F9.6,F8.3,F10.3,3F8.3,3F7.3)') & - il,sp_parity(il),wocc(il),sp_efluct1(il),sp_efluct2(il), & - sp_norm(il),sp_kinetic(il),sp_energy(il), & - sp_orbital(:,il),sp_spin(:,il) - ENDDO - CALL moment_print + OPEN(unit=scratch,file=energiesfile,POSITION='APPEND') + WRITE(scratch,'(1x,i5,2F8.3,12F15.7)') & + iter,pnr,ehf,ehfint,tke,ehfc,ehfCrho0,ehfCrho1,ehfCdrho0,ehfCdrho1,ehfCtau0,& + ehfCtau1,ehfCdJ0,ehfCdJ1 + CLOSE(unit=scratch) + OPEN(unit=scratch,file=converfile,POSITION='APPEND') + WRITE(scratch,'(1x,i5,f9.2,3(1pg11.3),2(0pf8.3),f6.1,f10.7)') & + iter,ehf,delesum/pnrtot,efluct1,efluct2,rmstot,beta,gamma,x0dmp + CLOSE(scratch) + OPEN(unit=scratch,file=dipolesfile, POSITION='APPEND') + WRITE(scratch,'(1x,i5,6E14.4)') iter,cmtot,cm(:,2)-cm(:,1) + CLOSE(unit=scratch) + OPEN(unit=scratch,file=spinfile, POSITION='APPEND') + WRITE(scratch,'(1x,i5,9F10.4)') iter,orbital,spin,total_angmom + CLOSE(unit=scratch) + WRITE(*,'(/,A,I7,A/2(A,F12.4),A/(3(A,E12.5),A))') & + ' ***** Iteration ',iter,' *************************************************& + &***********************************',' Total energy: ',ehf,& + ' MeV Total kinetic energy: ', tke,' MeV',' de/e: ',delesum,& + ' h**2 fluct.: ',efluct1,' MeV, h*hfluc.: ',efluct2,' MeV', & + ' MeV. Rearrangement E: ',e3corr,' MeV. Coul.Rearr.: ', & + ecorc,' MeV' + ! detail printout + WRITE(*,'(/A)') ' Energies integrated from density functional:********************& + &********************************************' + WRITE(*,'(4(A,1PE14.6),A/26X,3(A,1PE14.6),A)') & + ' Total:',ehfint,' MeV. t0 part:',ehf0,' MeV. t1 part:',ehf1, & + ' MeV. t2 part:',ehf2,' MeV.',' t3 part:',ehf3,' MeV. t4 part:',ehfls, & + ' MeV. Coulomb:',ehfc,' MeV.' + WRITE(*,*)' *********************************************& + &**************************************' + WRITE(*,'(26X,3(A,1PE14.6),A)')' Crho0: ',ehfCrho0,' MeV. Crho1: ',ehfCrho1,' MeV.' + WRITE(*,'(26X,3(A,1PE14.6),A)')' Cdrho0: ',ehfCdrho0,' MeV. Cdrho1: ',ehfCdrho1,' MeV.' + WRITE(*,'(26X,3(A,1PE14.6),A)')' Ctau0: ',ehfCtau0,' MeV. Ctau1: ',ehfCtau1,' MeV.' + WRITE(*,'(26X,3(A,1PE14.6),A)')' CdJ0: ',ehfCdJ0,' MeV. CdJ1: ',ehfCdJ1,' MeV.' + WRITE(*,*)'**********************************************************************& + &**************************************' + IF(ipair/=0) THEN + WRITE(*,'(a)') ' e_ferm e_pair <uv delta> <v2 delta> aver_force ' + DO il=1,2 + WRITE(*,'(a,i2,a,5(1pg12.4))') 'iq=',il,': ',eferm(il) , & + epair(il) ,avdelt(il),avdeltv2(il),avg(il) + ENDDO + WRITE(*,*)'**********************************************************************& + &**************************************' + END IF + ! output densities + IF(mplot/=0) THEN + IF(MOD(iter,mplot)==0) THEN + !CALL plot_density + CALL write_densities + ENDIF + ENDIF + IF(.NOT.wflag) RETURN + ! print details of s.p. levels + WRITE(*,'(A)') ' Neutron Single Particle States:',header + DO il=1,nstmax + IF(il==npmin(2)) THEN + WRITE(*,'(A)') ' Proton Single Particle States:',header + END IF + WRITE(*,'(1X,I3,F4.0,F8.5,2F9.5,F9.6,F8.3,F10.3,3F8.3,3F7.3)') & + il,sp_parity(il),wocc(il),sp_efluct1(il),sp_efluct2(il), & + sp_norm(il),sp_kinetic(il),sp_energy(il), & + sp_orbital(:,il),sp_spin(:,il) + ENDDO + CALL moment_print + END IF END SUBROUTINE sinfo !************************************************************************* SUBROUTINE harmosc - USE Trivial, ONLY: rpsnorm + USE Trivial, ONLY: rpsnorm,overlap REAL(db) :: xx,yy,zz,xx2,zz2,y2,anorm,temp INTEGER :: nst,iq,is,ix,iy,iz,nps,i,j,k,ka,nshell(3,nstmax) + COMPLEX(db) :: psitemp(nx,ny,nz,2) + IF(wflag)WRITE(*,*) "Harmonic oscillators widths (x-y-z):" + IF(wflag)WRITE(*,"(3F12.4)") radinx,radiny,radinz + psitemp=0.0d0 + psi=0.0d0 wocc=0.D0 wocc(1:nneut)=1.D0 wocc(npmin(2):npmin(2)+nprot-1)=1.D0 @@ -411,76 +581,224 @@ CONTAINS !************************************************************************* nst=0 DO iq=1,2 - IF(iq==1) THEN - nps=npsi(1) - ELSE - nps=npsi(2) - ENDIF - ka_loop: DO ka=0,nps - DO k=0,nps - DO j=0,nps - DO i=0,nps - IF(ka==i+j+k) THEN - DO is=1,2 - nst=nst+1 - IF(nst>nps) EXIT ka_loop - nshell(1,nst)=i - nshell(2,nst)=j - nshell(3,nst)=k - ENDDO - ENDIF + IF(iq==1) THEN + nps=npsi(1) + ELSE + nps=npsi(2) + ENDIF + ka_loop: DO ka=0,nps + DO k=0,ka + DO j=0,ka + DO i=0,ka + IF(ka==i+j+k) THEN + DO is=1,2 + nst=nst+1 + IF(nst>nps) EXIT ka_loop + nshell(1,nst)=i + nshell(2,nst)=j + nshell(3,nst)=k ENDDO - ENDDO + ENDIF + ENDDO ENDDO - ENDDO ka_loop - nst=nst-1 + ENDDO + ENDDO ka_loop + nst=nst-1 END DO DO iq=1,2 - nst=npmin(iq) - DO iz=1,nz - zz2=(z(iz)/radinz)**2 - DO ix=1,nx - xx2=(x(ix)/radinx)**2 - DO iy=1,ny - y2=(y(iy)/radiny)**2 - temp=xx2+y2+zz2 - psi(ix,iy,iz,1,nst)=EXP(-(temp)) - ENDDO + nst=npmin(iq) + DO iz=1,nz + zz2=(z(iz)/radinz)**2 + DO ix=1,nx + xx2=(x(ix)/radinx)**2 + DO iy=1,ny + y2=(y(iy)/radiny)**2 + temp=xx2+y2+zz2 + IF(node(nst)==mpi_myproc) psi(ix,iy,iz,1,localindex(nst))=EXP(-(temp)) + psitemp(ix,iy,iz,1)=EXP(-(temp)) ENDDO - ENDDO - anorm=rpsnorm(psi(:,:,:,:,nst)) - psi(:,:,:,:,nst)=psi(:,:,:,:,nst)/SQRT(anorm) - !************************************************************************* - ! Higher states: lowest * polynomial - !************************************************************************* - DO nst=npmin(iq)+1,npsi(iq) - is=MOD(nst-npmin(iq),2)+1 - DO iz=1,nz - IF(nshell(3,nst)/=0) THEN - zz=z(iz)**nshell(3,nst) - ELSE - zz=1.0D0 - ENDIF - DO iy=1,ny - IF(nshell(2,nst)/=0) THEN - yy=y(iy)**nshell(2,nst) - ELSE - yy=1.0D0 - ENDIF - DO ix=1,nx - IF(nshell(1,nst)/=0) THEN - xx=x(ix)**nshell(1,nst) - ELSE - xx=1.0D0 - ENDIF - psi(ix,iy,iz,is,nst)=psi(ix,iy,iz,1,npmin(iq))*xx*yy*zz - ENDDO - ENDDO + ENDDO + ENDDO + IF(node(nst)==mpi_myproc) THEN + anorm=rpsnorm(psi(:,:,:,:,localindex(nst))) + psi(:,:,:,:,localindex(nst))=psi(:,:,:,:,localindex(nst))/SQRT(anorm) + END IF + anorm=rpsnorm(psitemp(:,:,:,:)) + psitemp(:,:,:,:)=psitemp(:,:,:,:)/SQRT(anorm) + !************************************************************************* + ! Higher states: lowest * polynomial + !************************************************************************* + DO nst=npmin(iq)+1,npsi(iq) + IF (node(nst)==mpi_myproc) THEN + is=MOD(nst-npmin(iq),2)+1 + DO iz=1,nz + IF(nshell(3,nst)/=0) THEN + zz=z(iz)**nshell(3,nst) + ELSE + zz=1.0D0 + ENDIF + DO iy=1,ny + IF(nshell(2,nst)/=0) THEN + yy=y(iy)**nshell(2,nst) + ELSE + yy=1.0D0 + ENDIF + DO ix=1,nx + IF(nshell(1,nst)/=0) THEN + xx=x(ix)**nshell(1,nst) + ELSE + xx=1.0D0 + ENDIF + psi(ix,iy,iz,is,localindex(nst))=psitemp(ix,iy,iz,1)*xx*yy*zz + ENDDO ENDDO - END DO + ENDDO + psi(:,:,:,:,localindex(nst))=psi(:,:,:,:,localindex(nst))/sqrt(rpsnorm(psi(:,:,:,:,localindex(nst)))) + END IF + END DO END DO - WRITE(*,*) "Harmonic oscillators widths (x-y-z):" - WRITE(*,"(3F12.4)") radinx,radiny,radinz - WRITE(*,*) '***** Harmonic oscillator initialization complete *****' + IF(wflag)WRITE(*,*) '***** Harmonic oscillator initialization complete *****' END SUBROUTINE harmosc +!************************************************************************************ + SUBROUTINE planewaves + IMPLICIT NONE + INTEGER :: nst, iq + ! + INTEGER :: i,j,l,ii,jj,kk + INTEGER :: kf(3,npsi(2)),temp_k(3) + LOGICAL :: check + INTEGER :: ki(3,8*7**3),ki_t(3,7**3) + REAL(db) :: temp_e,temp_energies(8*7**3) + WRITE(*,*) + WRITE(*,*)'*****init plane waves:*****' + psi=(0.d0,0.d0) + wocc=0.D0 + wocc(1:nneut)=1.D0 + wocc(npmin(2):npmin(2)+nprot-1)=1.D0 + !*********************************************************************** + ! calculate all k * + !*********************************************************************** + j=0 + ii=0 + jj=0 + kk=0 + DO i=1,7**3 + IF (ii==7) THEN + ii=0 + jj=jj+1 + END IF + IF (jj==7) THEN + jj=0 + kk=kk+1 + END IF + ki(1,i)=ii + ki(2,i)=jj + ki(3,i)=kk + ii=ii+1 + END DO + ki_t(:,1:7**3)=ki(:,1:7**3) + l=1 + DO i=1,7**3 + DO j=1,8 + ki(:,l)=ki_t(:,i) + IF(j==2.OR.j==4.OR.j==6.OR.j==8) THEN + ki(1,l)=-ki_t(1,i) + END IF + IF(j==3.OR.j==4.OR.j==7.OR.j==8) THEN + ki(2,l)=-ki_t(2,i) + END IF + IF(j==5.OR.j==6.OR.j==7.OR.j==8) THEN + ki(3,l)=-ki_t(3,i) + END IF + temp_energies(l)=epw(ki(1,l),ki(2,l),ki(3,l)) + l=l+1 + END DO + END DO + !insertion_sort + DO i=2,8*7**3 + temp_e=temp_energies(i) + temp_k(:)=ki(:,i) + j=i + DO WHILE (j>1 .AND. temp_energies(j-1)>temp_e) + temp_energies(j)=temp_energies(j-1) + ki(:,j)=ki(:,j-1) + j=j-1 + END DO + temp_energies(j)=temp_e + ki(:,j)=temp_k(:) + END DO + nst = 1 + DO iq = 1,2 + i=1 !counts nobs/2 (spin) + j=1 !counts "-"-signs + l=1 !counts ki + DO WHILE(nst<=npsi(iq)) + kf(:,i)=ki(:,l) + CALL check_kf(kf,i,check) + IF(check) THEN + CALL pw(nst,kf(1,i),kf(2,i),kf(3,i),1) + nst=nst+1 + CALL pw(nst,kf(1,i),kf(2,i),kf(3,i),-1) + nst=nst+1 + i=i+1 + END IF + l=l+1 + END DO + END DO + END SUBROUTINE planewaves + ! +REAL(db) FUNCTION epw(kx,ky,kz) RESULT(e) + USE FORCES, ONLY: nucleon_mass + INTEGER,INTENT(IN) :: kx,ky,kz + REAL(db) :: dx,dy,dz + dx=x(2)-x(1) + dy=y(2)-y(1) + dz=z(2)-z(1) + e=(hbc**2)/(2*nucleon_mass)*(((2*pi*kx+bangx)/(nx*dx))**2& + +((2*pi*ky+bangy)/(ny*dy))**2+((2*pi*kz+bangz)/(nz*dz))**2) +END FUNCTION + +SUBROUTINE pw(n,kx,ky,kz,s) + USE Trivial, ONLY : rpsnorm + IMPLICIT NONE + INTEGER,INTENT(IN) :: n,kx,ky,kz,s + INTEGER :: ix,iy,iz,nst + REAL(db) :: facx,facy,facz,norm + COMPLEX(db) :: fy,fz + IF(mpi_myproc/=node(n)) RETURN + nst=localindex(n) + DO iz = 1,nz + facz=REAL(iz-1)*((2.D0*pi*REAL(kz)+bangz)/FLOAT(nz)) + fz=CMPLX(COS(facz),SIN(facz),db) + DO iy=1,ny + facy=REAL(iy-1)*((2.D0*pi*REAL(ky)+bangy)/FLOAT(ny)) + fy=CMPLX(COS(facy),SIN(facy),db) + DO ix=1,nx + facx=REAL(ix-1)*((2.D0*pi*REAL(kx)+bangx)/FLOAT(nx)) + IF(s>0) THEN + psi(ix,iy,iz,1,nst)=fz*fy*CMPLX(COS(facx),SIN(facx),db) + psi(ix,iy,iz,2,nst)=0.D0 + ELSE + psi(ix,iy,iz,2,nst)=fz*fy*CMPLX(COS(facx),SIN(facx),db) + psi(ix,iy,iz,1,nst)=0.D0 + END IF + ENDDO + ENDDO + ENDDO + norm=SQRT(rpsnorm(psi(:,:,:,:,nst))) + psi(:,:,:,:,nst)=psi(:,:,:,:,nst)/norm + WRITE(*,'(A14,3I2,A7,I2,A12,I4,A8,F9.5,A10,F6.3)')'state with k=(',kx,ky,kz,& + '), spin= ',s,' at position',globalindex(nst),' energy ',epw(kx,ky,kz),' and wocc=',wocc(nst) +END SUBROUTINE pw +! +SUBROUTINE check_kf(k,i,check) + INTEGER,INTENT(IN) :: k(3,npsi(2)),i + LOGICAL,INTENT(OUT) :: check + INTEGER :: j + check=.TRUE. + IF(i==1) RETURN + DO j=1,i-1 + IF(k(1,j)==k(1,i).AND.k(2,j)==k(2,i).AND.k(3,j)==k(3,i)) check=.FALSE. + END DO +END SUBROUTINE END MODULE Static