From 98599b2b7dcd81dbe3b6093e465c1963a595fa1b Mon Sep 17 00:00:00 2001 From: Michele Guerini Rocco Date: Mon, 20 Dec 2021 18:46:44 +0100 Subject: [PATCH] src/dispersion.f90: compute factorials incrementally --- src/dispersion.f90 | 305 ++++++++++++++++++++++++++------------------- 1 file changed, 176 insertions(+), 129 deletions(-) diff --git a/src/dispersion.f90 b/src/dispersion.f90 index 1cc037c..074825f 100644 --- a/src/dispersion.f90 +++ b/src/dispersion.f90 @@ -262,35 +262,36 @@ subroutine warmdisp(xg,yg,mu,npl,nprf,sox,lrm,err,nprr,npri,fast,imx,ex,ey,ez) end if ! end subroutine warmdisp -! -! -! + + subroutine diel_tens_fr(yg,mu,npl,a330,epsl,lrm,fast) -! Fully relativistic case computation of dielectric tensor elements -! up to third order in Larmor radius for hermitian part -! - use math, only : fact + ! Fully relativistic case computation of dielectric tensor elements + ! up to third order in Larmor radius for hermitian part + ! + implicit none -! arguments + + ! subroutine arguments integer :: lrm,fast real(wp_) :: yg,mu,npl complex(wp_) :: a330 complex(wp_), dimension(3,3,lrm) :: epsl -! local variables + + ! local variables integer :: i,j,l,is,k,lm real(wp_) :: cr,ci real(wp_) :: npl2,dnl,w,asl,bsl,cmxw,fal real(wp_), dimension(-lrm:lrm,0:2,0:lrm) :: rr real(wp_), dimension(lrm,0:2,lrm) :: ri complex(wp_) :: ca11,ca12,ca22,ca13,ca23,ca33,cq0p,cq0m,cq1p,cq1m,cq2p -! + npl2=npl**2 dnl=one-npl2 -! + cmxw=one+15.0_wp_/(8.0_wp_*mu)+105.0_wp_/(128.0_wp_*mu**2) cr=-mu*mu/(sqrt_pi*cmxw) ci=sqrt(2.0_wp_*pi*mu)*mu**2/cmxw -! + do l=1,lrm do j=1,3 do i=1,3 @@ -298,73 +299,92 @@ subroutine diel_tens_fr(yg,mu,npl,a330,epsl,lrm,fast) end do end do end do -! + if (fast<4) then call hermitian(rr,yg,mu,npl,cr,fast,lrm) else call hermitian_2(rr,yg,mu,npl,cr,fast,lrm) end if -! + call antihermitian(ri,yg,mu,npl,ci,lrm) -! - do l=1,lrm - lm=l-1 - fal=-0.25_wp_**l*fact(2*l)/(fact(l)**2*yg**(2*lm)) - ca11=czero - ca12=czero - ca13=czero - ca22=czero - ca23=czero - ca33=czero - do is=0,l - k=l-is - w=(-one)**k -! - asl=w/(fact(is+l)*fact(l-is)) - bsl=asl*(is*is+dble(2*k*lm*(l+is))/(2.0_wp_*l-one)) -! - if(is.gt.0) then - cq0p=rr(is,0,l)+rr(-is,0,l)+im*ri(is,0,l) - cq0m=rr(is,0,l)-rr(-is,0,l)+im*ri(is,0,l) - cq1p=rr(is,1,l)+rr(-is,1,l)+im*ri(is,1,l) - cq1m=rr(is,1,l)-rr(-is,1,l)+im*ri(is,1,l) - cq2p=rr(is,2,l)+rr(-is,2,l)+im*ri(is,2,l) - else - cq0p=rr(is,0,l) - cq0m=rr(is,0,l) - cq1p=rr(is,1,l) - cq1m=rr(is,1,l) - cq2p=rr(is,2,l) - end if -! - ca11=ca11+is**2*asl*cq0p - ca12=ca12+is*l*asl*cq0m - ca22=ca22+bsl*cq0p - ca13=ca13+is*asl*cq1m/yg - ca23=ca23+l*asl*cq1p/yg - ca33=ca33+asl*cq2p/yg**2 + block + ! compute all factorials incrementally + integer :: fact_l, fact_2l, fact_l2 + integer :: fact_lpis, fact_lmis + + fact_l = 1 ! l! + fact_2l = 1 ! (2l)! + fact_l2 = 1 ! (l!)² + + do l=1,lrm + fact_l = fact_l * l + fact_2l = fact_2l * 2*l * (2*l - 1) + fact_l2 = fact_l2 * l**2 + + lm=l-1 + fal=-0.25_wp_**l * fact_2l/(fact_l2 * yg**(2*lm)) + ca11=czero + ca12=czero + ca13=czero + ca22=czero + ca23=czero + ca33=czero + + fact_lpis = fact_l ! (l+is)! + fact_lmis = fact_l ! (l-is)! + + do is=0,l + k=l-is + w=(-one)**k + + asl=w/(fact_lpis * fact_lmis * one) + bsl=asl*(is*is+dble(2*k*lm*(l+is))/(2.0_wp_*l-one)) + + fact_lpis = fact_lpis * (l + is+1) + fact_lmis = merge(fact_lmis / (l - is), 1, is < l) + + if(is.gt.0) then + cq0p=rr(is,0,l)+rr(-is,0,l)+im*ri(is,0,l) + cq0m=rr(is,0,l)-rr(-is,0,l)+im*ri(is,0,l) + cq1p=rr(is,1,l)+rr(-is,1,l)+im*ri(is,1,l) + cq1m=rr(is,1,l)-rr(-is,1,l)+im*ri(is,1,l) + cq2p=rr(is,2,l)+rr(-is,2,l)+im*ri(is,2,l) + else + cq0p=rr(is,0,l) + cq0m=rr(is,0,l) + cq1p=rr(is,1,l) + cq1m=rr(is,1,l) + cq2p=rr(is,2,l) + end if + + ca11=ca11+is**2*asl*cq0p + ca12=ca12+is*l*asl*cq0m + ca22=ca22+bsl*cq0p + ca13=ca13+is*asl*cq1m/yg + ca23=ca23+l*asl*cq1p/yg + ca33=ca33+asl*cq2p/yg**2 + end do + epsl(1,1,l) = - ca11*fal + epsl(1,2,l) = + im*ca12*fal + epsl(2,2,l) = - ca22*fal + epsl(1,3,l) = - ca13*fal + epsl(2,3,l) = - im*ca23*fal + epsl(3,3,l) = - ca33*fal end do - epsl(1,1,l) = - ca11*fal - epsl(1,2,l) = + im*ca12*fal - epsl(2,2,l) = - ca22*fal - epsl(1,3,l) = - ca13*fal - epsl(2,3,l) = - im*ca23*fal - epsl(3,3,l) = - ca33*fal - end do -! + end block + cq2p=rr(0,2,0) a330=cq2p -! + do l=1,lrm epsl(2,1,l) = - epsl(1,2,l) epsl(3,1,l) = epsl(1,3,l) epsl(3,2,l) = - epsl(2,3,l) end do -! + end subroutine diel_tens_fr -! -! -! + + subroutine hermitian(rr,yg,mu,npl,cr,fast,lrm) use eierf, only : calcei3 implicit none @@ -780,7 +800,6 @@ end function fhermit ! ! subroutine antihermitian(ri,yg,mu,npl,ci,lrm) - use math, only : fact implicit none ! local constants integer, parameter :: lmx=20,nmx=lmx+2 @@ -856,17 +875,26 @@ subroutine antihermitian(ri,yg,mu,npl,ci,lrm) else ee=exp(-mu*(ygn-one+npl*ub)) call ssbi(aa,n,lrm,fsbi) - do m=n,lrm - cm=sqrt_pi*fact(m)*du**(2*m+1) - cim=0.5_wp_*ci*dnl**m - mm=m-n+1 - fi0m=cm*fsbi(mm) - fi1m=-0.5_wp_*aa*cm*fsbi(mm+1) - fi2m=0.5_wp_*cm*(fsbi(mm+1)+0.5_wp_*aa*aa*fsbi(mm+2)) - ri(n,0,m)=cim*ee*fi0m - ri(n,1,m)=cim*ee*(du*fi1m+ub*fi0m) - ri(n,2,m)=cim*ee*(du*du*fi2m+2.0_wp_*du*ub*fi1m+ub*ub*fi0m) - end do + + block + ! incrementally compute m! + integer :: fact_m + fact_m = 1 + + do m=n,lrm + fact_m = fact_m * m + + cm=sqrt_pi*fact_m*du**(2*m+1) + cim=0.5_wp_*ci*dnl**m + mm=m-n+1 + fi0m=cm*fsbi(mm) + fi1m=-0.5_wp_*aa*cm*fsbi(mm+1) + fi2m=0.5_wp_*cm*(fsbi(mm+1)+0.5_wp_*aa*aa*fsbi(mm+2)) + ri(n,0,m)=cim*ee*fi0m + ri(n,1,m)=cim*ee*(du*fi1m+ub*fi0m) + ri(n,2,m)=cim*ee*(du*du*fi2m+2.0_wp_*du*ub*fi1m+ub*ub*fi0m) + end do + end block end if end if end do @@ -917,79 +945,98 @@ subroutine expinit end do ! end subroutine expinit -! -! -! + + subroutine diel_tens_wr(yg,mu,npl,a330,epsl,lrm) -! Weakly relativistic dielectric tensor computation of dielectric -! tensor elements (Krivenki and Orefice, JPP 30,125 - 1983) -! - use math, only : fact + ! Weakly relativistic dielectric tensor computation of dielectric + ! tensor elements (Krivenki and Orefice, JPP 30,125 - 1983) + ! + implicit none -! arguments + + ! subroutine arguments integer :: lrm real(wp_) :: yg,npl,mu complex(wp_) :: a330,epsl(3,3,lrm) -! local variables + + ! local variables integer :: l,lm,is,k real(wp_) :: npl2,fcl,w,asl,bsl complex(wp_) :: ca11,ca12,ca13,ca22,ca23,ca33,cq0p,cq0m,cq1p,cq1m,cq2p complex(wp_), dimension(0:lrm,0:2) :: cefp,cefm -! + npl2=npl*npl -! + call fsup(cefp,cefm,lrm,yg,npl,mu) -! - do l=1,lrm - lm=l-1 - fcl=0.5_wp_**l*((one/yg)**2/mu)**lm*fact(2*l)/fact(l) - ca11=czero - ca12=czero - ca13=czero - ca22=czero - ca23=czero - ca33=czero - do is=0,l - k=l-is - w=(-one)**k -! - asl=w/(fact(is+l)*fact(l-is)) - bsl=asl*(is*is+dble(2*k*lm*(l+is))/(2.0_wp_*l-one)) -! - cq0p=mu*cefp(is,0) - cq0m=mu*cefm(is,0) - cq1p=mu*npl*(cefp(is,0)-cefp(is,1)) - cq1m=mu*npl*(cefm(is,0)-cefm(is,1)) - cq2p=cefp(is,1)+mu*npl2*(cefp(is,2)+cefp(is,0)-2.0_wp_*cefp(is,1)) -! - ca11=ca11+is**2*asl*cq0p - ca12=ca12+is*l*asl*cq0m - ca22=ca22+bsl*cq0p - ca13=ca13+is*asl*cq1m/yg - ca23=ca23+l*asl*cq1p/yg - ca33=ca33+asl*cq2p/yg**2 + + block + ! compute all factorials incrementally + integer :: fact_l, fact_2l_l + integer :: fact_lpis, fact_lmis + + fact_l = 1 ! l! + fact_2l_l = 1 ! (2l)!/l! + + do l=1,lrm + fact_l = fact_l * l + fact_2l_l = fact_2l_l * (4*l - 2) ! see http://oeis.org/A001813 + + lm=l-1 + fcl=0.5_wp_**l*((one/yg)**2/mu)**lm*fact_2l_l + ca11=czero + ca12=czero + ca13=czero + ca22=czero + ca23=czero + ca33=czero + + fact_lpis = fact_l ! (l+is)! + fact_lmis = fact_l ! (l-is)! + + do is=0,l + k=l-is + w=(-one)**k + + asl=w/(fact_lpis * fact_lmis * one) + bsl=asl*(is*is+dble(2*k*lm*(l+is))/(2.0_wp_*l-one)) + + fact_lpis = fact_lpis * (l + is+1) + fact_lmis = merge(fact_lmis / (l - is), 1, is < l) + + cq0p=mu*cefp(is,0) + cq0m=mu*cefm(is,0) + cq1p=mu*npl*(cefp(is,0)-cefp(is,1)) + cq1m=mu*npl*(cefm(is,0)-cefm(is,1)) + cq2p=cefp(is,1)+mu*npl2*(cefp(is,2)+cefp(is,0)-2.0_wp_*cefp(is,1)) + + ca11=ca11+is**2*asl*cq0p + ca12=ca12+is*l*asl*cq0m + ca22=ca22+bsl*cq0p + ca13=ca13+is*asl*cq1m/yg + ca23=ca23+l*asl*cq1p/yg + ca33=ca33+asl*cq2p/yg**2 + end do + epsl(1,1,l) = -ca11*fcl + epsl(1,2,l) = +im*ca12*fcl + epsl(2,2,l) = -ca22*fcl + epsl(1,3,l) = -ca13*fcl + epsl(2,3,l) = -im*ca23*fcl + epsl(3,3,l) = -ca33*fcl end do - epsl(1,1,l) = -ca11*fcl - epsl(1,2,l) = +im*ca12*fcl - epsl(2,2,l) = -ca22*fcl - epsl(1,3,l) = -ca13*fcl - epsl(2,3,l) = -im*ca23*fcl - epsl(3,3,l) = -ca33*fcl - end do -! + end block + cq2p=cefp(0,1)+mu*npl2*(cefp(0,2)+cefp(0,0)-2.0_wp_*cefp(0,1)) a330=-mu*cq2p -! + do l=1,lrm epsl(2,1,l) = - epsl(1,2,l) epsl(3,1,l) = epsl(1,3,l) epsl(3,2,l) = - epsl(2,3,l) end do -! + end subroutine diel_tens_wr -! -! -! + + subroutine fsup(cefp,cefm,lrm,yg,npl,mu) implicit none ! local constants