From 98599b2b7dcd81dbe3b6093e465c1963a595fa1b Mon Sep 17 00:00:00 2001
From: Michele Guerini Rocco <m.guerinirocco2@campus.unimib.it>
Date: Mon, 20 Dec 2021 18:46:44 +0100
Subject: [PATCH] src/dispersion.f90: compute factorials incrementally

---
 src/dispersion.f90 | 305 ++++++++++++++++++++++++++-------------------
 1 file changed, 176 insertions(+), 129 deletions(-)

diff --git a/src/dispersion.f90 b/src/dispersion.f90
index 1cc037c..074825f 100644
--- a/src/dispersion.f90
+++ b/src/dispersion.f90
@@ -262,35 +262,36 @@ subroutine warmdisp(xg,yg,mu,npl,nprf,sox,lrm,err,nprr,npri,fast,imx,ex,ey,ez)
   end if
 !
 end subroutine warmdisp
-!
-!
-!
+
+
 subroutine diel_tens_fr(yg,mu,npl,a330,epsl,lrm,fast)
-! Fully relativistic case computation of dielectric tensor elements
-! up to third order in Larmor radius for hermitian part
-!
-  use math, only : fact
+  ! Fully relativistic case computation of dielectric tensor elements
+  ! up to third order in Larmor radius for hermitian part
+  !
+
   implicit none
-! arguments
+
+  ! subroutine arguments
   integer :: lrm,fast
   real(wp_) :: yg,mu,npl
   complex(wp_) :: a330
   complex(wp_), dimension(3,3,lrm) :: epsl
-! local variables
+
+  ! local variables
   integer :: i,j,l,is,k,lm
   real(wp_) :: cr,ci
   real(wp_) :: npl2,dnl,w,asl,bsl,cmxw,fal
   real(wp_), dimension(-lrm:lrm,0:2,0:lrm) :: rr
   real(wp_), dimension(lrm,0:2,lrm) :: ri
   complex(wp_) :: ca11,ca12,ca22,ca13,ca23,ca33,cq0p,cq0m,cq1p,cq1m,cq2p
-!
+
   npl2=npl**2
   dnl=one-npl2
-!
+
   cmxw=one+15.0_wp_/(8.0_wp_*mu)+105.0_wp_/(128.0_wp_*mu**2)
   cr=-mu*mu/(sqrt_pi*cmxw)
   ci=sqrt(2.0_wp_*pi*mu)*mu**2/cmxw
-!
+
   do l=1,lrm
     do j=1,3
       do i=1,3
@@ -298,73 +299,92 @@ subroutine diel_tens_fr(yg,mu,npl,a330,epsl,lrm,fast)
       end do
     end do
   end do
-!
+
   if (fast<4) then
     call hermitian(rr,yg,mu,npl,cr,fast,lrm)
   else
     call hermitian_2(rr,yg,mu,npl,cr,fast,lrm)
   end if
-!
+
   call antihermitian(ri,yg,mu,npl,ci,lrm)
-!
-  do l=1,lrm
-    lm=l-1
-    fal=-0.25_wp_**l*fact(2*l)/(fact(l)**2*yg**(2*lm))
-    ca11=czero
-    ca12=czero
-    ca13=czero
-    ca22=czero
-    ca23=czero
-    ca33=czero
-    do is=0,l
-      k=l-is
-      w=(-one)**k
-!
-      asl=w/(fact(is+l)*fact(l-is))
-      bsl=asl*(is*is+dble(2*k*lm*(l+is))/(2.0_wp_*l-one))
-!
-      if(is.gt.0) then
-        cq0p=rr(is,0,l)+rr(-is,0,l)+im*ri(is,0,l)
-        cq0m=rr(is,0,l)-rr(-is,0,l)+im*ri(is,0,l)
-        cq1p=rr(is,1,l)+rr(-is,1,l)+im*ri(is,1,l)
-        cq1m=rr(is,1,l)-rr(-is,1,l)+im*ri(is,1,l)
-        cq2p=rr(is,2,l)+rr(-is,2,l)+im*ri(is,2,l)
-      else
-        cq0p=rr(is,0,l)
-        cq0m=rr(is,0,l)
-        cq1p=rr(is,1,l)
-        cq1m=rr(is,1,l)
-        cq2p=rr(is,2,l)
-      end if
-!
-      ca11=ca11+is**2*asl*cq0p
-      ca12=ca12+is*l*asl*cq0m
-      ca22=ca22+bsl*cq0p
-      ca13=ca13+is*asl*cq1m/yg
-      ca23=ca23+l*asl*cq1p/yg
-      ca33=ca33+asl*cq2p/yg**2
+  block
+    ! compute all factorials incrementally
+    integer :: fact_l, fact_2l, fact_l2
+    integer :: fact_lpis, fact_lmis
+
+    fact_l  = 1  ! l!
+    fact_2l = 1  ! (2l)!
+    fact_l2 = 1  ! (l!)²
+
+    do l=1,lrm
+      fact_l  = fact_l  * l
+      fact_2l = fact_2l * 2*l * (2*l - 1)
+      fact_l2 = fact_l2 * l**2
+
+      lm=l-1
+      fal=-0.25_wp_**l * fact_2l/(fact_l2 * yg**(2*lm))
+      ca11=czero
+      ca12=czero
+      ca13=czero
+      ca22=czero
+      ca23=czero
+      ca33=czero
+
+      fact_lpis = fact_l  ! (l+is)!
+      fact_lmis = fact_l  ! (l-is)!
+
+      do is=0,l
+        k=l-is
+        w=(-one)**k
+
+        asl=w/(fact_lpis * fact_lmis * one)
+        bsl=asl*(is*is+dble(2*k*lm*(l+is))/(2.0_wp_*l-one))
+
+        fact_lpis = fact_lpis * (l + is+1)
+        fact_lmis = merge(fact_lmis / (l - is), 1, is < l)
+
+        if(is.gt.0) then
+          cq0p=rr(is,0,l)+rr(-is,0,l)+im*ri(is,0,l)
+          cq0m=rr(is,0,l)-rr(-is,0,l)+im*ri(is,0,l)
+          cq1p=rr(is,1,l)+rr(-is,1,l)+im*ri(is,1,l)
+          cq1m=rr(is,1,l)-rr(-is,1,l)+im*ri(is,1,l)
+          cq2p=rr(is,2,l)+rr(-is,2,l)+im*ri(is,2,l)
+        else
+          cq0p=rr(is,0,l)
+          cq0m=rr(is,0,l)
+          cq1p=rr(is,1,l)
+          cq1m=rr(is,1,l)
+          cq2p=rr(is,2,l)
+        end if
+
+        ca11=ca11+is**2*asl*cq0p
+        ca12=ca12+is*l*asl*cq0m
+        ca22=ca22+bsl*cq0p
+        ca13=ca13+is*asl*cq1m/yg
+        ca23=ca23+l*asl*cq1p/yg
+        ca33=ca33+asl*cq2p/yg**2
+      end do
+      epsl(1,1,l) =  - ca11*fal
+      epsl(1,2,l) =  + im*ca12*fal
+      epsl(2,2,l) =  - ca22*fal
+      epsl(1,3,l) =  - ca13*fal
+      epsl(2,3,l) =  - im*ca23*fal
+      epsl(3,3,l) =  - ca33*fal
     end do
-    epsl(1,1,l) =  - ca11*fal
-    epsl(1,2,l) =  + im*ca12*fal
-    epsl(2,2,l) =  - ca22*fal
-    epsl(1,3,l) =  - ca13*fal
-    epsl(2,3,l) =  - im*ca23*fal
-    epsl(3,3,l) =  - ca33*fal
-  end do
-!
+  end block
+
   cq2p=rr(0,2,0)
   a330=cq2p
-!
+
   do l=1,lrm
     epsl(2,1,l) = - epsl(1,2,l)
     epsl(3,1,l) =   epsl(1,3,l)
     epsl(3,2,l) = - epsl(2,3,l)
   end do
-!
+
 end subroutine diel_tens_fr
-!
-!
-!
+
+
 subroutine hermitian(rr,yg,mu,npl,cr,fast,lrm)
   use eierf, only : calcei3
   implicit none
@@ -780,7 +800,6 @@ end function fhermit
 !
 !
 subroutine antihermitian(ri,yg,mu,npl,ci,lrm)
-  use math, only : fact
   implicit none
 ! local constants
   integer, parameter :: lmx=20,nmx=lmx+2
@@ -856,17 +875,26 @@ subroutine antihermitian(ri,yg,mu,npl,ci,lrm)
       else
         ee=exp(-mu*(ygn-one+npl*ub))
         call ssbi(aa,n,lrm,fsbi)
-        do m=n,lrm
-          cm=sqrt_pi*fact(m)*du**(2*m+1)
-          cim=0.5_wp_*ci*dnl**m
-          mm=m-n+1
-          fi0m=cm*fsbi(mm)
-          fi1m=-0.5_wp_*aa*cm*fsbi(mm+1)
-          fi2m=0.5_wp_*cm*(fsbi(mm+1)+0.5_wp_*aa*aa*fsbi(mm+2))
-          ri(n,0,m)=cim*ee*fi0m
-          ri(n,1,m)=cim*ee*(du*fi1m+ub*fi0m)
-          ri(n,2,m)=cim*ee*(du*du*fi2m+2.0_wp_*du*ub*fi1m+ub*ub*fi0m)
-        end do
+
+        block
+          ! incrementally compute m!
+          integer :: fact_m
+          fact_m = 1
+
+          do m=n,lrm
+            fact_m = fact_m * m
+
+            cm=sqrt_pi*fact_m*du**(2*m+1)
+            cim=0.5_wp_*ci*dnl**m
+            mm=m-n+1
+            fi0m=cm*fsbi(mm)
+            fi1m=-0.5_wp_*aa*cm*fsbi(mm+1)
+            fi2m=0.5_wp_*cm*(fsbi(mm+1)+0.5_wp_*aa*aa*fsbi(mm+2))
+            ri(n,0,m)=cim*ee*fi0m
+            ri(n,1,m)=cim*ee*(du*fi1m+ub*fi0m)
+            ri(n,2,m)=cim*ee*(du*du*fi2m+2.0_wp_*du*ub*fi1m+ub*ub*fi0m)
+          end do
+        end block
       end if
     end if
   end do
@@ -917,79 +945,98 @@ subroutine expinit
   end do
 !      
 end subroutine expinit
-!
-!
-!
+
+
 subroutine diel_tens_wr(yg,mu,npl,a330,epsl,lrm)
-! Weakly relativistic dielectric tensor computation of dielectric
-! tensor elements (Krivenki and Orefice, JPP 30,125 - 1983)
-!
-  use math, only : fact
+  ! Weakly relativistic dielectric tensor computation of dielectric
+  ! tensor elements (Krivenki and Orefice, JPP 30,125 - 1983)
+  !
+
   implicit none
-! arguments
+
+  ! subroutine arguments
   integer :: lrm
   real(wp_) :: yg,npl,mu
   complex(wp_) :: a330,epsl(3,3,lrm)
-! local variables
+
+  ! local variables
   integer :: l,lm,is,k
   real(wp_) :: npl2,fcl,w,asl,bsl
   complex(wp_) :: ca11,ca12,ca13,ca22,ca23,ca33,cq0p,cq0m,cq1p,cq1m,cq2p
   complex(wp_), dimension(0:lrm,0:2) :: cefp,cefm
-!
+
   npl2=npl*npl
-!
+
   call fsup(cefp,cefm,lrm,yg,npl,mu)
-!
-  do l=1,lrm
-    lm=l-1
-    fcl=0.5_wp_**l*((one/yg)**2/mu)**lm*fact(2*l)/fact(l)
-    ca11=czero
-    ca12=czero
-    ca13=czero
-    ca22=czero
-    ca23=czero
-    ca33=czero
-    do is=0,l
-      k=l-is
-      w=(-one)**k
-!
-      asl=w/(fact(is+l)*fact(l-is))
-      bsl=asl*(is*is+dble(2*k*lm*(l+is))/(2.0_wp_*l-one))
-!
-      cq0p=mu*cefp(is,0)
-      cq0m=mu*cefm(is,0)
-      cq1p=mu*npl*(cefp(is,0)-cefp(is,1))
-      cq1m=mu*npl*(cefm(is,0)-cefm(is,1))
-      cq2p=cefp(is,1)+mu*npl2*(cefp(is,2)+cefp(is,0)-2.0_wp_*cefp(is,1))
-!
-      ca11=ca11+is**2*asl*cq0p
-      ca12=ca12+is*l*asl*cq0m
-      ca22=ca22+bsl*cq0p
-      ca13=ca13+is*asl*cq1m/yg
-      ca23=ca23+l*asl*cq1p/yg
-      ca33=ca33+asl*cq2p/yg**2
+
+  block
+    ! compute all factorials incrementally
+    integer :: fact_l, fact_2l_l
+    integer :: fact_lpis, fact_lmis
+
+    fact_l    = 1  ! l!
+    fact_2l_l = 1  ! (2l)!/l!
+
+    do l=1,lrm
+      fact_l    = fact_l * l
+      fact_2l_l = fact_2l_l * (4*l - 2)  ! see http://oeis.org/A001813
+
+      lm=l-1
+      fcl=0.5_wp_**l*((one/yg)**2/mu)**lm*fact_2l_l
+      ca11=czero
+      ca12=czero
+      ca13=czero
+      ca22=czero
+      ca23=czero
+      ca33=czero
+
+      fact_lpis = fact_l  ! (l+is)!
+      fact_lmis = fact_l  ! (l-is)!
+
+      do is=0,l
+        k=l-is
+        w=(-one)**k
+
+        asl=w/(fact_lpis * fact_lmis * one)
+        bsl=asl*(is*is+dble(2*k*lm*(l+is))/(2.0_wp_*l-one))
+
+        fact_lpis = fact_lpis * (l + is+1)
+        fact_lmis = merge(fact_lmis / (l - is), 1, is < l)
+
+        cq0p=mu*cefp(is,0)
+        cq0m=mu*cefm(is,0)
+        cq1p=mu*npl*(cefp(is,0)-cefp(is,1))
+        cq1m=mu*npl*(cefm(is,0)-cefm(is,1))
+        cq2p=cefp(is,1)+mu*npl2*(cefp(is,2)+cefp(is,0)-2.0_wp_*cefp(is,1))
+
+        ca11=ca11+is**2*asl*cq0p
+        ca12=ca12+is*l*asl*cq0m
+        ca22=ca22+bsl*cq0p
+        ca13=ca13+is*asl*cq1m/yg
+        ca23=ca23+l*asl*cq1p/yg
+        ca33=ca33+asl*cq2p/yg**2
+      end do
+      epsl(1,1,l) = -ca11*fcl
+      epsl(1,2,l) = +im*ca12*fcl
+      epsl(2,2,l) = -ca22*fcl
+      epsl(1,3,l) = -ca13*fcl
+      epsl(2,3,l) = -im*ca23*fcl
+      epsl(3,3,l) = -ca33*fcl
     end do
-    epsl(1,1,l) = -ca11*fcl
-    epsl(1,2,l) = +im*ca12*fcl
-    epsl(2,2,l) = -ca22*fcl
-    epsl(1,3,l) = -ca13*fcl
-    epsl(2,3,l) = -im*ca23*fcl
-    epsl(3,3,l) = -ca33*fcl
-  end do
-!
+  end block
+
   cq2p=cefp(0,1)+mu*npl2*(cefp(0,2)+cefp(0,0)-2.0_wp_*cefp(0,1))
   a330=-mu*cq2p
-!
+
   do l=1,lrm
     epsl(2,1,l) = - epsl(1,2,l)
     epsl(3,1,l) =   epsl(1,3,l)
     epsl(3,2,l) = - epsl(2,3,l)
   end do
-!
+
 end subroutine diel_tens_wr
-!
-!
-!
+
+
 subroutine fsup(cefp,cefm,lrm,yg,npl,mu)
   implicit none
 ! local constants