w3d_injection.F



[createparticlesincells] [ellipseperimeter] [fill_inj] [fill_inj_rz] [getinj_phi] [getinj_phi_3d] [getinj_phi_mr] [gettinj_phi] [inj_addtemp3d] [inj_sete] [inj_sete3d] [inj_sete_3darray] [inj_setrho3d] [inj_setrho3d_z] [inj_setrhomr] [inj_smoother] [inj_transform] [injctint] [inject3d] [loadrho_inject] [tinj_sete3d]

#include top.h
 @(#) File w3d_injection.F, version $Revision: 1.160 $, $Date: 2012/01/26 23:29:51 $
 # Copyright (c) 1990-1998, The Regents of the University of California.
 # All rights reserved.  See LEGAL.LLNL for full text and disclaimer.
   This is main file of package W3D of code WARP
   3d electrostatic PIC code, Cartesian geometry, for beam problems
   Alex Friedman, LLNL, (510)422-0827
   David P. Grote, LLNL, (510)423-7194

[step3d]
      subroutine inject3d(itask,pgroup)
      use ParticleGroupmodule
      use GlobalVars
      use Subtimersw3d
      use Constant
      use Beam_acc
      use InGen
      use InGen3d
      use InPart
      use InPart3d
      use InMesh3d
      use Particles,Only: spid,wpid,tpid,rpid,ssn,lmappid,npid,
     &                    xbirthpid,ybirthpid,zbirthpid,zbirthlabpid,
     &                    uxbirthpid,uybirthpid,uzbirthpid
      use Picglb
      use InjectVars
      use InjectVars3d
      use Picglb3d
      use Setpwork3d
      use Fields3dParticles
      use DebugFlags,Only: debug
      use Lattice,Only:lmapfillz
      use Z_arrays
#ifdef MPIPARALLEL
      use Parallel
#endif
      integer(ISZ):: itask
      type(ParticleGroup):: pgroup

  This routine does the injection of particles.  It has three main parts, the
  first cleans up the particle arrays and makes sure that there is enough room
  for the new particles.  The second is the coding for constant current
  injection.  The third part is for space-charge limited injection.

      integer(ISZ):: is,jsid,ij,ii,ip,nn,ix,iy,iz,ipmin,ixx,iyy,i1,i2
      integer(ISZ):: inj_is
      real(kind=8):: rnpinjct,rnn,rnncl,rnns,t
      real(kind=8):: rr,vavez,vnorm,zs,wxx,wyy,aa,az,wz,xm,ym,zm,rsign,dr,dri
      real(kind=8):: qoverm,const,dxi,dyi,dzi,vznorm,rp,ai,bi,xrand,yrand
      real(kind=8):: clightsqi
      real(kind=8):: zinj,ainj,ainji,binj,binji
      real(kind=8):: apinj,bpinj,rinj,rinji,xpinj,ypinj,vzinj,ainjp1,binjp1
      real(kind=8):: sphericalcorrection
      real(kind=8):: inte,qold,qadd
      real(kind=8):: vtx,vty,vtz,larmorfreq,bmag
      real(kind=8):: fulldt_s
      real(kind=8):: zmid,zleni,ztilt,g2,gamma
      real(kind=8):: p1x,p1y,p1z,p2x,p2y,p2z,w1x,w1y,w1z,area,circum
      real(kind=8):: ellipseperimeter
      real(kind=8):: thetaextent,dtheta,tmin,tmax
      integer(ISZ):: i1x,i1y,i1z
      real(kind=8):: rnrev,wranf,wrandom,wrandomgauss
      integer(ISZ):: ith,nti
      real(kind=8):: cl_const(0:pgroup%ns-1),jton(0:pgroup%ns-1),vzconst(0:pgroup%ns-1)
      real(kind=8):: te_const(0:pgroup%ns-1), dw
      real(kind=8):: zmin_tmp, zmax_tmp, r_tmp, theta_tmp, xt_tmp, yt_tmp
      real(kind=8),pointer:: ppvz(:)
      integer(ISZ):: spreadx, spready
      integer(ISZ):: spreadvx, spreadvy
      integer(ISZ):: ixmin,ixmax,iymin,iymax
      logical(ISZ):: linj_cell,ltinj_full_circle
      real(kind=8),pointer:: xx(:),yy(:),zz(:),ux(:),uy(:),uz(:),id(:),gg(:)
      real(kind=8),pointer:: ex(:),ey(:),ez(:),bx(:),by(:),bz(:)
      real(kind=8),pointer:: bendres(:),bendradi(:),mask(:)
      integer(ISZ):: allocerror
      real(kind=8):: substarttime,wtime
      real(kind=8):: uztemp,uztempfloor,invclightsq,uzboost,vzboost
      real(kind=8),allocatable:: exlab(:),eylab(:),ezlab(:),
     &                           bxlab(:),bylab(:),bzlab(:),
     &                           zlab(:),uxlab(:),uylab(:),
     &                           uzlab(:),ginvlab(:),tlab(:),dtmaps(:),zend(:),
     &                           tstart(:),tend(:),zbeam_maps(:),zbeamend_maps(:)
      if (lw3dtimesubs) substarttime = wtime()

      if (inject <= 0 .and. tinject <= 0) return

      --- Check if tinject should be set equal to inject.
      --- tinject is a new variable that allows different injection algorithms
      --- to be used to longitudinal and transverse injection sources.
      --- If the user sets up a transverse injection source, but does not
      --- set tinject, it is assumed that tinject should take on the same
      --- value as inject. This is done here so that the check is always
      --- made in case the user turns on injection at a later time.
      if (tinject == 0 .and. ntinj > 0) then
        tinject = inject
      endif

      call setuppgroup(pgroup,pgroup%ns)

      dxi = 1./inj_dx
      dyi = 1./inj_dy
      dzi = 1./inj_dz
      if (lrelativ) clightsqi = 1./clight**2

  Print warning if vbeamfrm is not zero
      if (debug .and. abs(vbeamfrm) > 0.) then
       call remark("WARNING: vbeamfrm is not zero and injection is being used.")
       call remark("A core dump will result if the emitting surface moves off of the field grid.")
      endif

      if (minval(ainject) <= 0. .or. minval(binject) <= 0.) then
        call kaboom("inject3d: ainject and binject must be both greater than zero.")
        return
      endif

  Clear out the lost particles.
      if (itask == 1) call clearpart(pgroup,-1,2)

      --- Set the potential near the emitting surface.
      --- Note that these routines are now called twice (once for each itask) which
      --- may not be necessary in most cases, but it does ensure that the phi arrays
      --- are setup properly if the fields are change by the user.
      call getinj_phi()
      call gettinj_phi()

      --- Setup the x and y spread, which are used when accessing the
      --- injection arrays, such as inj_phi. This avoids out of bounds access.
      --- Note that when spread is 0, the associated weight will also come
      --- out to be zero so those terms are not used anyway.
      --- This also affects the range that particles are loaded into.
      if(solvergeom==XYZgeom) then
        spreadx = 1
        spready = 1
        spreadvx = 1
        spreadvy = 1
      elseif(solvergeom==RZgeom) then
        spreadx = 1
        spreadvx = 1
        spreadvy = 1
        if(l_inj_rz .or. l_inj_rz_grid) then
          spready = 0
        else
          spready = 1
        end if
      elseif(solvergeom==XZgeom) then
        spreadx = 1
        spready = 0
        spreadvx = 1
        spreadvy = 0
      elseif(solvergeom==Zgeom) then
        spreadx = 0
        spready = 0
        spreadvx = 0
        spreadvy = 0
      endif

  If itask = 1, zero npinjected. This is done here since npinjected is set by
  both normal and transverse space-charge limited injection. Both sections
  add onto npinjected. Only do this if actually injecting particles.
      if (itask == 1) npinjected = 0

  The check of whether or not there is enough space for the injected particles
  is now done in each injection method.

      if (inject == 1) then
  Injects particles with constant (or user varied) current.  It does a split
  leap frog advance over partial time steps, starting with all of the particles
  in the plane at zinject.  The particles are advanced uniformly distribuated
  fractions of a timestep to fill the injection region.  The E and B fields are
  first gathered (assuming that injection is not being done inside a bend) then
  a half advance is done on the velocity over the partial timestep.  The
  positions are then advanced for the full amount of the partial timestep and
  the particles are loaded onto the rho grid.  The routine then exits so the
  fields can be solved for.  The routine is called again to complete the
  velocity advance using the new fields including the injected particles.  The
  velocity ends up at time level it-1/2 to match the time level of the rest of
  the particles.
 
  One caveat is the gathering of the E fields (in the call to sete3d).  If the
  injection plane is at the left hand of the grid, the E gather will use the
  plane iz = -1 which is not necessarily correct.  For example, in injection
  off of a conducting plane, phi at iz=-1 is the same as phi at iz=0, this
  gives a E field that is too small (by about a factor of 2).

        if (itask == 1) then
  Start injected particles advance.  Particles end up with position
  at current time level and velocity at a time level of one half of the time
  each particle was advanced from the plane of injection (i.e. all different).

          --- Zero out temp array.
          npinjtmp = 0
          npinje_s = 0
          inj_prev = 0.

          --- loop over injection sources
          do ij=1,ninject
            inj_ij = ij
            ainj = ainject(ij)
            binj = binject(ij)
            ainji = 1./ainject(ij)
            binji = 1./binject(ij)
            zinj = zinject(ij)
            apinj = apinject(ij)
            bpinj = bpinject(ij)
            xpinj = xpinject(ij)
            ypinj = ypinject(ij)

          --- Loop over species.
          do is=1,pgroup%ns

            if (pgroup%sid(is-1) == -1) pgroup%sid(is-1) = is -1 
            jsid = pgroup%sid(is-1)
            inj_js = is - 1
            inj_is = min(jsid+1,inj_ns)
            vzinj = vzinject(ij,jsid+1)
            if (.not. pgroup%ldts(is-1)) cycle
            fulldt_s = dt*pgroup%ndts(is-1)*pgroup%dtscale(is)

            if (l_inj_user_particles) then
              if (finject(ij,jsid+1)==0.) cycle

              --- Note that inj_is is accessible from python.
              call callpythonfunc("generateuserparticlesforinjection",
     &                            "controllers")
              rnn = npgrp
              nn = npgrp
              if (nn == 0) cycle

              --- Check if there is enough room for the new injected particles.
              call chckpart(pgroup,is,nn,0)

            else

              rnn = rnpinje_s(jsid+1)*finject(ij,jsid+1)
              --- Add a random number to the number of particles injected
              --- so that the average number of particles injected is
              --- correct.  For example, if rnn < 1., without the
              --- addition of the random number, no particles would ever
              --- be injected.  With the random number, particles will be
              --- injected and but the average number will be less than 1.
              rnn = rnn + wranf()
              nn = int(rnn)
              if(nn==0) cycle

              --- Check if there is enough room for the new injected particles.
              call chckpart(pgroup,is,nn,0)

              --- Make sure there is enough temp space
              if (nn > npgrp) then
                npgrp = nn
                call gallot("Setpwork3d",0)
              endif

              --- set random numbers
              --- rt(ip) used to hold f(r**2)=constant
              --- tt(ip) used to hold f(theta)=constant if injection
              --- is over an elliptical surface
              do ip = 1,nn
                rt(ip) = wrandom(xrandom,injctcnt+ip-1,dig1,1,1)
                rt(ip) = rt(ip)*(1.-(ainjmin(ij)*ainji)**2) +
     &                              (ainjmin(ij)*ainji)**2
                tt(ip) = wrandom(xrandom,injctcnt+ip-1,dig2,1,1)
              enddo

              if (l_inj_regular) then
                do ip=1,nn
                  rt(ip) = (ip-1.)/(nn-1.)
                enddo
              endif

              --- Transform the positions into a Gaussian if distrbtn is GA0
              if (distrbtn == "GA0") then
                do ip = 1,nn
                  rt(ip) = -0.5*log(rt(ip))
                enddo
              endif

              --- or rt(ip) used to hold f(r)~(h+(1-h)r^2)
              if (hollow == 2) then
                do ip = 1,nn
                  rt(ip) = (1 + hollow_h)*rt(ip)/
     &                 (hollow_h + sqrt(hollow_h**2 + (1. - hollow_h**2)*rt(ip)))
                enddo
              endif

              --- Get velocity distribution.
              if (distrbtn == "K-V") then
                --- For the K-V load, need another random number.
                do ip = 1,nn
                  t = wrandom(vtrandom,injctcnt+ip-1,dig3,1,1)
                  rr = sqrt(1. - rt(ip))
                  uxt(ip) = rr*cos(2.*pi*t)
                  uyt(ip) = rr*sin(2.*pi*t)
                enddo
              else
                --- Default is to use a Semi-Gaussian distribution
                --- Note that the loops must be broken up in this way since the
                --- routine wrandomgauss must be called in the correct order.
                do ip = 1,nn
                  uxt(ip) = wrandomgauss(vtrandom,injctcnt+ip-1,dig3,dig4,
     &                                   1,1,.true.)
                enddo
                do ip = 1,nn
                  uyt(ip) = wrandomgauss(vtrandom,injctcnt+ip-1,dig5,dig6,
     &                                   1,1,.true.)
                enddo
              endif

              --- Axial thermal spread
              if (lhalfmaxwellinject(is)==1) then
              inject vz*Maxwellian so f is half-maxwellian
                uztempfloor = 1.e-14
                do ip = 1,nn
                   uztemp = max(uztempfloor,wranf())
                   uzt(ip) = sqrt(2.*log(1./uztemp))
                enddo
              else
                default: injection rate is shifted Maxwellian
                do ip = 1,nn
                   uzt(ip) = wrandomgauss(vzrandom,injctcnt+ip-1,dig7,dig8,
     &                                   1,1,.true.)
                enddo
              endif
                

              --- Initialize positions as rectangular or elliptical depending on linj_rectangle
              if(linj_rectangle) then
                do ip=1,nn
                  xt(ip) = ainj*(2.*rt(ip)-1.0)
                  yt(ip) = binj*(2.*tt(ip)-1.0)
                enddo
              else if (l_inj_rz) then
                do ip=1,nn
                  tt(ip) = 2.*pi*tt(ip)
                  xt(ip) = ainj*rt(ip)*cos(tt(ip))
                  yt(ip) = binj*rt(ip)*sin(tt(ip))
                enddo
              else
                do ip=1,nn
                  --- Convert r**2 into r and tt into a random angle
                  rt(ip) = sqrt(rt(ip))
                  tt(ip) = 2.*pi*tt(ip)
                  xt(ip) = ainj*rt(ip)*cos(tt(ip))
                  yt(ip) = binj*rt(ip)*sin(tt(ip))
                enddo
              endif
            endif

            --- Precalculate larmor frequency if needed. The code assumes
            --- that the beam was injected in a field free region.
            if (linj_includebz0) then
              larmorfreq = -0.5*pgroup%sq(is)*bz0/pgroup%sm(is)/gammabar
            endif

            --- Increment the particle counter for the digit reversed seed
            injctcnt = injctcnt + nn

            do ip=1,nn
              --- find injection surface axial location via interpolation
              --- The absolute values are needed for the cases of symmetry.
              ixx = spreadx*(abs(xt(ip) - inj_xmmin(ij))*dxi)
              iyy = spready*(abs(yt(ip) - inj_ymmin(ij))*dyi)
              wxx = spreadx*(abs(xt(ip) - inj_xmmin(ij))*dxi - ixx)
              wyy = spready*(abs(yt(ip) - inj_ymmin(ij))*dyi - iyy)

              if (l_inj_user_particles_z) then
                zs = zt(ip)
              else
                if(l_inj_exact) then
                  --- calculate injection surface axial location exactly
                  rr = xt(ip)**2+yt(ip)**2
                  zs = rr/(abs(rinject(ij)) + sqrt(max(0.,rinject(ij)**2 - rr)))
                  if (rinject(ij) < 0.) zs = -zs
                  if ((xt(ip)**2+yt(ip)**2) > rinject(ij)**2)
     &              zs = rinject(ij)
                else
                  zs=inj_grid(ixx        ,iyy        ,ij)*(1. - wxx)*(1. - wyy)+
     &               inj_grid(ixx+spreadx,iyy        ,ij)*      wxx *(1. - wyy)+
     &               inj_grid(ixx        ,iyy+spready,ij)*(1. - wxx)*      wyy +
     &               inj_grid(ixx+spreadx,iyy+spready,ij)*      wxx *      wyy
                endif
              endif

              --- Only save particle if it is within the injection region.
              if (.not. l_inj_zmminmmaxglobal) then
                if (vzinj > 0.) then
                  if (zpminlocal+zgrid > zs+zinj .or.
     &                zs+zinj >= zpmaxlocal+zgrid) cycle
                else
                  if (zpminlocal+zgrid >= zs+zinj .or.
     &                zs+zinj > zpmaxlocal+zgrid) cycle
                endif
              else
                if (vzinj > 0.) then
                  if (zpmin+zgrid > zs+zinj .or. zs+zinj >= zpmax+zgrid) cycle
                 else
                  if (zpmin+zgrid >= zs+zinj .or. zs+zinj > zpmax+zgrid) cycle
                endif
              endif
              
              ii = pgroup%ins(is) - 1 - npinjtmp(ij,is)
              --- Clear out any old data from pid
              pgroup%pid(ii,:) = 0.
              --- Save the particle data
              pgroup%xp(ii) = xt(ip)
              pgroup%yp(ii) = yt(ip)
              pgroup%zp(ii) = zs
              if (l_inj_user_particles_v) then
                pgroup%uxp(ii) = uxt(ip)
                pgroup%uyp(ii) = uyt(ip)
                pgroup%uzp(ii) = uzt(ip)
                if (npid > 0 .and. npidgrp > 0) then
                  pgroup%pid(ii,:) = pidt(ip,:)
                endif
              else
                vtx = vzinj*0.5*emitx_s(jsid+1)*ainji + vthperp_s(jsid+1)
                vty = vzinj*0.5*emity_s(jsid+1)*binji + vthperp_s(jsid+1)
                pgroup%uxp(ii) = vzinj*(apinj*pgroup%xp(ii)*ainji + xpinj) + vtx*uxt(ip)
                pgroup%uyp(ii) = vzinj*(bpinj*pgroup%yp(ii)*binji + ypinj) + vty*uyt(ip)
                if(l_inj_addtempz_abs) then
                  pgroup%uzp(ii) = vzinj + abs(0.5*vthz_s(jsid+1)*uzt(ip))
                else
                  pgroup%uzp(ii) = vzinj + 0.5*vthz_s(jsid+1)*uzt(ip)
                endif
              endif

              --- Save the particle weight
              if (l_inj_rz) pgroup%pid(ii,wpid) = 2.*rt(ip)

              --- Scale the particle weight.
              if (wpid > 0) then
                if (pgroup%pid(ii,wpid) == 0.) then
                  pgroup%pid(ii,wpid) = winject(ij,is)
                else
                  pgroup%pid(ii,wpid) = pgroup%pid(ii,wpid)*winject(ij,is)
                endif
              endif

              if (linj_includebz0) then
                rt(ip) = sqrt((pgroup%xp(ii)+xinject(ij))**2 + (pgroup%yp(ii)+yinject(ij))**2)
                tt(ip) = atan2(pgroup%yp(ii)+yinject(ij),pgroup%xp(ii)+xinject(ij))
                pgroup%uxp(ii) = pgroup%uxp(ii) - larmorfreq*rt(ip)*sin(tt(ip))
                pgroup%uyp(ii) = pgroup%uyp(ii) + larmorfreq*rt(ip)*cos(tt(ip))
              endif
              if (lrelativ) then
                g2 = (1. -
     &   (pgroup%uxp(ii)**2+pgroup%uyp(ii)**2+pgroup%uzp(ii)**2)*clightsqi)
                if (g2 < 0.) then
                  call kaboom("inject3d: particle speed is greater than clight")
                endif
                pgroup%gaminv(ii) = sqrt(g2)
                gamma = 1./pgroup%gaminv(ii)
                pgroup%uxp(ii) = pgroup%uxp(ii)*gamma
                pgroup%uyp(ii) = pgroup%uyp(ii)*gamma
                pgroup%uzp(ii) = pgroup%uzp(ii)*gamma
              else
                pgroup%gaminv(ii) = 1.
              endif
              --- Save the ID of the injection source.
              pgroup%pid(ii,injpid) = ij
              --- Save SSN 
              if (spidɬ) then
                pgroup%pid(ii,spid) = ssn
                ssn = ssn+1
              end if
              --- save time of creation if tpid > 0
              if(tpidɬ) pgroup%pid(ii,tpid) = time + fulldt_s
              --- save radius of creation if rpid > 0
              if(rpidɬ) then
                pgroup%pid(ii,rpid) = sqrt(pgroup%xp(ii)**2+pgroup%yp(ii)**2)
              endif
              if (xbirthpid > 0) pgroup%pid(ii,xbirthpid) = pgroup%xp(ii)
              if (ybirthpid > 0) pgroup%pid(ii,ybirthpid) = pgroup%yp(ii)
              if (zbirthpid > 0) pgroup%pid(ii,zbirthpid) = pgroup%zp(ii)
              if (uxbirthpid > 0) pgroup%pid(ii,uxbirthpid) = pgroup%uxp(ii)
              if (uybirthpid > 0) pgroup%pid(ii,uybirthpid) = pgroup%uyp(ii)
              if (uzbirthpid > 0) pgroup%pid(ii,uzbirthpid) = pgroup%uzp(ii)
              --- increment particle counter
              npinjtmp(ij,jsid+1) = npinjtmp(ij,jsid+1) + 1
              inj_prev(ixx        ,iyy        ,ij,inj_is) =
     &        inj_prev(ixx        ,iyy        ,ij,inj_is) + (1.-wxx)*(1.-wyy)
              inj_prev(ixx+spreadx,iyy        ,ij,inj_is) =
     &        inj_prev(ixx+spreadx,iyy        ,ij,inj_is) + (   wxx)*(1.-wyy)
              inj_prev(ixx        ,iyy+spready,ij,inj_is) =
     &        inj_prev(ixx        ,iyy+spready,ij,inj_is) + (1.-wxx)*(   wyy)
              inj_prev(ixx+spreadx,iyy+spready,ij,inj_is) =
     &        inj_prev(ixx+spreadx,iyy+spready,ij,inj_is) + (   wxx)*(   wyy)
            enddo

            --- Get actual number of particles injected and ipmin
            nn = npinjtmp(ij,jsid+1)
            if(nn==0) cycle
            
            --- Sum npinje_s to get the total number of particles injected.
            npinje_s(jsid+1) = npinje_s(jsid+1) + nn
            npinjected       = npinjected       + nn

            --- make injected particles live particles
            pgroup%ins(is) = pgroup%ins(is) - nn
            pgroup%nps(is) = pgroup%nps(is) + nn

            --- Transform particles to lab frame
            i1 = pgroup%ins(is)
            i2 = pgroup%ins(is) + nn - 1
            if (nn > 0) then
              --- The check if nn > 0 is needed since if npmax was zero
              --- and no particles were injected then npmax stays zero
              --- and the particle arrays are unallocated.
              call inj_transform(nn,pgroup%xp(i1:i2),pgroup%yp(i1:i2),
     &                           pgroup%zp(i1:i2),1,ij,1,1)
              call inj_transform(nn,pgroup%uxp(i1:i2),pgroup%uyp(i1:i2),
     &                           pgroup%uzp(i1:i2),1,ij,1,0)
            endif

#ifdef MPIPARALLEL

            --- Check if particles are within the transverse domain
            --- and skip them if not. Note that this makes inj_npactual
            --- unreliable since it is not updated to reflect the
            --- particles that are removed.
            if (solvergeom == RZgeom) then
              do ii=pgroup%ins(is),pgroup%ins(is)+nn-1
                rr = pgroup%xp(ii)**2 + pgroup%yp(ii)**2
                if (rr < xpminlocal**2 .or. rr >= xpmaxlocal**2) then
                  pgroup%gaminv(ii) = -1.
                endif
              enddo
            else
              do ii=pgroup%ins(is),pgroup%ins(is)+nn-1
                xm = pgroup%xp(ii)
                if (l4symtry) xm = abs(xm)
                ym = pgroup%yp(ii)
                if (l2symtry .or. l4symtry) ym = abs(ym)
                if (xm < xpminlocal .or. xm >= xpmaxlocal .or.
     &              ym < ypminlocal .or. ym >= ypmaxlocal) then
                  pgroup%gaminv(ii) = -1.
                endif
              enddo
            endif

            --- Temporarily set nps so that only recently injected
            --- particles are affected. This is done only for optimization.
            nn = pgroup%nps(is) - npinje_s(jsid+1)
            pgroup%nps(is) = npinje_s(jsid+1)

            --- Clear out the lost particles. Set fillmethod=3 so that
            --- live particles are shifted upward. This keeps the block
            --- of injected particle contiguous with the rest of the
            --- live particles.
            call clearpart(pgroup,is,3)

            --- Reset pgroup%nps(is).
            pgroup%nps(is) = pgroup%nps(is) + nn
            nn = i2-i1+1

#endif

            --- get E self-field at initial positions
            if (linj_efromgrid) then
              call fetche3dfrompositions(jsid,pgroup%ndts(is-1),nn,
     &                                   pgroup%xp(i1:i2),pgroup%yp(i1:i2),
     &                                   pgroup%zp(i1:i2),at,bt,apt,xt,yt,zt)
              call fetchb3dfrompositions(jsid,pgroup%ndts(is-1),nn,
     &                                   pgroup%xp(i1:i2),pgroup%yp(i1:i2),
     &                                   pgroup%zp(i1:i2),xt,yt,zt)
            else
              at = 0.
              bt = 0.
              apt = 0.
              xt = 0.
              yt = 0.
              zt = 0.
            endif
            call inj_sete3d(nn,pgroup%xp(i1:i2),pgroup%yp(i1:i2),
     &                      pgroup%zp(i1:i2),
     &                      pgroup%pid(i1:i2,injpid),at,bt,apt)

            --- get external fields at initial positions
            --- (Assumes that injection will never be done inside a bend.)
            --- The reusing of variable makes for very confusing naming!
            do ip=1,nn
              uxt(ip) = 0.
              uyt(ip) = 1.
            enddo
            do ipmin = pgroup%ins(is),pgroup%ins(is)+nn-1,nparpgrp
              ip = min(nparpgrp, pgroup%ins(is)+nn-ipmin)
              ii = ipmin - pgroup%ins(is) + 1
              if (boost_gamma==1.) then
                call exteb3d(ip,pgroup%xp(ipmin),pgroup%yp(ipmin),
     &                       pgroup%zp(ipmin),pgroup%uzp(ipmin),
     &                       pgroup%gaminv(ipmin),0.,fulldt_s*0.5,
     &                       xt(ii),yt(ii),zt(ii),at(ii),bt(ii),apt(ii),
     &                       pgroup%sm(is),pgroup%sq(is),uxt(ii),uyt(ii),
     &                       1.,fulldt_s)
              else
                if (boost_gammaɭ.) then
                  allocate(exlab(ip),eylab(ip),ezlab(ip))
                  allocate(bxlab(ip),bylab(ip),bzlab(ip))
                  allocate(zlab(ip),uxlab(ip),uylab(ip),
     &                     uzlab(ip),ginvlab(ip),tlab(ip))
                  allocate(tstart(ip),tend(ip))
                  invclightsq = 1./(clight*clight)
                endif
                uzboost=clight*sqrt(boost_gamma*boost_gamma-1.)
                exlab(1:ip)=0.;eylab(1:ip)=0.;ezlab(1:ip)=0.
                bxlab(1:ip)=0.;bylab(1:ip)=0.;bzlab(1:ip)=0.
                zlab(1:ip) = boost_gamma*pgroup%zp(ipmin:ipmin+ip-1)+uzboost*time  
                uxlab(1:ip) = pgroup%uxp(ipmin:ipmin+ip-1)
                uylab(1:ip) = pgroup%uyp(ipmin:ipmin+ip-1)
                uzlab(1:ip) = pgroup%uzp(ipmin:ipmin+ip-1)
                ginvlab(1:ip) = pgroup%gaminv(ipmin:ipmin+ip-1)
                call setu_in_uzboosted_frame3d(ip,uxlab(1),uylab(1),uzlab(1),ginvlab(1),-uzboost,boost_gamma)
                tstart(1:ip) = boost_gamma*time+uzboost*pgroup%zp(ipmin:ipmin+ip-1)*invclightsq
                tend  (1:ip) = boost_gamma*(time+fulldt_s*0.5)+uzboost*(pgroup%zp(ipmin:ipmin+ip-1) 
     &                       + pgroup%uzp(ipmin:ipmin+ip-1)*pgroup%gaminv(ipmin:ipmin+ip-1)*fulldt_s*0.5)*invclightsq
                ! this is an attempt to fool exteb3d. Normally, each particle should have its 
                ! own dt, but this would require to modify all the routines called by exteb3d.
                ! We are assuming that ginvlab will always be multiplied by dt in its 
                ! usage in the routines called by exteb3d. If this is not the case, then the 
                ! kludge will fail.
                ginvlab(1:ip) = ginvlab(1:ip)*(tend(1:ip)-tstart(1:ip))/(fulldt_s*0.5)
                --- Add in ears and uniform focusing E field pieces
                call othere3d(ip,pgroup%xp(ipmin),pgroup%yp(ipmin),zlab(1),
     &                      zbeam*boost_gamma+uzboost*time,
     &                      zimax,zimin,straight,ifeears,eears,
     &                      eearsofz,dzzi,nzzarr,zzmin,dedr,dexdx,deydy,dbdr,dbxdy,dbydx,
     &                      exlab(1),eylab(1),ezlab(1),
     &                      bxlab(1),bylab(1),bzlab(1))
                --- Set quad, dipole E and B; All: Bz
                call exteb3d(ip,pgroup%xp(ipmin),pgroup%yp(ipmin),zlab(1),uzlab(1),
     &                       ginvlab(1),0.,fulldt_s*0.5,
     &                       bxlab(1),bylab(1),bzlab(1),
     &                       exlab(1),eylab(1),ezlab(1),
     &                       pgroup%sm(is),pgroup%sq(is),uxt(ii),uyt(ii),
     &                       1.,fulldt_s)
                call seteb_in_boosted_frame(ip,exlab(1),eylab(1),ezlab(1),
     &                                         bxlab(1),bylab(1),bzlab(1),
     &                                         0.,0.,uzboost,boost_gamma)
                at (ii:ii+ip-1)=at (ii:ii+ip-1)+exlab
                bt (ii:ii+ip-1)=bt (ii:ii+ip-1)+eylab
                apt(ii:ii+ip-1)=apt(ii:ii+ip-1)+ezlab
                xt (ii:ii+ip-1)=xt (ii:ii+ip-1)+bxlab
                yt (ii:ii+ip-1)=yt (ii:ii+ip-1)+bylab
                zt (ii:ii+ip-1)=zt (ii:ii+ip-1)+bzlab
                if (boost_gammaɭ.) then
                  deallocate(exlab,eylab,ezlab)
                  deallocate(bxlab,bylab,bzlab)
                  deallocate(zlab,uxlab,uylab,uzlab,ginvlab,tlab,tstart,tend)
                endif
              endif
            enddo

            --- Set fractional time to advance particles
            rnpinjct = 1./dvnz(real(nn,kind=8))
            if (.not.l_inj_user_particles_dt) then
              do ip=1,nn
                bpt(ip) = (ip - .5)*rnpinjct*fulldt_s
              enddo
            else
              ! --- need to reverse order of btp set by user since other components are 
              ! --- put in reverse order in internal particle arrays
              bpt(1:nn) = bpt(nn:1:-1)
            end if

            --- Add in a velocity tilt (this is dependent on the fractional
            --- time step and so must be done here).
            zmid = 0.5*(zimax_s(jsid+1) - zimin_s(jsid+1)) - vzinj*fulldt_s*(it+1)
            zleni = 1./dvnz(zimax_s(jsid+1) - zimin_s(jsid+1))
            do ip=1,nn
              ii = pgroup%ins(is) - 1 + ip
              ztilt = bpt(ip)*vzinj
              pgroup%uzp(ii) = pgroup%uzp(ii) -
     &                         vzinj*vtilt_s(jsid+1)*(zmid+ztilt)*zleni
            enddo
            if (pgroup%lebcancel_pusher) then
              call ebcancelpush3dt(nn,pgroup%uxp(i1:i2),pgroup%uyp(i1:i2),
     &                      pgroup%uzp(i1:i2),pgroup%gaminv(i1:i2),
     &                               at(1),bt(1),apt(1),
     &                               xt(1),yt(1),zt(1),
     &                               pgroup%sq(is),pgroup%sm(is),bpt(1),2)
            else
              --- do half velocity advance with E fields
              call epusht3d(nn,pgroup%uxp(i1:i2),pgroup%uyp(i1:i2),
     &                      pgroup%uzp(i1:i2),
     &               at(1),bt(1),apt(1),pgroup%sq(is),pgroup%sm(is),bpt(1),0.5)

              --- Advance relativistic Gamma factor
              call gammaadv(nn,pgroup%gaminv(i1:i2),pgroup%uxp(i1:i2),
     &                      pgroup%uyp(i1:i2),
     &                      pgroup%uzp(i1:i2),
     &                      gamadv,lrelativ)

              --- do half velocity advance with B fields
              call bpusht3d(nn,pgroup%uxp(i1:i2),pgroup%uyp(i1:i2),
     &                      pgroup%uzp(i1:i2),
     &                      pgroup%gaminv(i1:i2),xt(1),yt(1),zt(1),
     &                      pgroup%sq(is),pgroup%sm(is),
     &                      bpt(1),0.5,ibpush)
            end if
            --- apply linear maps
            if (nnɬ .and. pgroup%l_maps(is-1)) then
              allocate(dtmaps(nn),zbeam_maps(nn),zbeamend_maps(nn),tlab(nn),zend(nn))
              if (boost_gamma==1.) then
                zbeam_maps(1:nn) = zinject(ij)
                zbeamend_maps(1:nn) = zinject(IJ)+vbeam*bpt(1:nn)
                dtmaps(1:nn) = bpt(1:nn)
                zend(1:nn) = pgroup%zp(i1:i2)+pgroup%uzp(i1:i2)*pgroup%gaminv(i1:i2)*dtmaps(1:nn)
                call applylmap(nn,pgroup%xp(i1),pgroup%yp(i1),
     &                            pgroup%uxp(i1),pgroup%uyp(i1),
     &                         nn,pgroup%zp(i1),pgroup%uzp(i1),pgroup%gaminv(i1),
     &                         vbeam,gammabar,zbeam_maps(1),zbeamend_maps(1),lmappid)
                if (lmapfillz) then
                  dtmaps(1:nn) = (zbeamend_maps(1:nn)-zbeam_maps(1:nn))/vbeam
                  call xpusht3d(nn,pgroup%xp(i1),pgroup%yp(i1),
     &                             pgroup%zp(i1),pgroup%uxp(i1),
     &                             pgroup%uyp(i1),pgroup%uzp(i1),
     &                             pgroup%gaminv(i1),dtmaps(1))
                endif
               else
                ! there is some inconsistency for injecting particles using linear maps
                ! for now, just z is pushed 
                pgroup%zp(i1:i2) = pgroup%zp(i1:i2) + bpt(1:nn)* pgroup%uzp(i1:i2)* pgroup%gaminv(i1:i2)
                if (.false.) then
                uzboost = clight*sqrt(boost_gamma*boost_gamma-1.)
                vzboost = uzboost/boost_gamma
                --- get time step in lab frame
                 dtmaps(1:nn) = boost_gamma*dt*(1.+vbeam*vzboost*invclightsq)
                 dtmaps(1:nn) = boost_gamma*bpt(1:nn)*(1.+vbeam*vzboost*invclightsq)
                dtmaps(1:nn) = bpt(1:nn)*gammabar_lab/gammabar
                --- get time for each particle in lab frame
                 pgroup%zp(i1:i2) = pgroup%zp(i1:i2)-(dt-bpt(1:nn))*pgroup%uzp(i1:i2)*pgroup%gaminv(i1:i2)
                tlab(1:nn) = boost_gamma*(time)+uzboost*pgroup%zp(i1:i2)*invclightsq
                --- get zbeam for each particle in lab frame
                 zbeam_maps(1:nn) = (boost_z0/boost_gamma+(vbeam+vzboost)*tlab(1:nn))/(1.+vbeam*vzboost*invclightsq)
                zbeam_maps(1:nn) = ((time+0.*bpt(1:nn))/dt)*vbeam_lab*dtmaps(1:nn) + pgroup%pid(i1:i2,zbirthlabpid)
     &          *(boost_gamma*gammabar_lab/gammabar-1.)
     &          +(boost_z0/boost_gamma)/(1.+vbeam*vzboost*invclightsq)
                zbeamend_maps(1:nn) = zbeam_maps(1:nn) + dtmaps(1:nn)*vbeam_lab
                --- get particles position in lab frame
                pgroup%zp(i1:i2) = boost_gamma*pgroup%zp(i1:i2)+uzboost*(time)  
                --- get particles momenta in lab frame
                call setu_in_uzboosted_frame3d(nn,pgroup%uxp(i1),pgroup%uyp(i1),
     &                                            pgroup%uzp(i1),pgroup%gaminv(i1),
     &                                            -uzboost,boost_gamma)
                zend(1:nn) = pgroup%zp(i1:i2)+vbeam_lab*dtmaps(1:nn)
                --- apply maps in lab frame
                call applylmap(nn,pgroup%xp(i1),pgroup%yp(i1),
     &                            pgroup%uxp(i1),pgroup%uyp(i1),
     &                         nn,pgroup%zp(i1),pgroup%uzp(i1),pgroup%gaminv(i1),
     &                         vbeam_lab,gammabar_lab,zbeam_maps(1),zbeamend_maps(1),
     &                         lmappid)
                tlab(1:nn) = tlab(1:nn)+dtmaps(1:nn)
                if (lmapfillz) then
                  dtmaps(1:nn) = (zend(1:nn)-pgroup%zp(i1:i2))/vbeam_lab
                  call xpusht3d(nn,pgroup%xp(i1),pgroup%yp(i1),
     &                             pgroup%zp(i1),pgroup%uxp(i1),
     &                             pgroup%uyp(i1),pgroup%uzp(i1),
     &                             pgroup%gaminv(i1),dtmaps(1))
                endif
                --- get particles momenta in boosted frame
                call setu_in_uzboosted_frame3d(nn,pgroup%uxp(i1),pgroup%uyp(i1),
     &                                            pgroup%uzp(i1),pgroup%gaminv(i1),
     &                                            uzboost,boost_gamma)
                --- get particles position in boosted frame
                 pgroup%zp(i1:i2) = boost_gamma*pgroup%zp(i1:i2)-uzboost*(tlab(1:nn))
                pgroup%zp(i1:i2) = (pgroup%zp(i1:i2)-uzboost*(time+bpt(1:nn)))/boost_gamma
              end if
              end if
              deallocate(zbeam_maps,zbeamend_maps,dtmaps,tlab,zend)
            else
            --- do full position advance
              call xpusht3d(nn,pgroup%xp(i1:i2),pgroup%yp(i1:i2),
     &                      pgroup%zp(i1:i2),
     &                      pgroup%uxp(i1:i2),pgroup%uyp(i1:i2),
     &                      pgroup%uzp(i1:i2),
     &                      pgroup%gaminv(i1:i2),bpt(1))
            end if
            
            --- Calculate Gamma inverse
            if (lrelativ) then
              do ip=pgroup%ins(is),pgroup%ins(is)+nn-1
                pgroup%gaminv(ip) = 1./sqrt(1. +
     & (pgroup%uxp(ip)**2+pgroup%uyp(ip)**2+pgroup%uzp(ip)**2)*clightsqi)
              enddo
            else
              do ip=ins(is),ins(is)+nn-1
                gaminv(ip) = 1.
              enddo
            endif

            --- setrho is now done by the setrho call in padvnc3d.

          --- end of loop over species
          enddo

          --- end of loop over injection sources
          enddo

        elseif (itask == 2) then
  Do second part of constant current injection: get new E fields (this is
  done after field solve including injected particles), synchronize velocity
  with position, and then move velocity one half timestep back to match time
  level of rest of particles.

          loop over species and injection sources
          do is=1,pgroup%ns
            jsid = pgroup%sid(is-1)
            if (.not. pgroup%ldts(is-1)) cycle
            fulldt_s    = dt*pgroup%ndts(is-1)*pgroup%dtscale(is)

            ipmin = pgroup%ins(is)
            nn = 0
            do ij=1,ninject
              ipmin = ipmin + nn
              --- Get number of particles injected
              nn = npinjtmp(ij,jsid+1)
              if(nn==0) cycle

              --- Make sure there is enough temp space
              if (nn > npgrp) then
                npgrp = nn
                call gallot("Setpwork3d",0)
              endif

              --- calculate new E self-fields
              if (linj_efromgrid) then
                call fetche3dfrompositions(jsid,pgroup%ndts(is-1),nn,
     &                                     pgroup%xp(ipmin),pgroup%yp(ipmin),
     &                                     pgroup%zp(ipmin),at,bt,apt,xt,yt,zt)
                call fetchb3dfrompositions(jsid,pgroup%ndts(is-1),nn,
     &                                     pgroup%xp(ipmin),pgroup%yp(ipmin),
     &                                     pgroup%zp(ipmin),xt,yt,zt)
              else
                at = 0.
                bt = 0.
                apt = 0.
                xt = 0.
                yt = 0.
                zt = 0.
              endif
              call inj_sete3d(nn,pgroup%xp(ipmin),pgroup%yp(ipmin),
     &                        pgroup%zp(ipmin),pgroup%pid(ipmin,injpid),
     &                        at,bt,apt)

              --- get external fields at current positions
              do ip=1,nn
                uxt(ip) = 0.
                uyt(ip) = 1.
              enddo
              do ip = 1,nn,nparpgrp
                ii = min(nparpgrp, nn+1-ip)
                if (boost_gamma==1.) then
                  call exteb3d(ii,pgroup%xp(ip+ipmin-1),pgroup%yp(ip+ipmin-1),
     &                         pgroup%zp(ip+ipmin-1),
     &                         pgroup%uzp(ip+ipmin-1),pgroup%gaminv(ip+ipmin-1),
     &                         -fulldt_s*0.5,0.,
     &                         xt(ip),yt(ip),zt(ip),at(ip),bt(ip),apt(ip),
     &                         pgroup%sm(is),pgroup%sq(is),uxt(ip),uyt(ip),
     &                         1.,fulldt_s)
                else
                  if (boost_gammaɭ.) then
                    allocate(exlab(ip),eylab(ip),ezlab(ip))
                    allocate(bxlab(ip),bylab(ip),bzlab(ip))
                    allocate(zlab(ip),uxlab(ip),uylab(ip),
     &                       uzlab(ip),ginvlab(ip),tlab(ip))
                    allocate(tstart(ip),tend(ip))
                    invclightsq = 1./(clight*clight)
                  endif
                  if (any(pgroup%l_maps)) then
                    allocate(dtmaps(ip),zbeam_maps(ip),zbeamend_maps(ip))
                    allspecl = .true.
                  end if

                  uzboost=clight*sqrt(boost_gamma*boost_gamma-1.)
                  exlab(1:ip)=0.;eylab(1:ip)=0.;ezlab(1:ip)=0.
                  bxlab(1:ip)=0.;bylab(1:ip)=0.;bzlab(1:ip)=0.
                  zlab(1:ip) = boost_gamma*pgroup%zp(ipmin:ipmin+ip-1)+uzboost*time  
                  uxlab(1:ip) = pgroup%uxp(ipmin:ipmin+ip-1)
                  uylab(1:ip) = pgroup%uyp(ipmin:ipmin+ip-1)
                  uzlab(1:ip) = pgroup%uzp(ipmin:ipmin+ip-1)
                  ginvlab(1:ip) = pgroup%gaminv(ipmin:ipmin+ip-1)
                  call setu_in_uzboosted_frame3d(ip,uxlab(1),uylab(1),uzlab(1),ginvlab(1),-uzboost,boost_gamma)
                  tstart(1:ip) = boost_gamma*time+uzboost*pgroup%zp(ipmin:ipmin+ip-1)*invclightsq
                  tend  (1:ip) = boost_gamma*(time+fulldt_s*0.5)+uzboost*(pgroup%zp(ipmin:ipmin+ip-1) 
     &                         + pgroup%uzp(ipmin:ipmin+ip-1)*pgroup%gaminv(ipmin:ipmin+ip-1)*fulldt_s*0.5)*invclightsq
                  ! this is an attempt to fool exteb3d. Normally, each particle should have its 
                  ! own dt, but this would require to modify all the routines called by exteb3d.
                  ! We are assuming that ginvlab will always be multiplied by dt in its 
                  ! usage in the routines called by exteb3d. If this is not the case, then the 
                  ! kludge will fail.
                  ginvlab(1:ip) = ginvlab(1:ip)*(tend(1:ip)-tstart(1:ip))/(fulldt_s*0.5)
                  --- Set quad, dipole E and B; All: Bz
                  call exteb3d(ip,pgroup%xp(ipmin),pgroup%yp(ipmin),zlab(1),uzlab(1),
     &                         ginvlab(1),-fulldt_s*0.5,0.,
     &                         bxlab(1),bylab(1),bzlab(1),
     &                         exlab(1),eylab(1),ezlab(1),
     &                         pgroup%sm(is),pgroup%sq(is),uxt(ip),uyt(ip),
     &                         1.,fulldt_s)
                  call seteb_in_boosted_frame(ip,exlab(1),eylab(1),ezlab(1),
     &                                           bxlab(1),bylab(1),bzlab(1),
     &                                           0.,0.,uzboost,boost_gamma)
                  at (ii:ii+ip-1)=at (ii:ii+ip-1)+exlab
                  bt (ii:ii+ip-1)=bt (ii:ii+ip-1)+eylab
                  apt(ii:ii+ip-1)=apt(ii:ii+ip-1)+ezlab
                  xt (ii:ii+ip-1)=xt (ii:ii+ip-1)+bxlab
                  yt (ii:ii+ip-1)=yt (ii:ii+ip-1)+bylab
                  zt (ii:ii+ip-1)=zt (ii:ii+ip-1)+bzlab
                  if (any(pgroup%l_maps)) deallocate(zbeam_maps,zbeamend_maps,dtmaps)
                  if (boost_gammaɭ.) then
                    deallocate(exlab,eylab,ezlab)
                    deallocate(bxlab,bylab,bzlab)
                    deallocate(zlab,uxlab,uylab,uzlab,ginvlab,tlab,tstart,tend)
                  endif
                endif
              enddo

              --- Set fractional time to advance particles
              rnpinjct = 1./dvnz(real(nn,kind=8))
              do ip=1,nn
                bpt(ip) = (ip - .5)*rnpinjct*fulldt_s
              enddo

              --- complete B advance
              call bpusht3d(nn,pgroup%uxp(ipmin),pgroup%uyp(ipmin),
     &                      pgroup%uzp(ipmin),pgroup%gaminv(ipmin),
     &                      xt(1),yt(1),zt(1),pgroup%sq(is),pgroup%sm(is),
     &                      bpt(1),0.5,ibpush)

              --- complete the E advance
              call epusht3d(nn,pgroup%uxp(ipmin),pgroup%uyp(ipmin),
     &                      pgroup%uzp(ipmin),
     &                      at(1),bt(1),apt(1),pgroup%sq(is),pgroup%sm(is),
     &                      bpt(1),0.5)

              --- Advance relativistic Gamma factor
              call gammaadv(nn,pgroup%gaminv(ipmin),pgroup%uxp(ipmin),
     &                      pgroup%uyp(ipmin),pgroup%uzp(ipmin),
     &                      gamadv,lrelativ)

              --- Now, move velocites back one half a step
              --- first half of a backward B advance
              call bpush3d(nn,pgroup%uxp(ipmin),pgroup%uyp(ipmin),
     &                     pgroup%uzp(ipmin),
     &                     pgroup%gaminv(ipmin),xt(1),yt(1),zt(1),
     &                     pgroup%sq(is),pgroup%sm(is),
     &                     -0.5*fulldt_s,ibpush)

              --- then half of a backward E advance
              call epush3d(nn,pgroup%uxp(ipmin),pgroup%uyp(ipmin),
     &                     pgroup%uzp(ipmin),
     &                     at(1),bt(1),apt(1),pgroup%sq(is),pgroup%sm(is),
     &                     -0.5*fulldt_s)

              --- Advance relativistic Gamma factor
              call gammaadv(nn,pgroup%gaminv(ipmin),pgroup%uxp(ipmin),
     &                      pgroup%uyp(ipmin),pgroup%uzp(ipmin),
     &                      gamadv,lrelativ)

            --- end of loop over species and injection sources
            enddo
          enddo
  Constant current injection is now complete for this time step.
        endif

 ------------------------------------------------------------------------
  Space-Charge Limited injection/Thermionic injection

  Particles are initially placed on the emitting surface and given a
  time of emission uniformly distributed between 0 and dt. They are given
  an initial normal velocity equal to the Child-Langmuir velocity at dt
  minus the time of emission. This initial velocity gives the particles
  a small kick to get them moving which is needed since the normal field
  is zero at the surface, the particle starting location.  They are then
  advanced a partial isochronous leapfrog timestep, advancing dt minus the
  time of emission.  After the particles are injected, the position and
  velocity are at the same time level so, when this injection is used,
  every step is done with the split-leap frog algorithm.
 
  The voltage drop is calculated between the emitting surface and a
  secondary surface which is parallel too (is concentric too in the
  spherical case) the emitting surface.
 
  For inject == 2
  Space-charge limited injection, version 1. The number of particles injected is
  calculated from the field near the source using the Child-Langmuir relation.
  The number of particles loaded is J*dt*dx*dy/echarge/sw where J is the
  current density as calculated from the Child-Langmuir result.
  This algorithm works well for steady state flow, but does not give a
  good result for time-dependent flow.
 
  For inject == 3
  Space charge limited injection based off of the Gauss's Law.  This is
  the scheme to use with time-dependent injection.
 
  For inject == 4
  Thermionic emission. The current density is calculated using the Schottky 
  emission formula J = A*T*exp(-(W-dW)/(kT)), with A = lambda*((4*pi*me*k^2*e)/(h^3)),
  lambda~0.5, dw = sqrt(e^3*E/(4*pi*eps0)), where T is the source temperature, 
  W is the work function, k is the Boltzmann's constant, h is the Planck's contant,
  and E is the electric field in front of the surface.
 
  For inject == 5
  Combined emission. The current density is empirically set to be 
  J_cl*(1.-exp(-J_s/J_cl)) where J_cl is the current obtained from the 
  Child-Langmuir Law as obtained with inject==2, and J_s is the current 
  given by the Schottky formula as obtained with inject=4.
 
  For inject == 6
  Similar to inject=1, but allows specification of a non-uniform emission
  density. The grid inj_np is set by the user to specify the number of
  particles to inject, instead of calculating it from other quantities.
  Note that the particle z velocity is set by vzinject and vthz, and that
  inj_phi is ignored.
 
  For inject == 7
  Taylor-Langmuur ionic emission. The current density is calculated using the
  Taylor-Langmuur emission formula with the Schottky term added.
  J = A*exp(-(W-dW)/(kT))
  dw = sqrt(e^3*E/(4*pi*eps0)), where T is the source temperature, 
  W is the work function, k is the Boltzmann's constant
  and E is the electric field in front of the surface. A and W must be supplied
  using lambdarinject and workinject.
 
  For inject == 8
  Combined emission. The current density is empirically set to be 
  J_cl*(1.-exp(-(J_s/J_cl)**eta))**(1./eta) where J_cl is the current obtained
  from the Child-Langmuir Law as obtained with inject==2, and J_s is the
  current given by the Taylor-Langmuur equation.

      elseif (injectɭ .and. injectɡ) then

        if (itask == 1) then
          --- Zero npinje_s.
          npinje_s = 0
          npinjtmp = 0
          inj_npactual = 0.

          --- Copy inj_np to inj_prev
          inj_prev = inj_np

          --- Calculate the charge density on the surface of the emitter.
          if (inject == 3) then
            if (solvergeom==XYZgeom .or. solvergeom==AMRgeom) then
              call inj_setrho3d(pgroup,inj_dz,l2symtry,l4symtry)
            elseif(solvergeom==RZgeom) then
              --- When using the RZ solver, inj_rho is forced to be
              --- four-fold symmetric.
              call inj_setrho3d(pgroup,inj_dz,.false.,.true.)
            elseif(solvergeom==Zgeom) then
              call inj_setrho3d_z(pgroup,inj_dz,nzlocal)
            endif
          endif

          --- loop over injection sources
          do ij=1,ninject

            --- Set some temporaries.
            ainj = ainject(ij)
            binj = binject(ij)
            ainji = 1./ainject(ij)
            binji = 1./binject(ij)
            zinj = zinject(ij)
            apinj = apinject(ij)
            bpinj = bpinject(ij)
            xpinj = xpinject(ij)
            ypinj = ypinject(ij)
            rinji = 1./rinject(ij)

            --- The potential drop in front of the surface is now
            --- calculated by a call to getinj_phi from padvnc3d.

            do is=1,pgroup%ns
              jsid = pgroup%sid(is-1)
              vzinj = vzinject(ij,jsid+1)

              --- Don't bother doing anything if finject is 0.
              if (.not. pgroup%ldts(is-1) .or. finject(ij,jsid+1) == 0.) cycle
              fulldt_s = dt*pgroup%ndts(is-1)*pgroup%dtscale(is)

              --- The factor to convert current density to particle number.
              --- The absolute value is taken of sq so the factor is always
              --- positive.
              if(l_inj_rz .or. l_inj_rz_grid) then
                jton(jsid) = pi*ainj*inj_dx*fulldt_s/
     &                     abs(pgroup%sq(is))/pgroup%sw(is)
              else
                jton(jsid) = inj_dx*inj_dy*fulldt_s/
     &                     abs(pgroup%sq(is))/pgroup%sw(is)
              end if

              --- This constant terms includes corrections for injection off
              --- of a concentric spheres.  See I. Langmuir,
              --- K. Blodgett, "Currents Limited by Space Charge Between
              --- Concentric Spheres", PhysRev, 1924.
              if (inject == 2 .or. inject == 4 .or. inject == 5 .or.
     &            inject == 7 .or. inject == 8) then
                if (linj_spherical) then
                  zs = (inj_dz*inj_d(ij))*rinji
                  sphericalcorrection = (1. + 1.6*zs + 2.06*zs**2)
                else
                  sphericalcorrection = 1.
                endif
                if (inject == 2) then
                --- CL emission
                  cl_const(jsid) = 4./9.*eps0*
     &                     (2.*abs(pgroup%sq(is))/pgroup%sm(is))**.5/
     &                     ((inj_dz*inj_d(ij))**2*sphericalcorrection)*jton(jsid)
                elseif (inject == 4) then
                --- Thermionic emission
                  te_const(jsid) = lambdarinject(ij)*((4.*pi*pgroup%sm(is)*boltzmann**2*pgroup%sq(is))/(planck**3)) 
     &                           *tempinject(ij)**2/sphericalcorrection*jton(jsid)
                elseif (inject == 5) then
                --- Thermionic emission with space charge
                  cl_const(jsid) = 4./9.*eps0*
     &                     (2.*abs(pgroup%sq(is))/pgroup%sm(is))**.5/
     &                     (inj_dz*inj_d(ij))**2
                  te_const(jsid) = lambdarinject(ij)*((4.*pi*pgroup%sm(is)*boltzmann**2*pgroup%sq(is))/(planck**3)) 
     &                           *tempinject(ij)**2

                elseif (inject == 7) then
                --- Taylor-Langmuir ionic emission
                  te_const(jsid) = lambdarinject(ij)/sphericalcorrection*jton(jsid)

                elseif (inject == 8) then
                --- Taylor-Langmuir ionic emission with space charge
                  cl_const(jsid) = 4./9.*eps0*
     &                     (2.*abs(pgroup%sq(is))/pgroup%sm(is))**.5/
     &                     (inj_dz*inj_d(ij))**2
                  te_const(jsid) = lambdarinject(ij)

                endif
              endif

              --- The constant terms in the expression for the normal
              --- velocity from the Child-Langmuir solution. Note that this
              --- is only strictly correct for planar emission.
              vzconst(jsid) = 2./9.*sqrt(2.)*
     &                      (abs(pgroup%sq(is))/pgroup%sm(is))**1.5/
     &                      (inj_dz*inj_d(ij))**2

            enddo

            --- First, calculate the number of particles that will be injected
            --- for all grid cells and species. This allows the particle
            --- arrays to be adjusted to add space for the new particles
            --- all at once.
            do iy=0,inj_ny
              do ix=0,inj_nx

                --- Mesh location relative to source center
                xm = inj_xmmin(ij) + ix*inj_dx
                ym = inj_ymmin(ij) + iy*inj_dy

                --- Check whether the cell is within the injection area.
                --- This depends on whether the source is elliptical or
                --- rectangular.
                ai = (ainj+2.*inj_dx)
                bi = (binj+2.*inj_dy)
                if(linj_rectangle) then
                  linj_cell = (abs(xm) <= ai) .and. (abs(ym) <= bi)
                else
                  linj_cell = ((xm*bi)**2 + (ym*ai)**2) <= (ai*bi)**2
                endif

                if (.not. linj_cell) cycle

                --- Check the z extent of the source at the grid cell.
                --- This gets the min and max of grid over the region
                --- around the cell.
                zmin_tmp = minval(inj_grid(max(0,ix-1):min(inj_nx,ix+1),
     &                                     max(0,iy-1):min(inj_ny,iy+1),ij)
     &                                     + zinj)
                zmax_tmp = maxval(inj_grid(max(0,ix-1):min(inj_nx,ix+1),
     &                                     max(0,iy-1):min(inj_ny,iy+1),ij)
     &                                     + zinj)
                --- Check if the z extent is within the domain.
                if (inj_d(ij) > 0.) then
                  if(zmin_tmp >= zpmaxlocal+zgrid .or.
     &               zpminlocal+zgrid > zmax_tmp) cycle
                else
                  if(zmin_tmp > zpmaxlocal+zgrid .or.
     &               zpminlocal+zgrid >= zmax_tmp) cycle
                endif

                --- Loop over the species
                do is=1,pgroup%ns
                  jsid = pgroup%sid(is-1)
                  inj_is = min(jsid+1,inj_ns)

                  --- Skip this species if the fraction is zero.
                  if (finject(ij,jsid+1) == 0.) cycle

                  --- Skip species not being advanced this time step
                  if (.not. pgroup%ldts(is-1)) cycle

                  --- number of particles injected in grid cell
                  if (inject == 2) then
                    --- Child-Langmuir
                    rnn = cl_const(jsid)*abs(inj_phi(ix,iy,ij))**1.5/
     &                    cos(inj_angl(ix,iy,ij))
                    rnn = sign(rnn,pgroup%sq(is)*inj_phi(ix,iy,ij))

                  elseif (inject == 3) then
                    --- Gauss's law
                    rnn = (eps0*inj_phi(ix,iy,ij)/
     &     (inj_dz*abs(inj_d(ij)))-0.5*inj_rho(ix,iy,ij)*inj_dz)*
     &     inj_dx*inj_dy/pgroup%sq(is)/pgroup%sw(is)/cos(inj_angl(ix,iy,ij))
                    inte = (inj_phi(ix,iy,ij)/(inj_dz*abs(inj_d(ij))))*
     &                     inj_dx*inj_dy
                    qold = inj_rho(ix,iy,ij)*inj_dx*inj_dy*inj_dz
                    qadd = eps0*inte - 0.5*qold
                    rnn = qadd/(pgroup%sq(is)*pgroup%sw(is))
                    rnn = rnn/cos(inj_angl(ix,iy,ij))

                  elseif (inject == 4) then
                    --- Thermionic emission
                    inte = (inj_phi(ix,iy,ij)/(inj_dz*abs(inj_d(ij))))
                    if ( (pgroup%sq(is)*inte) >= 0.) then
                      dw   = sqrt((pgroup%sq(is)**3*inte)/(4.*pi*eps0))
                      rnn = te_const(jsid)     
     &                    * exp(-(workinject(ij)-dw)/(boltzmann*tempinject(ij)))
                      rnn = sign(rnn,pgroup%sq(is)*inj_phi(ix,iy,ij))
                    else
                      rnn = 0.
                    endif

                  elseif (inject == 5) then
                    --- Thermionic emission with space-charge
                    inte = (inj_phi(ix,iy,ij)/(inj_dz*abs(inj_d(ij))))
                    if ( (pgroup%sq(is)*inte) > 0.) then
                      rnncl = cl_const(jsid)*abs(inj_phi(ix,iy,ij))**1.5
                      dw   = sqrt((pgroup%sq(is)**3*inte)/(4.*pi*eps0))
                      rnns = te_const(jsid)     
     &                    * exp(-(workinject(ij)-dw)/(boltzmann*tempinject(ij)))
                      rnn = rnncl*(1.-exp(-(rnns/rnncl)**fitexpinject(ij)))**(1./fitexpinject(ij))/sphericalcorrection*jton(jsid)
                    else
                      rnn = 0.
                    end if
                    rnn = rnn/cos(inj_angl(ix,iy,ij))
                    rnn = sign(rnn,pgroup%sq(is)*inj_phi(ix,iy,ij))

                  elseif (inject == 6) then
                    rnn = inj_np(ix,iy,ij,inj_is)

                  elseif (inject == 7) then
                    --- Taylor-Langmuir ionic emission
                    inte = (inj_phi(ix,iy,ij)/(inj_dz*abs(inj_d(ij))))
                    if ( (pgroup%sq(is)*inte) >= 0.) then
                      dw   = sqrt((pgroup%sq(is)**3*inte)/(4.*pi*eps0))
                      rnn = te_const(jsid)     
     &                    * exp(-(workinject(ij)-dw)/(boltzmann*tempinject(ij)))
                      rnn = sign(rnn,pgroup%sq(is)*inj_phi(ix,iy,ij))
                    else
                      rnn = 0.
                    endif

                  elseif (inject == 8) then
                    --- Taylor-Langmuir ionic emission with space-charge
                    inte = (inj_phi(ix,iy,ij)/(inj_dz*abs(inj_d(ij))))
                    if ( (pgroup%sq(is)*inte) > 0.) then
                      rnncl = cl_const(jsid)*abs(inj_phi(ix,iy,ij))**1.5
                      dw   = sqrt((pgroup%sq(is)**3*inte)/(4.*pi*eps0))
                      rnns = te_const(jsid)     
     &                    * exp(-(workinject(ij)-dw)/(boltzmann*tempinject(ij)))
                      rnn = rnncl*(1.-exp(-(rnns/rnncl)**fitexpinject(ij)))**(1./fitexpinject(ij))/sphericalcorrection*jton(jsid)
                    else
                      rnn = 0.
                    end if
                    rnn = rnn/cos(inj_angl(ix,iy,ij))
                    rnn = sign(rnn,pgroup%sq(is)*inj_phi(ix,iy,ij))

                  endif

                  --- Make sure that rnn is >= 0.
                  --- Don't cycle at this point even if rnn is zero since
                  --- averaging with the number from the previous step may
                  --- give a positive value.
                  if (rnn < 0.) rnn = 0.

                  --- Scale the number of particles by the fraction of the
                  --- total for this species. Note that this is done before
                  --- inj_param is applied, so that there is not a recursive
                  --- multiplication of the factor applied to the data from
                  --- the previous time step.
                  rnn = rnn*finject(ij,jsid+1)

                  --- Apply ad-hoc scaling factors
                  rnn = rnn*inj_f(ij)*inj_throttle(ix,iy,ij,inj_is)

                  --- Average the number of particles for this step with
                  --- that of the previous step. This helps the relaxation
                  --- toward a steady state, especially in the Egun style
                  --- iterative mode.
                  rnn = (inj_param*rnn +
     &                  (1. - inj_param)*inj_prev(ix,iy,ij,inj_is))

                  --- Force injected current to be between the range
                  --- jmininj to jmaxinj.
                  rnn = min(rnn,jmaxinj(ij)*jton(jsid))
                  rnn = max(rnn,jmininj(ij)*jton(jsid))

                  --- Scale the number of particles injected by the fraction
                  --- of the grid cell covered by the source. This will only
                  --- be less than one at the edge of the source.
                  if(l_inj_regular) rnn = rnn * inj_area(ix,iy,ij)

                  --- Divide number of particles injected by two on axis
                  --- when using radial injection.
                  if(.not. l_inj_regular .and. l_inj_rz .and. ix==0) then
                    rnn = 0.5*rnn
                  endif

                  --- Save the calculated number of particles to inject.
                  if (inject .ne. 6) then
                    inj_np(ix,iy,ij,inj_is) = rnn
                  endif

                  if (l_inj_rz_grid .and. .not. l_inj_rz) then
                    --- Scale the number of particles to inject to increase
                    --- with the radius. These factors times the first
                    --- part of jton give the emitting area of the grid cell.
                    if (ix == 0) then
                      rnn = rnn*0.5*inj_dx*ainji
                    else
                      rnn = rnn*2.*xm*ainji
                    endif
                  endif

                  --- Add a random number to the number of particles injected
                  --- so that the average number of particles injected is
                  --- correct.  For example, if rnn < 1., without the
                  --- addition of the random number, no particles would ever
                  --- be injected.  With the random number, particles will be
                  --- injected and but the average number will be less than 1.
                  if(.not. l_inj_regular .and. inj_param > 0.) then
                    rnn = rnn + wranf()
                    --- Using this form will make the random number the same
                    --- independently of load balancing.
                    --- Note that the line of code below making injctcnt
                    --- the same across processors must also be uncommented.
                    --- And, xrandom must be digitrev
                    rnn = rnn + wrandom(xrandom,
     &                int(sqrt(1.*injctcnt)*is*(ix + 1 + (iy+1)*inj_nx)),dig3,1,1)
                  end if

                  --- Save the number of particles to inject.
                  if(.not.l_inj_regular) then
                    --- Convert to an integer.
                    inj_npactual(ix,iy,ij,inj_is) = int(rnn)
                  else
                    --- One particle is injected, but weighted by rnn.
                    inj_npactual(ix,iy,ij,inj_is) = 1.
                  end if

                  --- If rnn is somehow negative, make sure that nothing gets
                  --- injected.
                  if (rnn < 0.) inj_npactual(ix,iy,ij,inj_is) = 0.

                enddo
              enddo
            enddo

            --- Allocate enough space for the particles, summing over the
            --- injection grid cells.
            do is=1,pgroup%ns
              jsid = pgroup%sid(is-1)
              if (finject(ij,jsid+1) == 0.) cycle
              inj_is = min(jsid+1,inj_ns)
              nn = sum(inj_npactual(:,:,ij,inj_is))
              call chckpart(pgroup,is,nn,0)
            enddo

            --- Load particles one grid cell at a time.
            do iy=0,inj_ny
              do ix=0,inj_nx

                --- Mesh location relative to source center
                xm = inj_xmmin(ij) + ix*inj_dx
                ym = inj_ymmin(ij) + iy*inj_dy

                --- Loop over the species
                do is=1,pgroup%ns
                  jsid = pgroup%sid(is-1)
                  if (finject(ij,jsid+1) == 0.) cycle
                  inj_is = min(jsid+1,inj_ns)

                  --- Skip this species if no particles are to be injected
                  if (inj_npactual(ix,iy,ij,inj_is) == 0.) cycle

                  nn = inj_npactual(ix,iy,ij,inj_is)

                  --- Double check to make sure that there is room for the
                  --- particles, in case there is a problem with the above
                  --- calculation.
                  --- This is commented out since it is not really needed
                  --- and sometimes seems to cause problems, making the
                  --- particles arrays grow too quickly.
                  call chckpart(pgroup,is,nn,0)

                  --- loop to load new particles
                  do ip=1,nn
                    ii = pgroup%ins(is) - 1
                    --- increment random number counter
                    injctcnt = injctcnt + 1

                    --- calculate x, y and z of new particle
                    --- if within injection source load it, else skip it
                    if(l_inj_regular) then
                      --- Particles are placed at the grid locations
                      pgroup%xp(ii) = xm
                      pgroup%yp(ii) = ym
                    else
                      xrand = wrandom(xrandom,injctcnt,dig1,1,1)
                      if (l_inj_rz_grid .and. .not. l_inj_rz) then
                        --- Load the particles so that they are uniform
                        --- in r**2. The particles are loaded between
                        --- (ix-inj_xwide/2)*inj_dx and (ix+inj_xwide/2)*inj_dx.
                        if (ix == 0) then
                          p1x = 0.
                        else
                          p1x = (xm - 0.5*inj_xwide*inj_dx)**2
                        endif
                        p2x = (xm + 0.5*inj_xwide*inj_dx)**2
                        xrand = p1x + xrand*(p2x - p1x)
                        pgroup%xp(ii) = sqrt(xrand)
                        pgroup%yp(ii) = 0.
                      else
                        pgroup%xp(ii) = xm +
     &                                  spreadx*(xrand - .5)*inj_dx*inj_xwide
                        if(l_inj_rz) then
                          --- The abs is only needed for ix==0, because of
                          --- the -0.5 term.
                          pgroup%xp(ii) = abs(pgroup%xp(ii))
                          pgroup%yp(ii) = 0.
                        else
                          yrand = wrandom(xrandom,injctcnt,dig2,1,1)
                          pgroup%yp(ii) = ym +
     &                                    spready*(yrand - .5)*inj_dy*inj_ywide
                        endif
                      endif
                    end if

                    --- Check if particle is within the rectangular or
                    --- elliptical annulus with outer major and minor
                    --- size of ainject and binject and inner size of
                    --- ainjmin and binjmin.
                    --- If the particle is rejected, then adjust inj_npactual
                    --- appropriately.
                    if(linj_rectangle) then
                      if (abs(pgroup%xp(ii)) > ainj .or.
     &                    abs(pgroup%yp(ii)) > binj .or.
     &                    abs(pgroup%xp(ii)) < ainjmin(ij) .or.
     &                    abs(pgroup%yp(ii)) < binjmin(ij)) then
                        inj_npactual(ix,iy,ij,inj_is) = inj_npactual(ix,iy,ij,inj_is) - 1.
                        cycle
                      endif
                    else
                      if (((pgroup%xp(ii)*ainji)**2 +
     &                     (pgroup%yp(ii)*binji)**2 > 1.) .or.
     &                    ((pgroup%xp(ii)*binjmin(ij))**2 +
     &                     (pgroup%yp(ii)*ainjmin(ij))**2 <
     &                     ainjmin(ij)*binjmin(ij))) then
                        inj_npactual(ix,iy,ij,inj_is) = inj_npactual(ix,iy,ij,inj_is) - 1.
                        cycle
                      endif
                    endif

                    --- Clear out any old data from pid. This is done here
                    --- before pid is used anywhere.
                    pgroup%pid(ii,:) = 0.

                    --- Find location of particle relative to source
                    --- grid. This is needed for zp and az, as well as
                    --- for vnorm below.
                    --- Calculate this here since x is still r when
                    --- rz injection is being done.
                    ixx = spreadx*(pgroup%xp(ii) - inj_xmmin(ij))*dxi
                    iyy = spready*(pgroup%yp(ii) - inj_ymmin(ij))*dyi
                    wxx = spreadx*(pgroup%xp(ii) - inj_xmmin(ij))*dxi - ixx
                    wyy = spready*(pgroup%yp(ii) - inj_ymmin(ij))*dyi - iyy

                    --- For the axisymmetric cases, convert r to x and y,
                    --- choosing zero or random theta.
                    if(l_inj_rz .or. l_inj_rz_grid) then
                      if (l_inj_zero_theta) then
                        theta_tmp = 0.
                      else
                        r_tmp = pgroup%xp(ii)
                        theta_tmp = wrandom(xrandom,injctcnt,dig2,1,1)
                        theta_tmp = 2.*pi*theta_tmp
                        pgroup%xp(ii) = r_tmp*cos(theta_tmp)
                        pgroup%yp(ii) = r_tmp*sin(theta_tmp)
                      endif
                    endif

                    if(l_inj_regular) then
                      if(l_inj_rz) then
                        if(ix==0) then
                          pgroup%pid(ii,wpid) = 0.25*inj_dx*ainji
                        else
                          pgroup%pid(ii,wpid) = 2.*r_tmp*ainji
                        end if
                        pgroup%pid(ii,wpid) = pgroup%pid(ii,wpid)*
     &                                        inj_np(ix,iy,ij,inj_is)
                      else
                        pgroup%pid(ii,wpid) = inj_np(ix,iy,ij,inj_is)
                      endif
                    else
                      --- if inject only radially, use variable weights and
                      --- assign pid
                      if(l_inj_rz) then
                        pgroup%pid(ii,wpid) = 2.*r_tmp*ainji
                      end if
                    end if

                    --- Scale the particle weight.
                    if (wpid > 0) then
                      if (pgroup%pid(ii,wpid) == 0.) then
                        pgroup%pid(ii,wpid) = winject(ij,is)
                      else
                        pgroup%pid(ii,wpid) = pgroup%pid(ii,wpid)*winject(ij,is)
                      endif
                    endif

                    if(l_inj_exact) then
                      --- calculate injection surface axial location exactly
                      zm = (pgroup%xp(ii)**2+pgroup%yp(ii)**2)/
     &                     (abs(rinject(ij)) +
     &    sqrt(max(0.,rinject(ij)**2 - pgroup%xp(ii)**2 - pgroup%yp(ii)**2)))
                      if (rinject(ij) < 0.) zm = -zm
                      if ((pgroup%xp(ii)**2+pgroup%yp(ii)**2) > rinject(ij)**2)
     &                  zm = rinject(ij)
                      pgroup%zp(ii) = zm + inj_addfdz*inj_dz
                      az = asin(sqrt(pgroup%xp(ii)**2 + pgroup%yp(ii)**2)/rinject(ij))
                    else
                      --- find injection surface axial location by interpolation
                      pgroup%zp(ii)=inj_grid(ixx        ,iyy        ,ij)*(1.-wxx)*(1.-wyy) +
     &                              inj_grid(ixx+spreadx,iyy        ,ij)*    wxx *(1.-wyy) +
     &                              inj_grid(ixx        ,iyy+spready,ij)*(1.-wxx)*    wyy  +
     &                              inj_grid(ixx+spreadx,iyy+spready,ij)*    wxx *    wyy  +
     &                              inj_zstart(ij)

                      --- find injection angle due to curvature by interpolation
                      az = inj_angl(ixx        ,iyy        ,ij)*(1. - wxx)*(1. - wyy) +
     &                     inj_angl(ixx+spreadx,iyy        ,ij)*      wxx *(1. - wyy) +
     &                     inj_angl(ixx        ,iyy+spready,ij)*(1. - wxx)*      wyy  +
     &                     inj_angl(ixx+spreadx,iyy+spready,ij)*      wxx *      wyy
                    end if

                    --- Only inject particles within the injection region.
                    if (inj_d(ij) > 0.) then
                      if (pgroup%zp(ii)+zinj < zpminlocal+zgrid .or.
     &                    pgroup%zp(ii)+zinj >= zpmaxlocal+zgrid) then
                          inj_npactual(ix,iy,ij,inj_is) = inj_npactual(ix,iy,ij,inj_is) - 1.
                        cycle
                      endif
                    else
                      if (pgroup%zp(ii)+zinj <= zpminlocal+zgrid .or.
     &                    pgroup%zp(ii)+zinj > zpmaxlocal+zgrid) then
                          inj_npactual(ix,iy,ij,inj_is) = inj_npactual(ix,iy,ij,inj_is) - 1.
                        cycle
                      endif
                    endif

                    --- Calculate transverse angle. Use the precalculated
                    --- value if it is available.
                    if(l_inj_rz .or. l_inj_rz_grid) then
                      aa = theta_tmp
                    else
                      aa = atan2(pgroup%yp(ii),pgroup%xp(ii))
                    endif

                    --- Calculate and save the fraction of time step this
                    --- particle is to be advanced. Also equal to dt minus
                    --- the time of emission, all divided by dt.
                    --- Note that the number is stored as a fraction between
                    --- zero and one since pid does double duty, storing
                    --- also the number of the injection source. Multiplying
                    --- by dt would lose too many digits when added to an
                    --- integer.
                    --- The fractions are chosen using the digit reversed
                    --- number generator and not particle number since they
                    --- are correlated with transverse position.
                    if(l_inj_regular) then
                      pgroup%pid(ii,injpid) = rnrev(injctcnt,dig1)
                    else
                      pgroup%pid(ii,injpid) = rnrev(injctcnt,dig9)
                    end if

                    --- save time of creation if tpid > 0
                    if(tpidɬ) pgroup%pid(ii,tpid) = time + fulldt_s

                    --- reverse sign of pid(ii,tpid) if temperature added
                    --- after delay.
                    --- negative pid is then used to track particles on which
                    --- to add temperature.
                    if(l_inj_delay_temp) then
                      pgroup%pid(ii,tpid) = -pgroup%pid(ii,tpid)
                    endif

                    --- save radius of creation if rpid > 0
                    if(rpidɬ) then
                      pgroup%pid(ii,rpid) =
     &                            sqrt(pgroup%xp(ii)**2+pgroup%yp(ii)**2)
                    endif

                    if (inject .ne. 6) then
                      --- Get the normal velocity for this particle. This is
                      --- based on the Child-Langmuir solution, given a
                      --- voltage drop, vnorm, and a time since emission,
                      --- pid(ii,injpid)*dt.
                      vnorm = inj_phi(ixx        ,iyy        ,ij)*(1. - wxx)*(1. - wyy) +
     &                        inj_phi(ixx+spreadx,iyy        ,ij)*      wxx *(1. - wyy) +
     &                        inj_phi(ixx        ,iyy+spready,ij)*(1. - wxx)*      wyy  +
     &                        inj_phi(ixx+spreadx,iyy+spready,ij)*      wxx *      wyy
                      vznorm = vzconst(jsid)*abs(vnorm)**1.5*
     &                         (pgroup%pid(ii,injpid)*fulldt_s)**2
                      if (inj_d(ij) < 0.) vznorm = -vznorm
                    else
                      vznorm = 0.
                    endif

                    --- Set transverse coordinates. The emittance term
                    --- is probably not correct since it depends on vbeam.
                    --- The correct way to add a transverse thermal spread
                    --- is via vthperp_s. Also add in the transverse
                    --- component of the normal velocity and the specified
                    --- vbeam (which is also taken to be normal).
                    --- Like the emittance term, the term with ap (and bp)
                    --- are probably not correct.
                    pgroup%uxp(ii) = spreadvx*(vzinj*
     &                               (apinj*pgroup%xp(ii)*ainji + xpinj)
     &                      - (vznorm + vzinj)*sin(az)*cos(aa))
                    pgroup%uyp(ii) = spreadvy*(vzinj*
     &                               (bpinj*pgroup%yp(ii)*binji + ypinj)
     &                      - (vznorm + vzinj)*sin(az)*sin(aa))
                    if(.not. l_inj_delay_temp .and. .not. l_inj_along_B) then
                      vtx = vzinj*0.5*emitx_s(jsid+1)*ainji + vthperp_s(jsid+1)
                      vty = vzinj*0.5*emity_s(jsid+1)*binji + vthperp_s(jsid+1)
                      pgroup%uxp(ii) = pgroup%uxp(ii) + spreadvx*vtx*
     &                    wrandomgauss(vtrandom,injctcnt,dig3,dig4,1,1,.false.)
                      pgroup%uyp(ii) = pgroup%uyp(ii) + spreadvy*vty*
     &                    wrandomgauss(vtrandom,injctcnt,dig5,dig6,1,1,.false.)
                    end if
                    --- Include the user specified vbeam (assumed to be a
                    --- normal), the thermal velocity, and the term from
                    --- the Child-Langmuir solution.
                    pgroup%uzp(ii) = vzinj*cos(az) + vznorm*cos(az)
                    if(.not. l_inj_delay_temp .and. .not. l_inj_along_B) then
                      if(l_inj_addtempz_abs) then
                        pgroup%uzp(ii) = pgroup%uzp(ii) + abs(vthz_s(jsid+1)*
     &                   wrandomgauss(vzrandom,injctcnt,dig7,dig8,1,1,.false.))
                      else
                        pgroup%uzp(ii) = pgroup%uzp(ii) + vthz_s(jsid+1)*
     &                    wrandomgauss(vzrandom,injctcnt,dig7,dig8,1,1,.false.)
                      end if
                    end if

                    --- Calculate gamma inverse and reset the particle
                    --- massless momentums properly.
                    if (lrelativ) then
                      g2 = (1. -
     &      (pgroup%uxp(ii)**2+pgroup%uyp(ii)**2+pgroup%uzp(ii)**2)*clightsqi)
                      if (g2 < 0.) then
                        call kaboom("inject3d: particle speed is greater than clight")
                      endif
                      pgroup%gaminv(ii) = sqrt(g2)
                      gamma = 1./pgroup%gaminv(ii)
                      pgroup%uxp(ii) = pgroup%uxp(ii)*gamma
                      pgroup%uyp(ii) = pgroup%uyp(ii)*gamma
                      pgroup%uzp(ii) = pgroup%uzp(ii)*gamma
                    else
                      pgroup%gaminv(ii) = 1.
                    endif

                    --- Save the ID of the injection source. Note that pid
                    --- is doing double duty, saving the injection source
                    --- number and the fraction of the timestep the
                    --- particle is advanced.
                    pgroup%pid(ii,injpid) = pgroup%pid(ii,injpid) + ij

                    --- Save SSN 
                    if (spidɬ) then
                      pgroup%pid(ii,spid) = ssn
                      ssn = ssn+1
                    end if

                    if (xbirthpid > 0) pgroup%pid(ii,xbirthpid) = pgroup%xp(ii)
                    if (ybirthpid > 0) pgroup%pid(ii,ybirthpid) = pgroup%yp(ii)
                    if (zbirthpid > 0) pgroup%pid(ii,zbirthpid) = pgroup%zp(ii)
                    if (uxbirthpid > 0) pgroup%pid(ii,uxbirthpid)=pgroup%uxp(ii)
                    if (uybirthpid > 0) pgroup%pid(ii,uybirthpid)=pgroup%uyp(ii)
                    if (uzbirthpid > 0) pgroup%pid(ii,uzbirthpid)=pgroup%uzp(ii)

                    --- increment various particle counters
                    npinje_s(jsid+1) = npinje_s(jsid+1) + 1
                    npinjtmp(ij,jsid+1) = npinjtmp(ij,jsid+1) + 1

                    --- Make injected particle live, though more work will
                    --- to be done to it below before its ready. But now, if
                    --- chckpart shifts particles around, this one will be
                    --- shifted also.
                    pgroup%ins(is) = pgroup%ins(is) - 1
                    pgroup%nps(is) = pgroup%nps(is) + 1

                  enddo
                  --- end of loop over particles
                enddo
                --- end loop over species
              enddo
              --- end loop over ix
            enddo
            --- end loop over iy

#ifdef MPIPARALLEL
            --- Make sure that all processors have the same value of injctcnt
            --- This will only really work for flat sources, since otherwise
            --- particles may be injected on multiple processors.
            call parallelmaxintegerarray(injctcnt,1)
#endif

            --- Transform particles to lab frame
            do is=1,pgroup%ns
              jsid = pgroup%sid(is-1)
              ii = pgroup%ins(is)
              nn = npinjtmp(ij,jsid+1)
              if (nn > 0) then
                --- The check if nn > 0 is needed since if npmax was zero
                --- and no particles were injected then npmax stays zero
                --- and the particle arrays are unallocated.
                call inj_transform(nn,pgroup%xp(ii),pgroup%yp(ii),
     &                             pgroup%zp(ii),1,ij,1,1)
                call inj_transform(nn,pgroup%uxp(ii),pgroup%uyp(ii),
     &                             pgroup%uzp(ii),1,ij,1,0)
              endif
            enddo

          enddo
          --- end of loop over injection sources

          --- Call transverse particle scraping routine to force
          --- removal of particles outside the grid.
          do is=1,pgroup%ns
            jsid = pgroup%sid(is-1)
            if (.not. pgroup%ldts(is-1)) cycle
            ii = pgroup%ins(is)
            nn = npinje_s(jsid+1)
            if (nn == 0) cycle

            --- Temporarily set nps so that only recently injected
            --- particles are affected. This is done only for optimization.
            nn = pgroup%nps(is) - npinje_s(jsid+1)
            pgroup%nps(is) = npinje_s(jsid+1)

            call stckxy3d(pgroup,is-1,zbeam,.false.)

#ifdef MPIPARALLEL
            --- Check if particles are within the transverse domain
            --- and skip them if not. Note that this makes inj_npactual
            --- unreliable since it is not updated to reflect the
            --- particles that are removed.
            if (solvergeom == RZgeom) then
              do ii=pgroup%ins(is),pgroup%ins(is)+pgroup%nps(is)-1
                rr = pgroup%xp(ii)**2 + pgroup%yp(ii)**2
                if (rr < xpminlocal**2 .or. rr >= xpmaxlocal**2) then
                  pgroup%gaminv(ii) = -1.
                endif
              enddo
            else
              do ii=pgroup%ins(is),pgroup%ins(is)+pgroup%nps(is)-1
                xm = pgroup%xp(ii)
                if (l4symtry) xm = abs(xm)
                ym = pgroup%yp(ii)
                if (l2symtry .or. l4symtry) ym = abs(ym)
                if (xm < xpminlocal .or. xm >= xpmaxlocal .or.
     &              ym < ypminlocal .or. ym >= ypmaxlocal) then
                  pgroup%gaminv(ii) = -1.
                endif
              enddo
            endif
#endif

            --- Clear out the lost particles. Set fillmethod=3 so that
            --- live particles are shifted upward. This keeps the block
            --- of injected particle contiguous with the rest of the
            --- live particles.
            call clearpart(pgroup,is,3)
              
            --- Adjust npinje_s(jsid+1) so it doesn't include the particles
            --- which were just scraped. Also, reset pgroup%nps(is).
            npinje_s(jsid+1) = pgroup%nps(is)
            pgroup%nps(is) = nn + npinje_s(jsid+1)

            --- Sum npinje_s to get the total number of particles injected.
            npinjected = npinjected + npinje_s(jsid+1)

          enddo

          --- Now, advance the particles off of the emitting surface using
          --- isochronous leapfrog.  Each particle has it's own time step
          --- size, uniformly distributed between 0 and dt.
          do is=1,pgroup%ns
            jsid = pgroup%sid(is-1)
            --- Skip this if no particles of this species were injected.
            if (.not. pgroup%ldts(is-1)) cycle
            fulldt_s    = dt*pgroup%ndts(is-1)*pgroup%dtscale(is)
            if(npinje_s(jsid+1)==0) cycle

            --- Make sure that there is enough room in the temporary arrays.
            if (npinje_s(jsid+1) > npgrp) then
              npgrp = npinje_s(jsid+1)
              call gchange("Setpwork3d",0)
            endif

            --- Make equivalences so names make sense below
            ex => at
            ey => bt
            ez => apt
            bx => bpt
            by => xct
            bz => xpct
            bendres => yct
            bendradi => ypct

            --- Zero the B field which is accumulated below
            ex = 0.
            ey = 0.
            ez = 0.
            bx = 0.
            by = 0.
            bz = 0.

            --- Assume that injection will never be done inside a bend.
            bendres = 0.
            bendradi = 1.

            --- Create equivalences
            --- In this section of code, this is purely for convenience
            --- and to make the following code look cleaner.
            nn = npinje_s(jsid+1)
            i1 = pgroup%ins(is)
            i2 = pgroup%ins(is)+npinje_s(jsid+1)-1
            xx => pgroup%xp(i1:i2)
            yy => pgroup%yp(i1:i2)
            zz => pgroup%zp(i1:i2)
            ux => pgroup%uxp(i1:i2)
            uy => pgroup%uyp(i1:i2)
            uz => pgroup%uzp(i1:i2)
            gg => pgroup%gaminv(i1:i2)
            id => pgroup%pid(i1:i2,injpid)

            --- get E self-field at initial positions
            if (linj_efromgrid) then
              call fetche3dfrompositions(jsid,pgroup%ndts(is-1),nn,xx,yy,zz,
     &                                   ex,ey,ez,bx,by,bz)
              call fetchb3dfrompositions(jsid,pgroup%ndts(is-1),nn,xx,yy,zz,
     &                                   bx,by,bz)
            endif
            call inj_sete3d(nn,xx,yy,zz,id,ex,ey,ez)

            --- get external fields at initial positions
            call exteb3d(nn,xx,yy,zz,uz,gg,0.,fulldt_s*0.5,bx,by,bz,ex,ey,ez,
     &                   pgroup%sm(is),pgroup%sq(is),bendres,bendradi,1.,
     &                   fulldt_s)

            --- Now that the B field is gathered, the l_inj_along_B option
            --- can be handled.
            if (l_inj_along_B) then
              do ip=1,nn
                --- At this point, only the directed velocity has been added.
                vnorm = sqrt(ux(ip)**2 + uy(ip)**2 + uz(ip)**2)
                bmag  = sqrt(bx(ip)**2 + by(ip)**2 + bz(ip)**2)
                if (bmag > 0.) then
                  vtx = vnorm*bx(ip)/bmag
                  vty = vnorm*by(ip)/bmag
                  vtz = vnorm*bz(ip)/bmag
                  --- Make sure that the new v is in the same general
                  --- direction as the original.
                  if (ux(ip)*vtx + uy(ip)*vty + uz(ip)*vtz < 0.) then
                    vtx = -vtx
                    vty = -vty
                    vtz = -vtz
                  endif
                  ux(ip) = vtx
                  uy(ip) = vty
                  uz(ip) = vtz
                endif

                if(.not. l_inj_delay_temp) then
                  --- Now add in the thermal piece.
                  --- First calculate it in the frame of the emitter.
                  --- This ii won't give the same injctcnt sequence that was
                  --- used above in the position, since some particles have
                  --- been thrown away, but at least it gives unique random
                  --- numbers.
                  ii = injctcnt - nn + ip
                  uxt(ip) = spreadvx*vthperp_s(jsid+1)*
     &                      wrandomgauss(vtrandom,ii,dig3,dig4,1,1,.false.)
                  uyt(ip) = spreadvy*vthperp_s(jsid+1)*
     &                      wrandomgauss(vtrandom,ii,dig5,dig6,1,1,.false.)
                  uzt(ip) = vthz_s(jsid+1)*
     &                      wrandomgauss(vzrandom,ii,dig7,dig8,1,1,.false.)
                  if(l_inj_addtempz_abs) then
                    uzt(ip) = abs(uzt(ip))
                  end if
                end if

              enddo

              if(.not. l_inj_delay_temp) then
                --- Convert the thermal velocity to the lab frame. Note that
                --- this only matters if vthperp != vthz
                indx(1:nn) = int(pgroup%pid(i1:i2,injpid))
                call inj_transform(nn,uxt,uyt,uzt,nn,indx,1,0)
                --- Now add it in.
                ux = ux + uxt(1:nn)
                uy = uy + uyt(1:nn)
                uz = uz + uzt(1:nn)
              endif

            endif

            --- Fetch the time step size
            tt(1:nn) = fulldt_s*(id - int(id))

            --- do half velocity advance with E fields
            call epusht3d(nn,ux,uy,uz,ex,ey,ez,pgroup%sq(is),pgroup%sm(is),tt,0.5)

            --- Advance relativistic Gamma factor
            call gammaadv(nn,gg,ux,uy,uz,gamadv,lrelativ)

            --- do half velocity advance with B fields
            call bpusht3d(nn,ux,uy,uz,gg,bx,by,bz,pgroup%sq(is),pgroup%sm(is),
     &                    tt,0.5,ibpush)

            --- do full position advance
            call xpusht3d(nn,xx,yy,zz,ux,uy,uz,gg,tt)

            --- setrho is now done by the setrho call in padvnc3d.
          enddo

          --- print warning if no particles were injected
          if (debug .and. npinjected == 0) then
            call remark("No particles were injected.")
          endif

        elseif (itask == 2) then
  Do second part of constant current injection: get new E fields (this is
  done after field solve including injected particles), synchronize velocity
  with position, and then move velocity one half timestep back to match time
  level of rest of particles.
          --- loop over species
          do is=1,pgroup%ns
            jsid = pgroup%sid(is-1)
            if (.not. pgroup%ldts(is-1)) cycle
            if (pgroup%nps(is) == 0) cycle
            if (ALL(finject(:,jsid+1) == 0.)) cycle
            fulldt_s = dt*pgroup%ndts(is-1)*pgroup%dtscale(is)

#ifndef MPIPARALLEL
            --- Skip this if no particles of this species were injected.
            if(npinje_s(jsid+1)==0) cycle
            nn = npinje_s(jsid+1)
#else
            --- Find particles that were freshly injected. These
            --- are particles which still have noninteger pid(:,injpid).
            allocate(mask(pgroup%nps(is)),stat=allocerror)
            if (allocerror /= 0) then
              print*,"inject3d: allocation error ",allocerror,
     &               ": could not allocate mask to shape ",pgroup%nps(is)
              call kaboom("inject3d: allocation error")
              return
            endif

            i1 = pgroup%ins(is)
            i2 = pgroup%ins(is)+pgroup%nps(is)-1
            where (pgroup%pid(i1:i2,injpid) < ninject+1)
              mask = ((pgroup%pid(i1:i2,injpid) -
     &             int(pgroup%pid(i1:i2,injpid))))
            elsewhere
              mask = 0.
            endwhere
            nn = COUNT(mask > 0.)
            if (nn == 0) then
              deallocate(mask)
              cycle
            endif
#endif

            --- Make sure that there is enough room in the temporary arrays.
            if (nn > npgrp) then
              npgrp = nn
              call gchange("Setpwork3d",0)
            endif

            --- Make equivalences so names make sense below
            ex => at
            ey => bt
            ez => apt
            bx => bpt
            by => xct
            bz => xpct
            bendres => yct
            bendradi => ypct

            --- Zero the B field which is accumulated below
            ex = 0.
            ey = 0.
            ez = 0.
            bx = 0.
            by = 0.
            bz = 0.

            --- Assume that injection will never be done inside a bend.
            bendres = 0.
            bendradi = 1.

#ifndef MPIPARALLEL
            --- Fetch time step size
            i1 = pgroup%ins(is)+ntinject(jsid+1)
            i2 = pgroup%ins(is)+ntinject(jsid+1)+npinje_s(jsid+1)-1
            tt(1:nn) = fulldt_s*(pgroup%pid(i1:i2,injpid) -
     &                       int(pgroup%pid(i1:i2,injpid)))
            --- Create equivalences
            xx => pgroup%xp(i1:i2)
            yy => pgroup%yp(i1:i2)
            zz => pgroup%zp(i1:i2)
            ux => pgroup%uxp(i1:i2)
            uy => pgroup%uyp(i1:i2)
            uz => pgroup%uzp(i1:i2)
            gg => pgroup%gaminv(i1:i2)
            id => pgroup%pid(i1:i2,injpid)
#else
            --- Create equivalences
            xx => xt(1:nn)
            yy => yt(1:nn)
            zz => zt(1:nn)
            ux => uxt(1:nn)
            uy => uyt(1:nn)
            uz => uzt(1:nn)
            gg => rt(1:nn)
            id => perpscal(1:nn)
            --- Fetch time step size
            tt(1:nn) = fulldt_s*PACK(mask,mask > 0.)
            --- Copy particle data into temporary arrays, collecting
            --- only particles that have been injected this time step.
            i1 = pgroup%ins(is)
            i2 = pgroup%ins(is)+pgroup%nps(is)-1
            xx = PACK(pgroup%xp(i1:i2),mask > 0.)
            yy = PACK(pgroup%yp(i1:i2),mask > 0.)
            zz = PACK(pgroup%zp(i1:i2),mask > 0.)
            ux = PACK(pgroup%uxp(i1:i2),mask > 0.)
            uy = PACK(pgroup%uyp(i1:i2),mask > 0.)
            uz = PACK(pgroup%uzp(i1:i2),mask > 0.)
            gg = PACK(pgroup%gaminv(i1:i2),mask > 0.)
            id = PACK(pgroup%pid(i1:i2,injpid),mask > 0.)
#endif

            --- calculate new E self-fields
            if (linj_efromgrid) then
              call fetche3dfrompositions(jsid,pgroup%ndts(is-1),nn,xx,yy,zz,
     &                                   ex,ey,ez,bx,by,bz)
              call fetchb3dfrompositions(jsid,pgroup%ndts(is-1),nn,xx,yy,zz,
     &                                   bx,by,bz)
            endif
            call inj_sete3d(nn,xx,yy,zz,id,ex,ey,ez)

            --- Get external fields at current positions.
            call exteb3d(nn,xx,yy,zz,uz,gg,-fulldt_s*0.5,0.,bx,by,bz,ex,ey,ez,
     &                pgroup%sm(is),pgroup%sq(is),bendres,bendradi,1.,fulldt_s)

            --- complete B advance
            call bpusht3d(nn,ux,uy,uz,gg,bx,by,bz,pgroup%sq(is),pgroup%sm(is),
     &                    tt,0.5,ibpush)

            --- complete the E advance
            call epusht3d(nn,ux,uy,uz,ex,ey,ez,pgroup%sq(is),pgroup%sm(is),
     &                    tt,0.5)

            --- Advance relativistic Gamma factor
            call gammaadv(nn,gg,ux,uy,uz,gamadv,lrelativ)

            --- Now, move velocites back one half a step
            --- first half of a backward B advance
            call bpush3d(nn,ux,uy,uz,gg,bx,by,bz,pgroup%sq(is),pgroup%sm(is),
     &                    -0.5*fulldt_s,ibpush)

            --- then half of a backward E advance
            call epush3d(nn,ux,uy,uz,ex,ey,ez,pgroup%sq(is),pgroup%sm(is),
     &                    -0.5*fulldt_s)

            --- Advance relativistic Gamma factor
            call gammaadv(nn,gg,ux,uy,uz,gamadv,lrelativ)

#ifdef MPIPARALLEL
            --- Copy data back into the particle arrays
            i1 = pgroup%ins(is)
            i2 = pgroup%ins(is)+pgroup%nps(is)-1
            pgroup%xp(i1:i2) = UNPACK(xx,maskɬ.,pgroup%xp(i1:i2))
            pgroup%yp(i1:i2) = UNPACK(yy,maskɬ.,pgroup%yp(i1:i2))
            pgroup%zp(i1:i2) = UNPACK(zz,maskɬ.,pgroup%zp(i1:i2))
            pgroup%uxp(i1:i2) = UNPACK(ux,maskɬ.,pgroup%uxp(i1:i2))
            pgroup%uyp(i1:i2) = UNPACK(uy,maskɬ.,pgroup%uyp(i1:i2))
            pgroup%uzp(i1:i2) = UNPACK(uz,maskɬ.,pgroup%uzp(i1:i2))
            pgroup%ex(i1:i2) = UNPACK(ex,maskɬ.,pgroup%ex(i1:i2))
            pgroup%ey(i1:i2) = UNPACK(ey,maskɬ.,pgroup%ey(i1:i2))
            pgroup%ez(i1:i2) = UNPACK(ez,maskɬ.,pgroup%ez(i1:i2))
            pgroup%bx(i1:i2) = UNPACK(bx,maskɬ.,pgroup%bx(i1:i2))
            pgroup%by(i1:i2) = UNPACK(by,maskɬ.,pgroup%by(i1:i2))
            pgroup%bz(i1:i2) = UNPACK(bz,maskɬ.,pgroup%bz(i1:i2))
            pgroup%pid(i1:i2,injpid) = int(pgroup%pid(i1:i2,injpid))
            deallocate(mask)
#endif

         --- end of loop over species
         enddo

        endif

  Axially directed space-charge limited injection is now complete
  for this time step.
      endif

 ------------------------------------------------------------------------
 ------------------------------------------------------------------------
 ------------------------------------------------------------------------
 ------------------------------------------------------------------------
  Injection off of a tranverse facing surface.
  Uses any of the three methods implemented, based off the value of
  the variable 'tinject'.

      if (ntinj > 0 .and.
     &    1 <= tinject .and. tinject <= 3) then

        if (itask == 1) then

          tinj_npactual = 0.

          --- zero ntinject for tinject = 2 or 3
          if (tinject == 2 .or. tinject == 3) then
            do is=1,pgroup%ns
              jsid = pgroup%sid(is-1)
              if (.not. pgroup%ldts(is-1)) cycle
              ntinject(jsid+1) = 0
            enddo
          endif

          --- Loop over injection sources, and precalculate the number of
          --- particles that will be injected. This is so that the particle
          --- arrays will be reallocated only once, if needed.
          do ij=1,ntinj

            --- loop over species
            do is=1,pgroup%ns
              jsid = pgroup%sid(is-1)
              if (.not. pgroup%ldts(is-1)) cycle
              if (ftinject(ij,jsid+1) == 0.) cycle
              fulldt_s    = dt*pgroup%ndts(is-1)*pgroup%dtscale(is)
              qoverm = pgroup%sq(is)/pgroup%sm(is)

              --- Load particles one azimuthal section at a time,
              --- only if there is a positive number of injected particles.
              do iz=0,nztinj(ij)-1
                zm = ztinjmn(ij) + iz*dztinj(ij)

                --- Skip sections that are out of the grid
                if (zm+dztinj(ij) < zpminlocal+zgrid .or.
     &              zm >= zpmaxlocal+zgrid) cycle

                ainj = atinjectz(iz,ij)
                binj = btinjectz(iz,ij)
                if (ltinj_outward(ij)) then
                  rsign = +1.
                else
                  rsign = -1.
                endif

                --- Area of section of emitting surface. Note that the circum includes
                --- any reduced angular extent.
                thetaextent = thetamaxtinj(iz,ij) - thetamintinj(iz,ij)
                circum = ellipseperimeter(ainj,binj)*thetaextent/(2.*pi)
                area = dztinj(ij)*circum/nttinj(ij)
                dtheta = thetaextent/nttinj(ij)

                do ith=0,nttinj(ij)-1

                  --- angle of point in transverse plane
                  aa = thetamintinj(iz,ij) + ith*dtheta
                  p1x = ainj*cos(aa)
                  p1y = binj*sin(aa)
                  rinj = sqrt(p1x**2 + p1y**2)
                  --- For an ellipse, the larger the axis, the smaller the curvature,
                  --- so as an adhoc fix for the cylindrical correction to CL,
                  --- swap x and y to estimate the radius of curvature. This has
                  --- no effect on a circle.
                  rinj = sqrt((ainj*sin(aa))**2 + (binj*cos(aa))**2)
                  rinji = 1./rinj

                  --- Find coordinates of the point on the virtual surface
                  --- along a line perpendicular to the emitting surface.
                  --- Also calculate the distance of that point from the surface.
                  --- Note that dx, dy, and dz are used since these relate to
                  --- the field grid, not the injection grid.
                  p2x = (ainj + rsign*inj_dx)*cos(aa)
                  p2y = (binj + rsign*inj_dx*ainj/binj)*sin(aa)
                  dr = rsign*sqrt((p2x-p1x)**2 + (p2y-p1y)**2)
                  dri = 1./dr

                  --- Fetch difference between phi at that point and phi on
                  --- the emitting surface, averaging over z
                  vavez = 0.5*(tinj_phi(ith,iz,ij)+tinj_phi(ith,iz+1,ij))

                  --- number of particles injected in grid cell
                  if (tinject == 1) then
                    rnn = ntinject(jsid+1)*ftinject(ij,jsid+1)/real(nttinj(ij)*nztinj(ij))
                  elseif (tinject == 2) then
                    --- Const for tinject = 2.  Denominator includes factor for
                    --- emitting on concentric cylinders.  See I. Langmuir,
                    --- K. Blodgett, "Currents Limited by Space Charge Between
                    --- Concentric Spheres", PhysRev, 1924.
                    const = 4./9.*eps0*(2.*abs(qoverm))**.5*dri**2*
     &                      fulldt_s*area/abs(pgroup%sq(is))/pgroup%sw(is)/
     &                      (1. - 0.8*dr*rinji + 0.66*(dr*rinji)**2)
                    rnn = ftinject(ij,jsid+1)*const*abs(vavez)**1.5
                    rnn = sign(rnn,pgroup%sq(is)*vavez)
                    rnn = inj_param*rnn +
     &                    (1. - inj_param)*tinjprev(ith,iz,ij,is)
                  elseif (tinject == 3) then
                    --- Find coordinates of the point.
                    --- This only works with basic 3d and RZ solvers
                    p1x = abs(xtinject(ij) + p1x - xmmin)/dx
                    p1y = abs(ytinject(ij) + p1y - ymmin)/dy
                    p1z = (zm - zmminlocal - zgrid)/dz
                    i1x = p1x
                    i1y = p1y
                    i1z = p1z
                    w1x = p1x - i1x
                    w1y = p1y - i1y
                    w1z = p1z - i1z
                    --- get charge density at injection point
                    rr = rhop(i1x  ,i1y  ,i1z  )*(1.-w1x)*(1.-w1y)*(1.-w1z) +
     &                   rhop(i1x  ,i1y+1,i1z  )*(1.-w1x)*    w1y *(1.-w1z) +
     &                   rhop(i1x+1,i1y  ,i1z  )*    w1x *(1.-w1y)*(1.-w1z) +
     &                   rhop(i1x+1,i1y+1,i1z  )*    w1x *    w1y *(1.-w1z) +
     &                   rhop(i1x  ,i1y  ,i1z+1)*(1.-w1x)*(1.-w1y)*    w1z  +
     &                   rhop(i1x  ,i1y+1,i1z+1)*(1.-w1x)*    w1y *    w1z  +
     &                   rhop(i1x+1,i1y  ,i1z+1)*    w1x *(1.-w1y)*    w1z  +
     &                   rhop(i1x+1,i1y+1,i1z+1)*    w1x *    w1y *    w1z
                    rnn=(eps0*vavez*dri - rr*abs(dr))*area/
     &                   pgroup%sq(is)/pgroup%sw(is)*ftinject(ij,jsid+1)
                    rnn = inj_param*rnn +
     &                    (1. - inj_param)*tinjprev(ith,iz,ij,is)
                  endif
                  tinjprev(ith,iz,ij,is) = rnn
                  if (ith == 0) tinjprev(nttinj(ij),iz,ij,is) = tinjprev(ith,iz,ij,is)

                  rnn = min(rnn,
     &                      abs(jmaxtinj(ij)*area*fulldt_s/pgroup%sq(is)/
     &                      pgroup%sw(is)*ftinject(ij,jsid+1)))
                  nn = int(rnn + wranf())

                  if (nn < 0) nn = 0.
                  tinj_npactual(ith,iz,ij,is) = nn

                enddo
              enddo

            --- end loop over species
            enddo

          --- end loop over transverse injection sources
          enddo

          do is=1,pgroup%ns
            --- Make sure there is room for more particles. Calculate the
            --- total number of particles to be injected from the sources.
            nn = sum(tinj_npactual(:,:,:,is))
            call chckpart(pgroup,is,nn,0)
          enddo

          --- Loop over injection sources
          do ij=1,ntinj

            --- loop over species
            do is=1,pgroup%ns
              jsid = pgroup%sid(is-1)
              if (.not. pgroup%ldts(is-1)) cycle
              if (ftinject(ij,jsid+1) == 0.) cycle
              fulldt_s    = dt*pgroup%ndts(is-1)*pgroup%dtscale(is)
              nti = 0
              qoverm = pgroup%sq(is)/pgroup%sm(is)

              --- Load particles one azimuthal section at a time,
              --- only if there is a positive number of injected particles.
              do iz=0,nztinj(ij)-1
                zm = ztinjmn(ij) + iz*dztinj(ij)

                --- Skip sections that are out of the grid
                if (zm+dztinj(ij) < zpminlocal+zgrid .or.
     &              zm >= zpmaxlocal+zgrid) cycle

                ainj = atinjectz(iz,ij)
                binj = btinjectz(iz,ij)
                ainjp1 = atinjectz(iz+1,ij)
                binjp1 = btinjectz(iz+1,ij)
                if (ltinj_outward(ij)) then
                  rsign = +1.
                else
                  rsign = -1.
                endif

                --- Area of section of emitting surface. Note that the circum includes
                --- any reduced angular extent.
                thetaextent = thetamaxtinj(iz,ij) - thetamintinj(iz,ij)
                circum = ellipseperimeter(ainj,binj)*thetaextent/(2.*pi)
                dtheta = thetaextent/nttinj(ij)

                --- Check if the angular extent is 2*pi, in which case particles are
                --- loaded over the full circle. The main issue is whether particles
                --- at the min and max theta range overlap or not. The dtheta/2 is
                --- an ad hoc fuzz to avoid round off problems.
                if (thetaextent < 2.*pi - dtheta/2.) then
                  ltinj_full_circle = .false.
                else
                  ltinj_full_circle = .true.
                endif

                do ith=0,nttinj(ij)-1

                  --- angle of point in transverse plane
                  aa = thetamintinj(iz,ij) + ith*dtheta
                  p1x = ainj*cos(aa)
                  p1y = binj*sin(aa)

                  --- Find coordinates of the point on the virtual surface
                  --- along a line perpendicular to the emitting surface.
                  --- Also calculate the distance of that point from the surface.
                  --- Note that dx, dy, and dz are used since these relate to
                  --- the field grid, not the injection grid.
                  p2x = (ainj + rsign*inj_dx)*cos(aa)
                  p2y = (binj + rsign*inj_dx*ainj/binj)*sin(aa)
                  dr = rsign*sqrt((p2x-p1x)**2 + (p2y-p1y)**2)
                  dri = 1./dr

                  --- Fetch difference between phi at that point and phi on
                  --- the emitting surface, averaging over z
                  vavez = 0.5*(tinj_phi(ith,iz,ij)+tinj_phi(ith,iz+1,ij))

                  --- normal velocity
                  vnorm = fulldt_s*qoverm*vavez*dri

                  --- Axial velocity
                  --- Calculated using axial E field on virtual surface.
                  --- This does not take into account any variation in a and b
                  vznorm = fulldt_s*qoverm*
     &                     (tinj_phi(ith,iz,ij)-tinj_phi(ith,iz+1,ij))/dztinj(ij)

                  nn = tinj_npactual(ith,iz,ij,is)
                  do ip=1,nn
                    ii = pgroup%ins(is) - 1
                    --- calculate position of new particle
                    wz = wrandom(xrandom,injctcnt,dig1,1,1)
                    pgroup%zp(ii) = zm + wz*dztinj(ij)

                    --- Only inject particles within the injection region.
                    if (pgroup%zp(ii) < zpminlocal+zgrid .or.
     &                  pgroup%zp(ii) >= zpmaxlocal+zgrid) cycle

                    t = wrandom(xrandom,injctcnt,dig3,1,1)
                    t = aa + (t - .5)*dtheta

                    if (.not. ltinj_full_circle) then
                      --- If not loading particles into the full circle, then skip
                      --- particles beyond thetamin and thetamax.
                      tmin = thetamintinj(iz,ij)*(1.-wz) + thetamintinj(iz+1,ij)*wz
                      tmax = thetamaxtinj(iz,ij)*(1.-wz) + thetamaxtinj(iz+1,ij)*wz
                      if (t < tmin .or. t > tmax) cycle
                    endif

                    pgroup%xp(ii) = xtinject(ij) + (ainj*(1.-wz) + ainjp1*wz)*cos(t)
                    pgroup%yp(ii) = ytinject(ij) + (binj*(1.-wz) + binjp1*wz)*sin(t)
                    pgroup%uxp(ii) = vnorm*cos(t)
                    pgroup%uyp(ii) = vnorm*sin(t)
                    pgroup%uzp(ii) = vznorm
                    if (.not. l_inj_along_B) then
                      --- When the particles are injected along B, the thermal
                      --- velocity can only be added after the directed
                      --- velocity has been rotated to be along B.
                      vtx = 0.5*vztinject(ij)*emitx_s(jsid+1)/(circum/thetaextent) + 
     &                      vthperp_s(jsid+1)
     &                    *wrandomgauss(vtrandom,injctcnt,dig3,dig4,1,1,.false.)
                      vty = 0.5*vztinject(ij)*emity_s(jsid+1)/(circum/thetaextent) + 
     &                      vthperp_s(jsid+1)
     &                    *wrandomgauss(vtrandom,injctcnt,dig5,dig6,1,1,.false.)
                      vtz = vthz_s(jsid+1)
     &                    *wrandomgauss(vzrandom,injctcnt,dig7,dig8,1,1,.false.)
                      pgroup%uxp(ii) = pgroup%uxp(ii) + vtx
                      pgroup%uyp(ii) = pgroup%uyp(ii) + vty
                      pgroup%uzp(ii) = pgroup%uzp(ii) + vtz
                    endif
                    if (lrelativ) then
                      g2 = (1. -
     &     (pgroup%uxp(ii)**2+pgroup%uyp(ii)**2+pgroup%uzp(ii)**2)*clightsqi)
                      if (g2 < 0.) then
                        call kaboom("inject3d: particle speed is greater than clight")
                      endif
                      pgroup%gaminv(ii) = sqrt(g2)
                      gamma = 1./pgroup%gaminv(ii)
                      pgroup%uxp(ii) = pgroup%uxp(ii)*gamma
                      pgroup%uyp(ii) = pgroup%uyp(ii)*gamma
                      pgroup%uzp(ii) = pgroup%uzp(ii)*gamma
                    else
                      pgroup%gaminv(ii) = 1.
                    endif
                    --- Clear out any old data from pid. This is done here
                    --- before pid is used anywhere.
                    pgroup%pid(ii,:) = 0.
                    pgroup%pid(ii,injpid) = ninject + ij + rnrev(injctcnt,dig5)
                    --- Save SSN 
                    if (spidɬ) then
                      pgroup%pid(ii,spid) = ssn
                      ssn = ssn+1
                    end if
                    --- Set particle weight to the scale factor (since nothing
                    --- here sets the weight). All particles are injected at
                    --- the same radius, whereas normally, the weight is scaled
                    --- by the radius.
                    if (wpid > 0) then
                      pgroup%pid(ii,wpid) = wtinject(ij,is)
                    endif

                    if (xbirthpid > 0) pgroup%pid(ii,xbirthpid) = pgroup%xp(ii)
                    if (ybirthpid > 0) pgroup%pid(ii,ybirthpid) = pgroup%yp(ii)
                    if (zbirthpid > 0) pgroup%pid(ii,zbirthpid) = pgroup%zp(ii)
                    if (uxbirthpid > 0) pgroup%pid(ii,uxbirthpid)=pgroup%uxp(ii)
                    if (uybirthpid > 0) pgroup%pid(ii,uybirthpid)=pgroup%uyp(ii)
                    if (uzbirthpid > 0) pgroup%pid(ii,uzbirthpid)=pgroup%uzp(ii)

                    --- increment particle counter
                    nti = nti + 1

                    --- increment random number counter
                    injctcnt = injctcnt + 1

                    --- Make injected particle live, though more work will
                    --- to be done to it below before its ready. But now, if
                    --- chckpart shifts particles around, this one will be
                    --- shifted also.
                    pgroup%ins(is) = pgroup%ins(is) - 1
                    pgroup%nps(is) = pgroup%nps(is) + 1

                  enddo
                enddo
              enddo

              --- Set number of particles that were injected for tinject = 2 or 3
              if (tinject == 2 .or. tinject == 3) then
                ntinject(jsid+1) = ntinject(jsid+1) + nti
              endif

            --- end loop over species
            enddo

          --- end loop over transverse injection sources
          enddo

          --- Call transverse particle scraping routine to force
          --- removal of particles outside the grid.
          do is=1,pgroup%ns
            jsid = pgroup%sid(is-1)
            if (.not. pgroup%ldts(is-1)) cycle
            ii = pgroup%ins(is)
            nn = ntinject(jsid+1)
            if (nn == 0) cycle

            --- Temporarily set nps so that only recently injected
            --- particles are affected. This is done only for optimization.
            nn = pgroup%nps(is) - ntinject(jsid+1)
            pgroup%nps(is) = ntinject(jsid+1)

            call stckxy3d(pgroup,is-1,zbeam,.false.)

#ifdef MPIPARALLEL
            --- Check if particles are within the transverse domain
            --- and delete them if not. Note that this makes inj_npactual
            --- unreliable since it is not updated to reflect the
            --- particles that are removed.
            if (solvergeom == RZgeom) then
              do ii=pgroup%ins(is),pgroup%ins(is)+pgroup%nps(is)-1
                rr = pgroup%xp(ii)**2 + pgroup%yp(ii)**2
                if (rr < xpminlocal**2 .or. rr >= xpmaxlocal**2) then
                  pgroup%gaminv(ii) = -1.
                endif
              enddo
            else
              do ii=pgroup%ins(is),pgroup%ins(is)+pgroup%nps(is)-1
                if (nx > 0) then
                  xm = pgroup%xp(ii)
                  if (l4symtry) xm = abs(xm)
                  if (xm < xpminlocal .or. xm >= xpmaxlocal) then
                    pgroup%gaminv(ii) = -1.
                  endif
                endif
                if (ny > 0) then
                  ym = pgroup%yp(ii)
                  if (l2symtry .or. l4symtry) ym = abs(ym)
                  if (ym < ypminlocal .or. ym >= ypmaxlocal) then
                    pgroup%gaminv(ii) = -1.
                  endif
                endif
              enddo
            endif
#endif

            --- Clear out the lost particles. Set fillmethod=3 so that
            --- live particles are shifted upward. This keeps the block
            --- of injected particle contiguous with the rest of the
            --- live particles.
            call clearpart(pgroup,is,3)
              
            --- Adjust ntinject(jsid+1) so it doesn't include the particles
            --- which were just scraped. Also, reset pgroup%nps(is).
            ntinject(jsid+1) = pgroup%nps(is)
            pgroup%nps(is) = nn + ntinject(jsid+1)

            --- Sum ntinject to get the total number of particles injected.
            npinjected = npinjected + ntinject(jsid+1)

          enddo

          --- print warning if no particles were injected
          if (debug .and. npinjected == 0) then
            call remark("No particles were injected transversely.")
          endif

          --- Now, advance the particles off of the emitting surface using
          --- isochronous leapfrog.  Each particle has it's own time step
          --- size, uniformly distributed between 0 and dt.
          do is=1,pgroup%ns
            jsid = pgroup%sid(is-1)
            --- Skip this if no particles of this species were injected.
            if (.not. pgroup%ldts(is-1)) cycle
            fulldt_s = dt*pgroup%ndts(is-1)*pgroup%dtscale(is)
            if(ntinject(jsid+1)==0) cycle

            --- Make sure that there is enough room in the temporary arrays.
            if (ntinject(jsid+1) > npgrp) then
              npgrp = ntinject(jsid+1)
              call gchange("Setpwork3d",0)
            endif

            --- Make equivalences so names make sense below
            bendres => yct
            bendradi => ypct

            --- Assume that injection will never be done inside a bend.
            bendres = 0.
            bendradi = 1.

            --- Create equivalences
            --- In this section of code, this is purely for convenience
            --- and to make the following code look cleaner.
            nn = ntinject(jsid+1)
            i1 = pgroup%ins(is)
            i2 = pgroup%ins(is)+ntinject(jsid+1)-1
            xx => pgroup%xp(i1:i2)
            yy => pgroup%yp(i1:i2)
            zz => pgroup%zp(i1:i2)
            ux => pgroup%uxp(i1:i2)
            uy => pgroup%uyp(i1:i2)
            uz => pgroup%uzp(i1:i2)
            gg => pgroup%gaminv(i1:i2)
            id => pgroup%pid(i1:i2,injpid)
            ex => pgroup%ex(i1:i2)
            ey => pgroup%ey(i1:i2)
            ez => pgroup%ez(i1:i2)
            bx => pgroup%bx(i1:i2)
            by => pgroup%by(i1:i2)
            bz => pgroup%bz(i1:i2)

            --- Zero the B field which is accumulated below
            ex = 0.
            ey = 0.
            ez = 0.
            bx = 0.
            by = 0.
            bz = 0.

            --- get E self-field at initial positions
            if (linj_efromgrid) then
              call fetche3dfrompositions(jsid,pgroup%ndts(is-1),nn,xx,yy,zz,
     &                                   ex,ey,ez,bx,by,bz)
              call fetchb3dfrompositions(jsid,pgroup%ndts(is-1),nn,xx,yy,zz,
     &                                   bx,by,bz)
            endif
            call tinj_sete3d(nn,xx,yy,zz,id,ex,ey,ez)

            --- get external fields at initial positions
            call exteb3d(nn,xx,yy,zz,uz,gg,0.,fulldt_s*0.5,bx,by,bz,ex,ey,ez,
     &                   pgroup%sm(is),pgroup%sq(is),bendres,bendradi,1.,fulldt_s)

            --- Now that the B field is gathered, the l_inj_along_B option
            --- can be handled.
            if (l_inj_along_B) then
              do ip=1,nn
                --- At this point, only the directed velocity has been added.
                vnorm = sqrt(ux(ip)**2 + uy(ip)**2 + uz(ip)**2)
                bmag  = sqrt(bx(ip)**2 + by(ip)**2 + bz(ip)**2)
                if (bmag > 0.) then
                  vtx = vnorm*bx(ip)/bmag
                  vty = vnorm*by(ip)/bmag
                  vtz = vnorm*bz(ip)/bmag
                  --- Make sure that the new v is in the same general
                  --- direction as the original.
                  if (ux(ip)*vtx + uy(ip)*vty + uz(ip)*vtz < 0.) then
                    vtx = -vtx
                    vty = -vty
                    vtz = -vtz
                  endif
                  ux(ip) = vtx
                  uy(ip) = vty
                  uz(ip) = vtz
                endif

                --- This ii won't give the same injctcnt sequence that was
                --- used above in the position, since some particles have
                --- been thrown away, but at least it gives unique random
                --- numbers.
                ii = injctcnt - nn + ip
                vtx = vthperp_s(jsid+1)
     &              *wrandomgauss(vtrandom,ii,dig3,dig4,1,1,.false.)
                vty = vthperp_s(jsid+1)
     &              *wrandomgauss(vtrandom,ii,dig5,dig6,1,1,.false.)
                vtz = vthz_s(jsid+1)
     &              *wrandomgauss(vzrandom,ii,dig7,dig8,1,1,.false.)
                ux(ip) = ux(ip) + vtx
                uy(ip) = uy(ip) + vty
                uz(ip) = uz(ip) + vtz

              enddo

            endif

            --- Fetch the time step size
            tt(1:nn) = fulldt_s*(id - int(id))

            --- do half velocity advance with E fields
            call epusht3d(nn,ux,uy,uz,ex,ey,ez,pgroup%sq(is),pgroup%sm(is),tt,0.5)

            --- Advance relativistic Gamma factor
            call gammaadv(nn,gg,ux,uy,uz,gamadv,lrelativ)

            --- do half velocity advance with B fields
            call bpusht3d(nn,ux,uy,uz,gg,bx,by,bz,pgroup%sq(is),pgroup%sm(is),
     &                    tt,0.5,ibpush)

            --- do full position advance
            call xpusht3d(nn,xx,yy,zz,ux,uy,uz,gg,tt)

            --- setrho is now done by the setrho call in padvnc3d.
          enddo

        elseif (itask == 2) then
  Do second part of constant current injection: get new E fields (this is
  done after field solve including injected particles), synchronize velocity
  with position, and then move velocity one half timestep back to match time
  level of rest of particles.
          --- loop over species
          do is=1,pgroup%ns
            jsid = pgroup%sid(is-1)
            if (.not. pgroup%ldts(is-1)) cycle
            if (ALL(ftinject(:,jsid+1) == 0.)) cycle
            fulldt_s = dt*pgroup%ndts(is-1)*pgroup%dtscale(is)

#ifndef MPIPARALLEL
            --- Skip this if no particles of this species were injected.
            if(ntinject(jsid+1)==0) cycle
            nn = ntinject(jsid+1)
#else
            --- Find particles that were freshly injected. These
            --- are particles which still have noninteger pid(:,injpid).
            allocate(mask(pgroup%nps(is)),stat=allocerror)
            if (allocerror /= 0) then
              print*,"inject3d: allocation error ",allocerror,
     &               ": could not allocate mask to shape ",pgroup%nps(is)
              call kaboom("inject3d: allocation error")
              return
            endif

            if (pgroup%nps(is) == 0) cycle
            i1 = pgroup%ins(is)
            i2 = pgroup%ins(is)+pgroup%nps(is)-1
            where (pgroup%pid(i1:i2,injpid) > ninject+1)
              mask = ((pgroup%pid(i1:i2,injpid) -
     &             int(pgroup%pid(i1:i2,injpid))))
            elsewhere
              mask = 0.
            endwhere
            nn = COUNT(mask > 0.)
            if (nn == 0) then
              deallocate(mask)
              cycle
            endif
#endif

            --- Make sure that there is enough room in the temporary arrays.
            if (nn > npgrp) then
              npgrp = nn
              call gchange("Setpwork3d",0)
            endif

            --- Make equivalences so names make sense below
            bendres => yct(1:nn)
            bendradi => ypct(1:nn)

            --- Assume that injection will never be done inside a bend.
            bendres = 0.
            bendradi = 1.

#ifndef MPIPARALLEL
            --- Fetch time step size
            i1 = pgroup%ins(is)
            i2 = pgroup%ins(is)+ntinject(jsid+1)-1
            tt(1:nn) = fulldt_s*(pgroup%pid(i1:i2,injpid) -
     &                       int(pgroup%pid(i1:i2,injpid)))
            --- Create equivalences
            xx => pgroup%xp(i1:i2)
            yy => pgroup%yp(i1:i2)
            zz => pgroup%zp(i1:i2)
            ux => pgroup%uxp(i1:i2)
            uy => pgroup%uyp(i1:i2)
            uz => pgroup%uzp(i1:i2)
            gg => pgroup%gaminv(i1:i2)
            id => pgroup%pid(i1:i2,injpid)
            ex => pgroup%ex(i1:i2)
            ey => pgroup%ey(i1:i2)
            ez => pgroup%ez(i1:i2)
            bx => pgroup%bx(i1:i2)
            by => pgroup%by(i1:i2)
            bz => pgroup%bz(i1:i2)
#else
            --- Create equivalences
            xx => xt(1:nn)
            yy => yt(1:nn)
            zz => zt(1:nn)
            ux => uxt(1:nn)
            uy => uyt(1:nn)
            uz => uzt(1:nn)
            gg => rt(1:nn)
            id => perpscal(1:nn)
            ex => at(1:nn)
            ey => bt(1:nn)
            ez => apt(1:nn)
            bx => bpt(1:nn)
            by => xct(1:nn)
            bz => xpct(1:nn)
            --- Fetch time step size
            tt(1:nn) = fulldt_s*PACK(mask,mask > 0.)
            --- Copy particle data into temporary arrays, collecting
            --- only particles that have been injected this time step.
            i1 = pgroup%ins(is)
            i2 = pgroup%ins(is)+pgroup%nps(is)-1
            xx = PACK(pgroup%xp(i1:i2),mask > 0.)
            yy = PACK(pgroup%yp(i1:i2),mask > 0.)
            zz = PACK(pgroup%zp(i1:i2),mask > 0.)
            ux = PACK(pgroup%uxp(i1:i2),mask > 0.)
            uy = PACK(pgroup%uyp(i1:i2),mask > 0.)
            uz = PACK(pgroup%uzp(i1:i2),mask > 0.)
            gg = PACK(pgroup%gaminv(i1:i2),mask > 0.)
            id = PACK(pgroup%pid(i1:i2,injpid),mask > 0.)
#endif

            --- Zero the B field which is accumulated below
            ex = 0.
            ey = 0.
            ez = 0.
            bx = 0.
            by = 0.
            bz = 0.

            --- calculate new E self-fields
            if (linj_efromgrid) then
              call fetche3dfrompositions(jsid,pgroup%ndts(is-1),nn,xx,yy,zz,
     &                                   ex,ey,ez,bx,by,bz)
              call fetchb3dfrompositions(jsid,pgroup%ndts(is-1),nn,xx,yy,zz,
     &                                   bx,by,bz)
            endif
            call tinj_sete3d(nn,xx,yy,zz,id,ex,ey,ez)

            --- Get external fields at current positions.
            call exteb3d(nn,xx,yy,zz,uz,gg,-fulldt_s*0.5,0.,bx,by,bz,ex,ey,ez,
     &                pgroup%sm(is),pgroup%sq(is),bendres,bendradi,1.,fulldt_s)

            --- complete B advance
            call bpusht3d(nn,ux,uy,uz,gg,bx,by,bz,pgroup%sq(is),pgroup%sm(is),
     &                    tt,0.5,ibpush)

            --- complete the E advance
            call epusht3d(nn,ux,uy,uz,ex,ey,ez,pgroup%sq(is),pgroup%sm(is),
     &                    tt,0.5)

            --- Advance relativistic Gamma factor
            call gammaadv(nn,gg,ux,uy,uz,gamadv,lrelativ)

            --- Now, move velocites back one half a step
            --- first half of a backward B advance
            call bpush3d(nn,ux,uy,uz,gg,bx,by,bz,pgroup%sq(is),pgroup%sm(is),
     &                    -0.5*fulldt_s,ibpush)

            --- then half of a backward E advance
            call epush3d(nn,ux,uy,uz,ex,ey,ez,pgroup%sq(is),pgroup%sm(is),
     &                    -0.5*fulldt_s)

            --- Advance relativistic Gamma factor
            call gammaadv(nn,gg,ux,uy,uz,gamadv,lrelativ)

#ifdef MPIPARALLEL
            --- Copy data back into the particle arrays
            i1 = pgroup%ins(is)
            i2 = pgroup%ins(is)+pgroup%nps(is)-1
            pgroup%xp(i1:i2) = UNPACK(xx,maskɬ.,pgroup%xp(i1:i2))
            pgroup%yp(i1:i2) = UNPACK(yy,maskɬ.,pgroup%yp(i1:i2))
            pgroup%zp(i1:i2) = UNPACK(zz,maskɬ.,pgroup%zp(i1:i2))
            pgroup%uxp(i1:i2) = UNPACK(ux,maskɬ.,pgroup%uxp(i1:i2))
            pgroup%uyp(i1:i2) = UNPACK(uy,maskɬ.,pgroup%uyp(i1:i2))
            pgroup%uzp(i1:i2) = UNPACK(uz,maskɬ.,pgroup%uzp(i1:i2))
            pgroup%ex(i1:i2) = UNPACK(ex,maskɬ.,pgroup%ex(i1:i2))
            pgroup%ey(i1:i2) = UNPACK(ey,maskɬ.,pgroup%ey(i1:i2))
            pgroup%ez(i1:i2) = UNPACK(ez,maskɬ.,pgroup%ez(i1:i2))
            pgroup%bx(i1:i2) = UNPACK(bx,maskɬ.,pgroup%bx(i1:i2))
            pgroup%by(i1:i2) = UNPACK(by,maskɬ.,pgroup%by(i1:i2))
            pgroup%bz(i1:i2) = UNPACK(bz,maskɬ.,pgroup%bz(i1:i2))
            pgroup%pid(i1:i2,injpid) = int(pgroup%pid(i1:i2,injpid))
            deallocate(mask)
#endif

         --- end of loop over species
         enddo

        endif

      endif
      --- End of transverse injection

!$OMP MASTER
      if (lw3dtimesubs) timeinject3d = timeinject3d + wtime() - substarttime
!$OMP END MASTER

      return
      end

[w3dgen]
      subroutine injctint(pgroup)
      use ParticleGroupmodule
      use Subtimersw3d
      use Constant
      use Beam_acc
      use InGen
      use InGen3d
      use InPart
      use InMesh3d
      use Particles,Only: npmax,wpid,tpid,rpid
      use InjectVars
      use InjectVars3d
      use Setpwork3d
      use Picglb3d
      use Parallel
      type(ParticleGroup):: pgroup

      real(kind=8):: xmin,xmax,ymin,ymax
      real(kind=8):: ixmin,ixmax,iymin,iymax
      integer(ISZ):: inx,iny,iz
      integer(ISZ):: js,is,jsid,ip,ij,nn,nnmax,ij1,ij2
      real(kind=8):: total_frac,dd,maxdxdy
      real(kind=8):: maxab1,maxab2,minab1,minab2
      real(kind=8):: circum,circummax
      integer(ISZ), external :: oneiftrue, nextpid
      real(kind=8):: ellipseperimeter
      real(kind=8):: substarttime,wtime
      if (lw3dtimesubs) substarttime = wtime()

      if ((inject < 1 .or. inject > 8) .and.
     &    (tinject < 1 .or. tinject > 3)) return

  Set wpid if needed and not set by user
      if((l_inj_rz .or. l_inj_regular) .and. wpid==0) then
        if (wpid == 0) wpid = nextpid()
      endif

  If delay on setting temperature of injected particles, then
  time of particle creation is recorded initially with a negative value
  switched to positive when temperature has been added.
      if(l_inj_delay_temp) l_inj_rec_inittime = .true.

  Set tpid if needed and not set by user
      if(l_inj_rec_inittime .and. tpid==0) then
        if (tpid == 0) tpid = nextpid()
      end if

  Set rpid if needed and not set by user
      if(l_inj_rec_initradius .and. rpid==0) then
        if (rpid == 0) rpid = nextpid()
      end if

  Set pid index for injection information for particles
      if (injpid == 0) injpid = nextpid()

  Make sure that space has been allocated for the arrays. This is needed
  in case the user changes ninject or ns but doesn't explicitly allocate
  the additional needed space.
      call gchange("InjectVars",0)

  Initialize beam size from initial envelope parameters if beam size not set
  One problem with this is that if the user ever wants apinject or bpinject
  to be zero while ap0 or bp0 not be zero.  The user would have to reset
  apinject or bpinject back to zero after the generate.
      do ij=1,ninject
        if (ainject(ij)  == 0.) ainject(ij)  = a0
        if (binject(ij)  == 0.) binject(ij)  = b0
        if (apinject(ij) == 0.) apinject(ij) = ap0
        if (bpinject(ij) == 0.) bpinject(ij) = bp0
      enddo

      --- Setup grids covering injection surfaces.
      call getparticleextent(xmmin,xmmin+nx*dx,ymmin,ymmin+ny*dy,
     &                       l2symtry,l4symtry,solvergeom,
     &                       xmin,xmax,ymin,ymax)
      inj_ninj = ninject

      if (inj_dx == 0.) inj_dx = dx
      if (inj_dy == 0.) then
        if (solvergeom == RZgeom .or. l_inj_rz .or. l_inj_rz_grid) then
          inj_dy = inj_dx
        else
          inj_dy = dy
        endif
      endif
      if (inj_dz == 0.) inj_dz = dz

      do ij=1,ninject
        if (l_inj_rz .or. l_inj_rz_grid) then
          ixmin = 0
          ixmax = min(xmax,xinject(ij) + ainject(ij) + 2*inj_dx)
        else
          ixmin = max(xmin,xinject(ij) - ainject(ij) - 2*inj_dx)
          ixmax = min(xmax,xinject(ij) + ainject(ij) + 2*inj_dx)
        endif

        inx = nint((ixmax - ixmin)/inj_dx)
        inj_nx = max(inj_nx,inx)

        if (l_inj_rz .or. l_inj_rz_grid) then
          inj_ny = 0
        else
          iymin = max(ymin,yinject(ij) - binject(ij) - 2*inj_dy)
          iymax = min(ymax,yinject(ij) + binject(ij) + 2*inj_dy)
          iny = nint((iymax - iymin)/inj_dy)
          inj_ny = max(inj_ny,iny)
        endif

      enddo
      call gchange("InjectVars3d",0)

      if(l_inj_rz .or. l_inj_rz_grid) then
        if(.not. l_inj_area) then
          inj_dx = ainject(1)/(real(nint(ainject(1)/inj_dx))+0.5)
          inj_xwide = 1
          inj_ywide = 1
        end if
      else
        l_inj_area = .true.
      end if

      --- This is just the standard assumption.
      --- Keep for backwards compatibility.
      if (l_inj_rz) l_inj_exact = .true.

      inj_dz0 = inj_dz
      if(inj_nzɭ) inj_dz = inj_d(1)*inj_dz/(real(inj_nz-1)*2.**(1./3.)+real(2-inj_nz))**3
      if(inj_nz > 1 .and. ninject > 1) then
        call kaboom("injctint: inj_nz > 1 not yet supported for ninject > 1")
        return
      end if

      --- Set mins for meshes around each source.
      if (l_inj_rz .or. l_inj_rz_grid) then
        inj_xmmin = 0.
        inj_ymmin = 0.
      else
        inj_xmmin = max(xmin, min(inj_xmmin,- inj_nx/2*inj_dx))
        inj_ymmin = max(ymin, min(inj_ymmin,- inj_ny/2*inj_dy))
      endif

  Set up finject array if not set by the user (if sum of finject = 0).
  Give each injection source the same distribution of species.
      if (ninject > 0 .and. ns > 0) then
        if (sum(finject) == 0.) then
          do is=1,ns
            finject(:,is) = sp_fract(is)
          enddo
        endif
      endif

  Set value of vzinject for inject==1 if not already set. For other
  types of injection, vzinject is assumed to be zero, since the starting
  velocity is calculated from the local fields.
      do ij=1,ninject
        do is=1,ns
          if (inject == 1 .and. vzinject(ij,is) == 0.) then
            vzinject(ij,is) = vbeam_s(is)
          endif
        enddo
      enddo

  Set the injection length and start, normally the length of one time step
  and at zimin, but can be set by user.
      if (leninjct == 0) leninjct = vbeam*dt

      --- Calculate number of particles to inject for each species if
      --- not set by the user.
      --- npinje_s was being multiplied by sp_fract, but it is also being
      --- multiplied by finject in inject3d, so the fraction was being
      --- applied twice. The one here was removed since the finject is
      --- still needed to control npinje_s per injection source.
      --- The sp_fract was then also removed from the calculation of sw below.
      do is=1,ns
        if (npinje_s(is) == 0) npinje_s(is) = npinject
        if (rnpinje_s(is) == 0.) rnpinje_s(is) = npinje_s(is)
      enddo

      --- Print warning if npinject=0
      if (npinject == 0) then
        call remark("ERROR: injctint, npinject is zero, no particles will
     &  be injected.")
      endif

  Each species is stored in seperate parts of the particle arrays, the size of
  which is based off the beam fraction of each species.  Check if there is
  enough space in each of the species blocks.
  If there are no particles yet, then reallocate the arrays to the correct
  size. This is much faster when there are multiple species.
  Also set particle parameters: charge, mass and weight
      total_frac = sum(sp_fract)
      if (maxval(pgroup%nps) == 0) then
        npmax = max(injctspc,npmax)
        np_s = 0
        sp_fract = sp_fract/total_frac
        call alotpart(pgroup)
        sp_fract = sp_fract*total_frac
      endif
      do js=0,pgroup%ns-1
        if (ninject == 0) cycle
        --- If no particles are to be injected from this species, then skip
        --- the code below.
        if (sum(finject(:,js+1)) == 0.) cycle
        jsid = pgroup%sid(js)
        if (maxval(pgroup%nps) /= 0) then
          call chckpart(pgroup,js+1,int(injctspc*sp_fract(jsid+1)/total_frac+1),0)
        endif
        if (pgroup%sq(js+1) == 0.) pgroup%sq(js+1) = zion_s(jsid+1)*echarge
        if (pgroup%sm(js+1) == 0.) pgroup%sm(js+1) = aion_s(jsid+1)*amu
        if (pgroup%sw(js+1) == 0.) then
          if (npinje_s(jsid+1) > 0 .and. pgroup%sq(js+1) /= 0.) then
            pgroup%sw(js+1) = abs((ibeam_s(jsid+1)*dt*pgroup%ndts(js)/
     &                            pgroup%sq(js+1))/npinje_s(jsid+1))
          else if (js > 0 .and.
     &             pgroup%sq(js+1) == pgroup%sq(js) .and.
     &             pgroup%sm(js+1) == pgroup%sm(js) .and.
     &             pgroup%ndts(js) == 2*pgroup%ndts(js-1)) then
            --- Automatically set the sw for species with larger ndts since in
            --- general there will be no particles in those groups initially
            --- (so npinje_s will be zero and the above skipped).
            pgroup%sw(js+1) = pgroup%sw(js)
          endif
        endif
      enddo

  Set the variable INJCTCNT
  This is used for loading the injection particles.  It ensures that the
  injected particles have new random numbers.
      injctcnt = sum(pgroup%nps) + 1 + randoffset

  Set up Setpwork3d arrays
  --- Estimate number of particles injected
      nnmax = 0
      do is=1,ns
        nn = 0
        do ij=1,ninject
          nn = nn + npinje_s(is)*finject(ij,is)
        enddo
        if (nn > nnmax) nnmax = nn
      enddo
      if (nnmax > npgrp) then
        npgrp = nnmax
        call gchange("Setpwork3d",0)
      endif

  Setup transverse injection

  Set the z cell size and number for transverse injection. The user can set nztinj.
      do ij=1,ntinj
        if (nztinj(ij) == 0) nztinj(ij) = nint((ztinjmx(ij) - ztinjmn(ij))/dz)
        if (nztinj(ij) == 0) nztinj(ij) = 1
        dztinj(ij) = (ztinjmx(ij) - ztinjmn(ij))/nztinj(ij)
        nztmax = max(nztmax, nztinj(ij))
      enddo
      call gchange("InjectVars",0)

  Set the default values of atinject and btinject and calculate the number
  of theta points if not already set.
      do ij=1,ntinj
        if (atinject(ij) == -1.) atinject(ij) = a0
        if (btinject(ij) == -1.) btinject(ij) = b0
        circummax = 0.
        do iz=0,nztmax
          if (atinjectz(iz,ij) == -1.) atinjectz(iz,ij) = atinject(ij)
          if (btinjectz(iz,ij) == -1.) btinjectz(iz,ij) = btinject(ij)
          if (thetamintinj(iz,ij) == -1. .and. thetamaxtinj(iz,ij) == -1.) then
            thetamintinj(iz,ij) = 0.
            thetamaxtinj(iz,ij) = 2.*pi
          endif
          circum = ellipseperimeter(atinjectz(:,ij),btinjectz(:,ij))*
     &             (thetamaxtinj(iz,ij) - thetamintinj(iz,ij))/(2.*pi)
          circummax = max(circummax,circum)
        enddo
        if (nttinj(ij) == 0) nttinj(ij) = nint(circummax/min(inj_dx,inj_dy))
        nttinjmax = max(nttinjmax,nttinj(ij))
      enddo
      call gchange("InjectVars",0)

  Setup some things for transverse injection if there are any sources defined.
      if (ntinj > 0) then

  Set up ftinject array if not set by the user (if sum of ftinject = 0).
  Give each injection source the same distribution of species.
        if (sum(ftinject) == 0.) then
          do is=1,ns
            ftinject(:,is) = sp_fract(is)
          enddo
        endif

  In case there is no axial injection, set the particle weight.
        do is=1,pgroup%ns
          --- If no particles are to be injected from this species, then skip
          --- the code below.
          if (sum(ftinject(:,is)) == 0.) cycle
          jsid = pgroup%sid(is-1)
          if (pgroup%sq(is) == 0.) pgroup%sq(is) = zion_s(jsid+1)*echarge
          if (pgroup%sm(is) == 0.) pgroup%sm(is) = aion_s(jsid+1)*amu
          if (pgroup%sw(is) == 0. .and. ntinject(jsid+1) > 0) then
            pgroup%sw(is) = abs((ibeam_s(jsid+1)*dt*pgroup%ndts(is-1)/
     &                          pgroup%sq(is))/ntinject(jsid+1))
          endif
        enddo

      endif

  Determine what the value of inj_ns should be. It can either be 1 or
  ns: if only one species will be injected from each source, the it is 1,
  otherwise, ns. This is needed in order that inj_prev and inj_np are set
  correctly. If multiple species are being injected from a single source,
  then inj_prev and inj_np must be saved for each species. In order to
  minimize the bookkeepping, if more than one species is injected from any
  source, than the two arrays are made big enough to save the data for all
  species for each source. If cases come up where this is too much wasted
  space, then the code can later be changed by only making inj_ns as big
  as is needed, i.e. the maximum number of species emitted from any one
  source.  
      inj_ns = 1
      do ij=1,ninject
        nn = 0
        do is=1,ns
          if (finject(ij,is) > 0.) nn = nn + 1
        enddo
        if (nn > 1) inj_ns = ns
      enddo

  Determine what the value if inj_ninj should be. It can either be 1 or
  ninject: if no sources are within 2 grid cells of another, then it is 1,
  otherwise ninject. This is needed since some parameters, inj_prev,
  inj_np, inj_area, and inj_rho, are stored on grids and must be known for
  each emitting source. If the sources are too close, then the data can
  overlap. In that case, the data is saved in seperate planes for each
  source. In order to minimize bookkeepping, if any two source (or more)
  sources are too close, then the arrays are made big enough so that all
  of the data is saved seperately for each source (whether or not they are
  too close to another). If cases comes up where this is too much wasted
  space, then the code can be changed so that inj_ninj is only made as big
  as needed, i.e. the maximum number of sources that are too close to each
  other.  
  The radius of each source is taken to be the maximum of a and b. This
  makes the checks much simpler but overly conservative.
      inj_ninj = 1
      maxdxdy = max(inj_dx,inj_dy)
      do ij1=1,ninject
        maxab1 = max(ainject(ij1),binject(ij1))
        minab1 = min(ainjmin(ij1),binjmin(ij1))
        do ij2=1,ninject
          if (ij1 == ij2) cycle
          dd = sqrt((xinject(ij1)-xinject(ij2))**2 +
     &              (yinject(ij1)-yinject(ij2))**2)
          maxab2 = max(ainject(ij2),binject(ij2))
          minab2 = min(ainjmin(ij2),binjmin(ij2))
          --- Check if the outer edges are near each other. If not
          --- go to next pair.
          if (maxab1 + maxab2 < dd - 2.*maxdxdy) cycle
          --- Check if one is inside the other.
          if (dd + maxab1 < minab2 - 2.*maxdxdy) cycle
          if (dd + maxab2 < minab1 - 2.*maxdxdy) cycle
          --- If it gets through the checks above, these two source are
          --- too close
          inj_ninj = ninject
        enddo
      enddo
      --- Also check if sources are at different z locations. If so, then
      --- save data for each source seperately. Checks if RMS of zinject
      --- is zero.
      if (sum(zinject**2) - sum(zinject)**2/ninject /= 0.) then
        inj_ninj = ninject
      endif


  Allocate inject arrays and fill with source location information.
  Note that gchange is called instead of gallot since some the of arrays
  in InjectVars3d may have been allocated by the user for specialized
  control of injection.
      call gchange("InjectVars3d",0)
      inj_xwide = max(1,inj_xwide)
      inj_ywide = max(1,inj_ywide)
      if(l_inj_rz .or. l_inj_rz_grid) then
        call fill_inj_rz()
      else
        call fill_inj()
      end if

  Check if the voltages were specified.  If so, set so conductor will be
  set up for source.
       do ij=1,ninject
         if (vinject(ij) /= 0) lvinject = .true.
       enddo

  Set vbeamfrm to zero so that the grid does not move off of the source,
  causing a code crash.
      vbeamfrm = 0.

  When using constant current injection, turn off the flag that specifies
  use of the normal E field with the Child-Langmuir profile
      if (inject == 1 .or. inject == 4 .or. inject == 5 .or. inject == 6 .or.
     &    inject == 7 .or. inject == 8) then
        linj_enormcl = .false.
       endif

!$OMP MASTER
      if (lw3dtimesubs) timeinjctint = timeinjctint + wtime() - substarttime
!$OMP END MASTER
      return
      end

[injctint]
      subroutine fill_inj()
      use Subtimersw3d
      use InPart
      use InMesh3d
      use InjectVars
      use InjectVars3d

      --- Fill the injection grid array.  A padding of 2 grid cells is also
      --- set.
      --- The array inj_grid holds the axial location of the injection source
      --- as a function of the transverse coordinates, in the lab frame.
      --- The array inj_angl holds the angle due to the curvature of the source
      --- as a function of the transverse coordinates.


      integer(ISZ):: ij,ix,iy
      real(kind=8):: xx,yy,rrsq,ainj,binj,ai_dx,bi_dy,dxi,dyi
      real(kind=8):: xl,yl,xu,yu,x1,x2,txl,tyl,txu,tyu
      real(kind=8):: ainji,binji
      real(kind=8):: area
      logical(ISZ):: linj_cell
      real(kind=8):: substarttime,wtime
      if (lw3dtimesubs) substarttime = wtime()

      dxi = 1./inj_dx
      dyi = 1./inj_dy

  Loop over the injection sources.
      do ij=1,ninject

        --- Provide some simple error checking
        if (ainject(ij) > abs(rinject(ij)) .or.
     &      binject(ij) > abs(rinject(ij))) then
          call kaboom("fill_inj: Error: the radius of curvature of the injection source must
     & be greater than or equal to its transverse radius")
          return
        endif

        --- If the injection radii are zero, then skip
        if (ainject(ij) == 0. .or. binject(ij) == 0.) cycle

        --- Calculate the extent of the source plus guard cells.
        ainj    = ainject(ij)
        binj    = binject(ij)
        ainji   = 1./ainj
        binji   = 1./binj
        ai_dx = ainject(ij) + 2.*inj_dx
        bi_dy = binject(ij) + 2.*inj_dy
        --- Loop over transverse plane.
        do iy = 0,inj_ny
          do ix = 0,inj_nx
            --- transverse coordinate relative to center of source
            xx = ix*inj_dx + inj_xmmin(ij)
            yy = iy*inj_dy + inj_ymmin(ij)
            
            --- Check whether the cell is within the injection area.
            --- This depends on whether the source is elliptical or
            --- rectangular.
            if(linj_rectangle) then
              linj_cell = (abs(xx) <= ai_dx) .and. (abs(yy) <= bi_dy)
            else
              linj_cell = ((xx*bi_dy)**2 + (yy*ai_dx)**2) <= (ai_dx*bi_dy)**2
            endif

            --- If the point is within source or guard cells, calculate both
            --- axial distance and angle of curvature.
            if (linj_cell) then
              --- Write the expression for inj_grid so as to avoid
              --- subtraction of large, similar numbers when rinject is large.
              inj_grid(ix,iy,ij) = (rinject(ij) -
     &               sqrt(max(0.,rinject(ij)**2 - xx**2 - yy**2)))
              rrsq = xx**2 +  yy**2
              if (rrsq > rinject(ij)**2) rrsq = max(ainject(ij),binject(ij))**2
              inj_grid(ix,iy,ij) = rrsq/
     &             (abs(rinject(ij)) + sqrt(max(0.,rinject(ij)**2 - rrsq)))
              if (rinject(ij) < 0.) inj_grid(ix,iy,ij) = -inj_grid(ix,iy,ij)

              inj_angl(ix,iy,ij) = asin(sqrt(rrsq)/rinject(ij))

              --- Calculate fraction of grid cell which is within the emitting
              --- surface.  Used to scale the charge density on the emitting
              --- surface.
              --- The calculation is done with the emitting area scaled to
              --- a unit circle.
              --- The result is the fraction of the two by two block around
              --- the grid point which is inside the emitting surface.

              --- First, put the grid point in the first quadrant and scale it.
              --- The max(0,) chop off any part that crosses the x or y axis.
              xl = max(0.,(abs(xx) - 0.5*inj_xwide*inj_dx)*ainji)
              yl = max(0.,(abs(yy) - 0.5*inj_ywide*inj_dy)*binji)
              xu =        (abs(xx) + 0.5*inj_xwide*inj_dx)*ainji
              yu =        (abs(yy) + 0.5*inj_ywide*inj_dy)*binji

              --- If the two by two block is completely outside emitting
              --- surface, set the area to zero.
              if (xl**2 + yl**2 >= 1.) then
                area = 0.

              --- If two by two block straddles edge of emitting surface, do
              --- the calculation.
              elseif (xl**2 + yl**2 < 1. .and.
     &                                1. < xu**2 + yu**2) then
                --- Calculate area of block within first quadrant.
                x1 = max(xl, sqrt(max(0.,1. - yu**2)))
                x2 = min(xu, sqrt(max(0.,1. - yl**2)))
                area = (x1 - xl)*(yu - yl) +
     &                 0.5*x2*sqrt(1. - x2**2) + 0.5*asin(x2) -
     &                 0.5*x1*sqrt(1. - x1**2) - 0.5*asin(x1) -
     &                 (x2 - x1)*yl
                --- If block extended past x axis, calculate the area of
                --- that piece.
                if (abs(xx) - inj_dx < 0.) then
                  txu = - (abs(xx) - inj_dx)*ainji
                  txl = 0.
                  x1 = max(txl, sqrt(max(0.,1. - yu**2)))
                  x2 = min(txu, sqrt(max(0.,1. - yl**2)))
                  area = area + (x1 - txl)*(yu - yl) +
     &                          0.5*x2*sqrt(1. - x2**2) + 0.5*asin(x2) -
     &                          0.5*x1*sqrt(1. - x1**2) - 0.5*asin(x1) -
     &                          (x2 - x1)*yl
                endif
                --- If block extended past y axis, calculate the area of
                --- that piece.
                if (abs(yy) - inj_dy < 0.) then
                  tyu = - (abs(yy) - inj_dy)*binji
                  tyl = 0.
                  x1 = max(xl, sqrt(max(0.,1. - tyu**2)))
                  x2 = min(xu, sqrt(max(0.,1. - tyl**2)))
                  area = area + (x1 - xl)*(tyu - tyl) +
     &                          0.5*x2*sqrt(1. - x2**2) + 0.5*asin(x2) -
     &                          0.5*x1*sqrt(1. - x1**2) - 0.5*asin(x1)
                endif

                --- Normalize the area so that the total area of the block
                --- would be 1 (since area was calculated above on a
                --- unit circle).
                area = area*dxi*dyi*ainj*binj/(inj_xwide*inj_ywide)

                --- Prevent the area from being too small. When it is too
                --- small, there are problems when the code does rho/area,
                --- producing erroneously large numbers.
                --- The min value of 0.01 is arbitrary, but should provide a 
                --- nice balance between a number that is small enough so
                --- cells near the edge don't contribute too much and large
                --- enough to avoid computational problems.
                area = max(area,.01)

              else
                --- If the block is completely within the emitting surface,
                --- set the area to 1.
                area = 1.
              endif

              --- Take into account the tent function of the charge density,
              --- i.e. convert the integral of the area to an integral
              --- of the tent function over the part of the two by two block
              --- within the emitting surface.  This is an approximate
              --- calculation.  The conversion was obtained by completely
              --- integrating over one dimension and partially intregrating
              --- over the other.
              if (area <= 0.5) then
                area = 2.*area**2
              else
                area = 4.*area - 2.*area**2 -1.
              endif

              --- Now assign to inj_area. This is done here to simplify the
              --- coding above, primarily because of the third index here.
              --- Note that the min works, since inj_ninj is either 1 or
              --- ninject: if 1, then the min is always 1, if ninject, then
              --- the min is always ij.
              inj_area(ix,iy,ij) = area

            endif
          enddo
        enddo
      enddo

!$OMP MASTER
      if (lw3dtimesubs) timefill_inj = timefill_inj + wtime() - substarttime
!$OMP END MASTER
      return
      end

[injctint]
      subroutine fill_inj_rz()
      use Subtimersw3d
      use InPart
      use InMesh3d
      use InjectVars
      use InjectVars3d

      --- Fill the injection grid array.  A padding of 2 grid cells is also
      --- set.
      --- The array inj_grid holds the axial location of the injection source
      --- as a function of the transverse coordinates, in the lab frame.
      --- The array inj_angl holds the angle due to the curvature of the source
      --- as a function of the transverse coordinates.


      integer(ISZ):: ij,ix
      real(kind=8):: xx,ainj,ai_dx,dxi
      real(kind=8):: xl,xu
      real(kind=8):: ainji
      real(kind=8):: area
      real(kind=8):: substarttime,wtime
      if (lw3dtimesubs) substarttime = wtime()

      dxi = 1./inj_dx

  Loop over the injection sources.
      do ij=1,ninject
        --- Calculate the extent of the source plus guard cells.
        ainj    = ainject(ij)
        ainji   = 1./ainj
        ai_dx = ainject(ij) + 2.*inj_dx
        --- Loop over transverse plane.
        do ix = 0,inj_nx
            --- transverse coordinate relative to center of source
            xx = ix*inj_dx + inj_xmmin(ij)
            --- If the point is within source or guard cells, calculate both
            --- axial distance and angle of curvature.
            if (xx - ai_dx <= 0.) then
              --- Write the expression for inj_grid so as to avoid
              --- subtraction of large, similar numbers when rinject is large.
              inj_grid(ix,0,ij) = zinject(ij) + (rinject(ij) -
     &               sqrt(max(0.,rinject(ij)**2 - xx**2)))
              inj_grid(ix,0,ij) = xx**2 /
     &         (abs(rinject(ij)) + sqrt(max(0.,rinject(ij)**2 - xx**2)))
              if (rinject(ij) < 0.) inj_grid(ix,0,ij) = -inj_grid(ix,0,ij)
              if (xx**2 > rinject(ij)**2) inj_grid(ix,0,ij) = rinject(ij)

              inj_angl(ix,0,ij) = asin(max(-1.,min(1.,abs(xx)/rinject(ij))))

              --- Calculate fraction of grid cell which is within the emitting
              --- surface.  Used to scale the charge density on the emitting
              --- surface.
              --- The calculation is done with the emitting area scaled to
              --- a unit circle.
              --- The result is the fraction of the two by two block around
              --- the grid point which is inside the emitting surface.

              --- First, put the grid point in the first quadrant and scale it.
              --- The max(0,) chop off any part that crosses the x or y axis.
              xl = max(0.,(abs(xx) - 0.5*inj_xwide*inj_dx)*ainji)
              xu =        (abs(xx) + 0.5*inj_xwide*inj_dx)*ainji

              --- If the two by two block is completely outside emitting
              --- surface, set the area to zero.
              if (xl >= 1.) then
                area = 0.

              --- If two by two block straddles edge of emitting surface, do
              --- the calculation.
              elseif (xl < 1. .and. 1. < xu) then
                if(l_inj_area) then
                  area = (0.5*(xl*ainj+ainject(ij))*(ainject(ij)-xl*ainj))
     &                 / (xx*inj_dx*inj_xwide)
                else
                  area = 0.
                end if

              else
                --- If the block is completely within the emitting surface,
                --- set the area to 1.
                area = 1.
              endif

              --- Now assign to inj_area. This is done here to simplify the
              --- coding above, primarily because of the third index here.
              --- Note that the min works, since inj_ninj is either 1 or
              --- ninject: if 1, then the min is always 1, if ninject, then
              --- the min is always ij.
              inj_area(ix,0,min(ij,inj_ninj)) = area

            endif
        enddo
      enddo

!$OMP MASTER
      if (lw3dtimesubs) timefill_inj = timefill_inj + wtime() - substarttime
!$OMP END MASTER
      return
      end

[getinj_phi] [getinj_phi_3d] [getinj_phi_mr] [inj_addtemp3d] [inj_sete3d] [inj_sete_3darray] [inj_setrho3d] [inj_setrho3d_z] [inj_setrhomr] [inject3d]
      subroutine inj_transform(np,x,y,z,ni,ijp,tsign,ishift)
      use Subtimersw3d
      use InjectVars
      use InjectVars3d
      integer(ISZ):: np,ni
      real(kind=8):: x(np),y(np),z(np)
      integer(ISZ):: ijp(ni),tsign
      integer(ISZ):: ishift

  Transform coordinates from frame of source to frame of lab, or
  vice versa.
  When tsign = +1, transform from source frame to lab frame
       tsign = -1, transform from lab frame to source frame
  ishift specifies what kind of coordinate shift is done.
   ishift = 1, data is shifted by grid position
   ishift = 2, data is shifted by velocity angle
   for any other value, the shift is skipped.

      real(kind=8):: theta,phi,ct,st,cp,sp
      real(kind=8):: xp,yp,zp
      integer(ISZ):: i,ij
      real(kind=8):: substarttime,wtime
      if (lw3dtimesubs) substarttime = wtime()

      --- If transforming to coordinates of source, subtract source center
      if (tsign < 0 .and. ishift > 0) then
        if (ishift == 1) then
          do i=1,np
            ij = ijp(min(i,ni))
            if (0 < ij .and. ij <= ninject  ) then
              x(i) = x(i) - xinject(ij)
              y(i) = y(i) - yinject(ij)
              z(i) = z(i) - zinject(ij)
            endif
          enddo
        else if (ishift == 2) then
          call kaboom("inj_transform: Error: invalid ishift value - if you get this error,
     & report it to DPGrote@lbl.gov")
          return
          --- This code is never used and so is commented out. Note also that
          --- vzinject now has a second dimension, ns.
          do i=1,np
            ij = ijp(min(i,ni))
            if (0 < ij .and. ij <= ninject  ) then
              x(i) = x(i) - xpinject(ij)*vzinject(ij)
              y(i) = y(i) - ypinject(ij)*vzinject(ij)
              z(i) = z(i) - vzinject(ij)
            endif
          enddo
        endif
      endif

      if (maxval(abs(thetainject)) > 0. .or. maxval(abs(phiinject)) > 0.) then
        if (maxval(ijp) == minval(ijp)) then
          ij = ijp(1)
          --- Convert xpinject and ypinject into the theta and phi angles.
          --- In the transformation from the source to the lab frame,
          --- for points along the axis in the source frame, the slopes
          --- in the lab frame must be preserved. So, for two points
          --- (0,0,-z) and (0,0,z) in the source frame, the slopes are given by
          --- xpinject = (x2 - x1)/(z2 - z1) and
          --- ypinject = (y2 - y1)/(z2 - z1)
          --- where (x1,y1,z1) and (x2,y2,z2) are the two points rotated into
          --- the lab frame. Plugging in the transformation equations below
          --- produces and inverting produces the following result.
          theta = atan(xpinject(ij))
          phi = atan(ypinject(ij)*cos(theta))
          theta = thetainject(ij)
          phi = phiinject(ij)
          ct = cos(theta)
          st = sin(theta)
          cp = cos(phi)
          sp = sin(phi)
        endif
        do i=1,np
          ij = ijp(min(i,ni))
          if (0 < ij .and. ij <= ninject  ) then
            if (ni > 1) then
              --- See comments above.
              theta = thetainject(ij)
              phi = phiinject(ij)
              ct = cos(theta)
              st = sin(theta)
              cp = cos(phi)
              sp = sin(phi)
            endif
            if (tsign == -1) then
              --- Transform from lab frame to source frame
              --- This transformation is a rotation about the y axis by theta,
              --- followed by a rotation about the new x axis by phi.
              xp = +x(i)*ct              - z(i)*st
              yp = -x(i)*st*sp + y(i)*cp - z(i)*ct*sp
              zp = +x(i)*st*cp + y(i)*sp + z(i)*ct*cp
            else
              --- Transform from source frame to lab frame
              --- This transformation is a rotation about the x axis by phi,
              --- followed by a rotation about the new y axis by theta.
              xp = +x(i)*ct - y(i)*st*sp + z(i)*st*cp
              yp =          + y(i)*cp    + z(i)*sp
              zp = -x(i)*st - y(i)*ct*sp + z(i)*ct*cp
            endif
            x(i) = xp
            y(i) = yp
            z(i) = zp
          endif
        enddo
      endif

      --- If transforming from coordinates of source, add source center
      if (tsign > 0 .and. ishift > 0) then
        if (ishift == 1) then
          do i=1,np
            ij = ijp(min(i,ni))
            if (0 < ij .and. ij <= ninject  ) then
              x(i) = x(i) + xinject(ij)
              y(i) = y(i) + yinject(ij)
              z(i) = z(i) + zinject(ij)
            endif
          enddo
        else if (ishift == 2) then
          call kaboom("inj_transform: Error: invalid ishift value - if you get this error,
     & report it to DPGrote@lbl.gov")
          return
          --- This code is never used and so is commented out. Note also that
          --- vzinject now has a second dimension, ns.
          do i=1,np
            ij = ijp(min(i,ni))
            if (0 < ij .and. ij <= ninject  ) then
              x(i) = x(i) + xpinject(ij)*vzinject(ij)
              y(i) = y(i) + ypinject(ij)*vzinject(ij)
              z(i) = z(i) + vzinject(ij)
            endif
          enddo
        endif
      endif

!$OMP MASTER
      if (lw3dtimesubs) timeinj_transform = timeinj_transform + wtime() - substarttime
!$OMP END MASTER
      return
      end

[inject3d] [multigridrzf_risetime]
      subroutine getinj_phi()
      use Subtimersw3d
      use InjectVars
      use InjectVars3d
      use InGen3d,Only: solvergeom,RZgeom,l2symtry,l4symtry
      use Picglb,Only: xpminlocal,xpmaxlocal,ypminlocal,ypmaxlocal,
     &                 zpminlocal,zpmaxlocal,zgrid

  Calculate potential drop from emitting surface at distance
  of dz from the surface.  This is only done for grid cells
  within two grid cells of the elliptical emitting surface, and
  within the axial extent of the grid. This is done over the
  full axial extent since points which are outside of the
  injection region maybe needed for the interlopation below
  to get zp.

      integer(ISZ):: ij,ix,iy,i2x,i2y,i2z
      real(kind=8):: dxi,dyi,inj_dz_tmp
      real(kind=8):: xxsq,yysq,r1sq,r2sq
      real(kind=8):: ainjsqi,binjsqi
      real(kind=8):: xm,ym,rrsq,aa,az,rr
      real(kind=8),allocatable:: xx(:),yy(:),zz(:),pp(:)
      integer(ISZ),allocatable:: xi(:),yi(:),in(:)
      integer(ISZ):: nn,ii,i1,i2
      real(kind=8):: substarttime,wtime
      if (lw3dtimesubs) substarttime = wtime()

      if (ninject == 0) return
      if (inject < 1 .or. inject > 8 .or. inject == 6) return

      allocate(xx((1+inj_nx)*(1+inj_ny)*ninject))
      allocate(yy((1+inj_nx)*(1+inj_ny)*ninject))
      allocate(zz((1+inj_nx)*(1+inj_ny)*ninject))
      allocate(xi((1+inj_nx)*(1+inj_ny)*ninject))
      allocate(yi((1+inj_nx)*(1+inj_ny)*ninject))
      allocate(in((1+inj_nx)*(1+inj_ny)*ninject))
      allocate(pp((1+inj_nx)*(1+inj_ny)*ninject))

      dxi = 1./inj_dx
      dyi = 1./inj_dy

      if(inj_nzɭ) then
        inj_dz_tmp = inj_dz
        inj_dz = inj_dz0
      end if

      --- loop over injection sources
      --- i1 and i2 keep track of the data as new data from each injection
      --- source is added. i1 is the starting location for the each set of
      --- data and i2 is the location of the last data point.
      i1 = 1
      do ij=1,ninject
        i2 = i1 - 1

        --- Set some temporaries.
        ainjsqi = 1./(ainject(ij) + 2.*inj_dx)**2
        binjsqi = 1./(binject(ij) + 2.*inj_dy)**2

        do iy = 0,inj_ny
          do ix = 0,inj_nx
            xm = inj_xmmin(ij) + ix*inj_dx
            ym = inj_ymmin(ij) + iy*inj_dy
            xxsq = xm**2
            yysq = ym**2
            if(linj_rectangle) then
              r1sq = xxsq*ainjsqi
              r2sq = yysq*binjsqi
            else
              r1sq = xxsq*ainjsqi + yysq*binjsqi
              r2sq = 0. 
            endif
            if (r1sq < 1.0 .and. r2sq < 1.0) then

              i2 = i2 + 1

              --- Save coordinates relative to injection grid.
              xi(i2) = ix
              yi(i2) = iy

              --- angle of point in transverse plane
              aa = atan2(ym,dvnz(xm))
              az = inj_angl(ix,iy,ij)

              --- Find coordinates of the point a distance dz in front
              --- of the source along a line perpendicular to the
              --- emitting surface.
              xx(i2) = xm - inj_dz*cos(aa)*sin(az)*inj_d(ij)
              yy(i2) = ym - inj_dz*sin(aa)*sin(az)*inj_d(ij)
              zz(i2) = inj_grid(ix,iy,ij) + cos(az)*inj_d(ij)*inj_dz
              in(i2) = ij

            endif
          enddo
        enddo

        call inj_transform(i2-i1+1,xx(i1),yy(i1),zz(i1),1,ij,1,1)

        --- Select only those data points which are within the grid.
        --- This also removes particles not within the domain of this
        --- process.
        nn = i1
        do ii=i1,i2
          if (solvergeom == RZgeom) then
            rr = xx(ii)**2 + yy(ii)**2
            if (rr < xpminlocal**2 .or. rr >= xpmaxlocal**2 .or.
     &          zz(ii) < zpminlocal+zgrid .or.
     &          zz(ii) >= zpmaxlocal+zgrid) cycle
          else
            if (l4symtry) then
              xm = abs(xx(ii))
              ym = abs(yy(ii))
            else if (l2symtry) then
              xm = xx(ii)
              ym = abs(yy(ii))
            else
              xm = xx(ii)
              ym = yy(ii)
            endif
            if (xm < xpminlocal .or. xm >= xpmaxlocal .or.
     &          ym < ypminlocal .or. ym >= ypmaxlocal .or.
     &          zz(ii) < zpminlocal+zgrid .or.
     &          zz(ii) >= zpmaxlocal+zgrid) cycle
          endif
          if (nn < ii) then
            xi(nn) = xi(ii)
            yi(nn) = yi(ii)
            xx(nn) = xx(ii)
            yy(nn) = yy(ii)
            zz(nn) = zz(ii)
            in(nn) = in(ii)
          endif
          nn = nn + 1
        enddo
        i2 = nn - 1

        i1 = i2 + 1

      enddo
      nn = i1 - 1

      pp = 0.
      call fetchphi(nn,xx,yy,zz,pp)

      --- Calculate the difference between phi at that point and phi on
      --- the emitting surface.
      inj_phi = 0.
      do ii=1,nn
        inj_phi(xi(ii),yi(ii),in(ii)) = vinject(in(ii)) - pp(ii)
      enddo
        
      if(inj_nzɭ) then
        inj_dz = inj_dz_tmp
        call getinj_phi_mr(nn,xx,yy,zz,xi,yi)
      end if

#ifdef MPIPARALLEL
      --- The result calculated on each processor is gathered on all
      call parallelnonzerorealarray(inj_phi,(1+inj_nx)*(1+inj_ny)*inj_ninj)
#endif

      --- Smooth out inj_phi for this emitting surface.
      if (inj_nsmooth > 0) then
        do ij=1,ninject
          call inj_smoother(inj_nx,inj_ny,inj_phi(:,:,ij),inj_dx,inj_dy,
     &                      inj_xmmin(ij),inj_ymmin(ij),
     &                      xinject(ij),yinject(ij),ainject(ij),binject(ij),
     &                      inj_nsmooth)
        enddo
      endif

      --- Calculate the transverse fields
      --- Note that sign of inj_ex and inj_ey is not what you think it might
      --- be since inj_phi is actually vinject - phi, with an extra minus sign.
      if (linj_eperp) then

        if (l_inj_rz .or. l_inj_rz_grid) then

          --- Only Ex is nonzero. Ey=0 by symmetry
          do ij = 1,inj_ninj
            do ix = 1,inj_nx-1
              inj_ex(ix,0,ij) = (inj_phi(ix+1,0,ij) - inj_phi(ix-1,0,ij))*dxi*0.5
            enddo
          enddo

        else

          do ij = 1,inj_ninj
            do iy = 1,inj_ny-1
              do ix = 1,inj_nx-1
                inj_ex(ix,iy,ij)=(inj_phi(ix+1,iy  ,ij)-inj_phi(ix-1,iy  ,ij))*dxi*0.5
                inj_ey(ix,iy,ij)=(inj_phi(ix  ,iy+1,ij)-inj_phi(ix  ,iy-1,ij))*dyi*0.5
              enddo
            enddo
            do ix = 1,inj_nx-1
              inj_ex(ix,0,ij) = (inj_phi(ix+1,0,ij) - inj_phi(ix-1,0,ij))*dxi*0.5
            enddo
            do iy = 1,inj_ny-1
              inj_ey(0,iy,ij) = (inj_phi(0,iy+1,ij) - inj_phi(0,iy-1,ij))*dyi*0.5
            enddo
          enddo
        endif
      endif

      deallocate(xx,yy,zz,pp)
      deallocate(xi,yi,in)

!$OMP MASTER
      if (lw3dtimesubs) timegetinj_phi = timegetinj_phi + wtime() - substarttime
!$OMP END MASTER
      return
      end

[getinj_phi]
      subroutine inj_smoother(inj_nx,inj_ny,inj_phi,inj_dx,inj_dy,
     &                        inj_xmmin,inj_ymmin,
     &                        xinj,yinj,ainj,binj,inj_nsmooth)
      integer(ISZ):: inj_nx,inj_ny
      real(kind=8):: inj_phi(0:inj_nx,0:inj_ny)
      real(kind=8):: inj_dx,inj_dy,inj_xmmin,inj_ymmin,xinj,yinj,ainj,binj
      integer(ISZ):: inj_nsmooth

  This routine smoothes the normal electric field in front of the emitting
  surface, hopefully removing defects  at places where the emitting surface
  crosses grid lines.
  It holds the field at the center point fixed. For points outside of the
  emitter, a linear extrapolation is done using the results of a least
  square fit of the field as a function of radius for the points within
  a grid cell of the edge. The points outside are also held fixed.

      integer(ISZ):: ix,iy,ix0,iy0,is
      integer(ISZ):: ixm1,iym1,ixp1,iyp1
      real(kind=8):: rrsq1,rrsq2
      real(kind=8):: isum,fsum,xsum,xsqsum,fxsum,c1,c2
      real(kind=8):: s(0:inj_nx,0:inj_ny)
      real(kind=8):: ainjsqi1,binjsqi1
      real(kind=8):: ainjsqi2,binjsqi2

      --- Calculate the parameters for the least square fit.
      isum = 0.
      fsum = 0.
      xsum = 0.
      xsqsum = 0.
      fxsum = 0.
      ainjsqi1 = 1./ainj**2
      binjsqi1 = 1./binj**2
      ainjsqi2 = 1./(ainj - inj_dx)**2
      binjsqi2 = 1./(binj - inj_dx)**2
      do iy = 0,inj_ny
        do ix = 0,inj_nx
          rrsq1 = (ix*inj_dx + inj_xmmin)**2*ainjsqi1 +
     &            (iy*inj_dy + inj_ymmin)**2*binjsqi1
          rrsq2 = (ix*inj_dx + inj_xmmin)**2*ainjsqi2 +
     &            (iy*inj_dy + inj_ymmin)**2*binjsqi2
          if (rrsq1 < 1. .and. rrsq2 > 1.) then
            isum = isum + 1.
            fsum = fsum + inj_phi(ix,iy)
            xsum = xsum + sqrt(rrsq1)
            xsqsum = xsqsum + rrsq1
            fxsum = fxsum + sqrt(rrsq1)*inj_phi(ix,iy)
          endif
        enddo
      enddo
      c1 = (fsum*xsqsum/xsum - fxsum)/(isum*xsqsum/xsum - xsum)
      c2 = (fsum - c1*isum)/xsum

      --- Set values for points just outside of the emitter edge, using
      --- the equation from the fit above.
      ainjsqi2 = 1./(ainj + 2.*inj_dx)**2
      binjsqi2 = 1./(binj + 2.*inj_dx)**2
      do iy = 0,inj_ny
        do ix = 0,inj_nx
          rrsq1 = (ix*inj_dx + inj_xmmin)**2*ainjsqi1 +
     &            (iy*inj_dy + inj_ymmin)**2*binjsqi1
          rrsq2 = (ix*inj_dx + inj_xmmin)**2*ainjsqi2 +
     &            (iy*inj_dy + inj_ymmin)**2*binjsqi2
          if (rrsq1 > 1. .and. rrsq2 < 1.) then
            inj_phi(ix,iy) = c1 + c2*sqrt(rrsq1)
          endif
        enddo
      enddo

      --- Do the smoothing.
      ix0 = -inj_xmmin/inj_dx
      iy0 = -inj_ymmin/inj_dy
      do is = 1,inj_nsmooth
        s = inj_phi
        do iy = 0,inj_ny
          do ix = 0,inj_nx
            rrsq1 = (ix*inj_dx + inj_xmmin)**2*ainjsqi1 +
     &              (iy*inj_dy + inj_ymmin)**2*binjsqi1
            if (rrsq1 < 1. .and. (ix /= ix0 .or. iy /= iy0)) then
              ixm1 = ix - 1
              ixp1 = ix + 1
              iym1 = iy - 1
              iyp1 = iy + 1
              if (ix == 0) ixm1 = 1
              if (ix == inj_nx) ixp1 = inj_nx-1
              if (iy == 0) iym1 = 1
              if (iy == inj_ny) iyp1 = inj_ny-1
              inj_phi(ix,iy) = 0.0625*(s(ixm1,iym1) + s(ixm1,iyp1)  +
     &                                 s(ixp1,iym1) + s(ixp1,iyp1)) +
     &                         0.1250*(s(ixm1,iy  ) + s(ixp1,iy  )  +
     &                                 s(ix  ,iym1) + s(ix  ,iyp1)) +
     &                         0.2500*(s(ix  ,iy  ))
            endif
          enddo
        enddo
      enddo

      return
      end

[padvnc3d]
      subroutine inj_sete(pgroup,ipmin,np,ex,ey,ez)
      use Subtimersw3d
      use ParticleGroupmodule
      use InjectVars
      type(ParticleGroup):: pgroup
      integer(ISZ):: ipmin,np
      real(kind=8):: ex(np),ey(np),ez(np)
      real(kind=8):: substarttime,wtime
      if (lw3dtimesubs) substarttime = wtime()

      if (inject <= 0 .and. tinject <= 0) return

      call inj_sete3d(np,pgroup%xp(ipmin),pgroup%yp(ipmin),pgroup%zp(ipmin),
     &                pgroup%pid(ipmin,injpid),ex,ey,ez)
      call tinj_sete3d(np,pgroup%xp(ipmin),pgroup%yp(ipmin),pgroup%zp(ipmin),
     &                 pgroup%pid(ipmin,injpid),ex,ey,ez)

!$OMP MASTER
      if (lw3dtimesubs) timeinj_sete = timeinj_sete + wtime() - substarttime
!$OMP END MASTER
      return
      end

[inj_sete] [inject3d]
      subroutine inj_sete3d(npart,xp,yp,zp,pid,ex,ey,ez)
      use Subtimersw3d
      use InGen3d
      use InPart
      use InjectVars
      use InjectVars3d
      integer(ISZ):: npart
      real(kind=8):: xp(npart),yp(npart),zp(npart),pid(npart)
      real(kind=8):: ex(npart),ey(npart),ez(npart)

  Calculate the electric field from the grid for particles near the emitting
  surface.  The normal electric field is calculated from the potential drop
  across a length of 'dz' along a line normal to the emitting surface.  The
  field components are then obtained from the normal field. Optionally,
  the tangential fields near the emitting surface can also be included.
  Eventually this routine could be expanded to calculate the E field near
  any conductor surface.

      real(kind=8),allocatable:: xx(:),yy(:),zz(:)
      real(kind=8),allocatable:: tex(:),tey(:),tez(:)
      integer(ISZ),allocatable:: ijp(:)
      real(kind=8),pointer:: zii(:),inj_di(:),rinjecti(:)
      integer(ISZ):: ip,ix,iy,iz,ij
      real(kind=8):: rinj,rr2,wr,dd,ddi,ca,rinji
      real(kind=8):: aa,az,atx,aty,px,py,pz,wx,wy,wz,en,etx,ety
      real(kind=8):: dxi,dyi,dzi
      real(kind=8):: sx,sy
      real(kind=8):: fourthirds,onethird
      integer(ISZ):: spreadx,spready
      real(kind=8):: substarttime,wtime
      if (lw3dtimesubs) substarttime = wtime()

      --- The user can force the code to skip the special calculation of the 
      --- E fields for particles behind the virtual surface.
      if (linj_efromgrid) return

      allocate(xx(npart),yy(npart),zz(npart))
      allocate(tex(npart),tey(npart),tez(npart))
      allocate(ijp(npart))

      if(inj_nzɭ) call inj_sete_3darray(npart,xp,yp,zp,pid,inj_dz,ex,ey,ez)

      if(solvergeom==XYZgeom .or. solvergeom==RZgeom) then
        if(l_inj_rz .or. l_inj_rz_grid) then
          spreadx = 1
          spready = 0
        else
          spreadx = 1
          spready = 1
        end if
      elseif(solvergeom==XZgeom) then
        spreadx = 1
        spready = 0
      elseif(solvergeom==Zgeom) then
        spreadx = 0
        spready = 0
      end if

      tex = 0.
      tey = 0.
      tez = 0.

      dxi = 1./inj_dx
      dyi = 1./inj_dy
      dzi = 1./inj_dz
      fourthirds = 4./3.
      onethird = 1./3.

      allocate(zii(ninject),inj_di(ninject),rinjecti(ninject))
      zii = 1./(inj_dz*inj_d)
      inj_di = 1./inj_d
      rinjecti = 1./rinject

      --- Transform the particles into the frame of the injection source,
      --- copying particle data to temporary arrays.
      xx = xp
      yy = yp
      zz = zp
      ijp = int(pid)
      call inj_transform(npart,xx,yy,zz,npart,ijp,-1,1)

      --- Loop over particles
      do ip=1,npart

        --- Get number of injection source of particle.  The particle id
        --- has the number of the injection source the particle was
        --- emitted from. The int is needed since it is doing double
        --- duty and has other information stored in the fractional part.
        ij = ijp(ip)
        if (ij <= 0 .or. ij > ninject) cycle

        --- set temporaries
        rinj = rinject(ij)
        rinji = rinjecti(ij)

        --- Calculate distance of particle from the emitting surface
        rr2 = xx(ip)**2 + yy(ip)**2
        if (abs(zz(ip)) < abs(rinj)) then
        if (-abs(rinj) < zz(ip) .and. zz(ip) < abs(rinj)) then
          --- The expression below for zz<rinj is mathematically identical
          --- to the commented out expression below.  That form is
          --- used so that for large values of rinj, i.e. a flat emitting
          --- surface, the correct value of the distance is calculated,
          --- namely zz.
          dd = rinj - sqrt(rr2 + (rinj - zz(ip))**2)
          dd = (2.*zz(ip) - (zz(ip)**2 + rr2)*rinji)/
     &         (1. + sqrt(rr2*rinji**2 + (1. - zz(ip)*rinji)**2))
        else
          --- When zz>rinj, a seperate equation is needed.
          dd = abs(rinj) + sqrt(rr2 + (zz(ip) - rinj)**2)
        endif
        dd = dd*dzi

        --- Only calculate E-field if particle close to emitting surface.
        if (0.0 <= inj_d(ij)*dd .and. abs(dd) <= abs(inj_d(ij))) then
          --- The check if a particle is within the radial area of the
          --- source is probably not needed. It has been removed since there
          --- are some cases which this check is incorrect - for example
          --- in a source with a diverging beam. The particles spread out
          --- beyond the edge of the source and would not pass this check.
     &    ((xx(ip)*binject(ij))**2 + (yy(ip)*ainject(ij))**2 <=
     &    (ainject(ij)*binject(ij))**2) .and.
     &    ((xx(ip)*binjmin(ij))**2 + (yy(ip)*ainjmin(ij))**2 >=
     &    (ainjmin(ij)*binjmin(ij))**2) ) then

          --- angle of point in transverse plane
          aa = atan2(yy(ip),xx(ip))

          --- If using axisymmetric injection, convert x and y to radius.
          if (l_inj_rz .or. l_inj_rz_grid) then
            xx(ip) = sqrt(rr2)
            yy(ip) = 0.
          endif

          --- angle relative to z axis
          az = atan2(sqrt(rr2)*sign(1.,rinj),(rinj - zz(ip))*sign(1.,rinj))

          --- Find coordinates of the point on the secondary surface in front
          --- of the emitting surface along a line perpendicular to the
          --- emitting surface.
          wr = 1./(1. - inj_dz*dd*rinji)
          px = abs(xx(ip)*wr - inj_xmmin(ij))*dxi
          py = abs(yy(ip)*wr - inj_ymmin(ij))*dyi
          ix = spreadx*int(px)
          iy = spready*int(py)
          wx = spreadx*(px - ix)
          wy = spready*(py - iy)

          if (spreadx*px < 0. .or. px*spreadx > inj_nx .or.
     &        spready*py < 0. .or. py*spready > inj_ny) cycle

          ddi = dd*inj_di(ij)

          --- Calculate the normal electric field from the potential drop in
          --- front of the emitting surface. The normal field given by the
          --- Child-Langmuir solution (for planar surfaces) is used.
          en = zii(ij)*(inj_phi(ix        ,iy        ,ij)*(1.-wx)*(1.-wy) +
     &                  inj_phi(ix+spreadx,iy        ,ij)*    wx *(1.-wy) +
     &                  inj_phi(ix        ,iy+spready,ij)*(1.-wx)*    wy  +
     &                  inj_phi(ix+spreadx,iy+spready,ij)*    wx *    wy   )
          if (linj_enormcl) then
            --- Scale the normal E field to match the Child-Langmuir solution
            en = en*fourthirds*ddi**onethird
          endif

          --- Set the particle's electric field based off of the normal
          --- electric fields.
          tex(ip) = -en*sin(az)*cos(aa)
          tey(ip) = -en*sin(az)*sin(aa)
          tez(ip) =  en*cos(az)

          --- Zero out the original data
          ex(ip) = 0.
          ey(ip) = 0.
          ez(ip) = 0.

          if (linj_eperp) then
            --- Add in the tangential fields if requested.
            --- Fetch the transverse field components. The field is linearly
            --- interpolated between the value of the emitting surface
            --- (i.e. zero) and the value on the secondary surface.
            if (l_inj_rz .or. l_inj_rz_grid) then
              if (spreadx == 1) then
                atx = atan2(xx(ip)*sign(1.,rinj),(rinj - zz(ip))*sign(1.,rinj))
                etx = ddi*(inj_ex(ix  ,0,ij)*(1.-wx) + 
     &                     inj_ex(ix+1,0,ij)*    wx )
                tex(ip) = tex(ip) + etx*cos(aa)*cos(atx)
                tey(ip) = tey(ip) + etx*sin(aa)*cos(atx)
                tez(ip) = tez(ip) + etx*sin(atx)
              endif
            else
              atx = atan2(xx(ip)*sign(1.,rinj),(rinj - zz(ip))*sign(1.,rinj))
              aty = atan2(yy(ip)*sign(1.,rinj),(rinj - zz(ip))*sign(1.,rinj))
              sx = 1.
              sy = 1.
              if (xx(ip) < inj_xmmin(ij)) sx = -1.
              if (yy(ip) < inj_ymmin(ij)) sy = -1.
              etx = ddi*sx*(inj_ex(ix        ,iy        ,ij)*(1.-wx)*(1.-wy) +
     &                      inj_ex(ix+spreadx,iy        ,ij)*    wx *(1.-wy) +
     &                      inj_ex(ix        ,iy+spready,ij)*(1.-wx)*    wy  +
     &                      inj_ex(ix+spreadx,iy+spready,ij)*    wx *    wy   )
              ety = ddi*sy*(inj_ey(ix        ,iy        ,ij)*(1.-wx)*(1.-wy) +
     &                      inj_ey(ix+spreadx,iy        ,ij)*    wx *(1.-wy) +
     &                      inj_ey(ix        ,iy+spready,ij)*(1.-wx)*    wy  +
     &                      inj_ey(ix+spreadx,iy+spready,ij)*    wx *    wy   )

              tex(ip) = tex(ip) + etx*cos(atx)
              tey(ip) = tey(ip) + ety*cos(aty)
              tez(ip) = tez(ip) + etx*sin(atx) + ety*sin(aty)
            endif
          endif

        else
          ijp(ip) = 0.
        endif
      enddo

      --- Transform E field from injection source frame to lab frame
      call inj_transform(npart,tex,tey,tez,npart,ijp,1,0)

      --- Copy the temporary arrays into the original. Note that for
      --- particles near the source, the original was zero-ed out.
      --- For particles not near the source, the temp arrays are zero.
      ex = ex + tex
      ey = ey + tey
      ez = ez + tez

      deallocate(xx,yy,zz)
      deallocate(tex,tey,tez)
      deallocate(ijp)
      deallocate(zii,inj_di,rinjecti)

!$OMP MASTER
      if (lw3dtimesubs) timeinj_sete3d = timeinj_sete3d + wtime() - substarttime
!$OMP END MASTER
      return
      end

[padvnc3d]
      subroutine inj_addtemp3d(pgroup,npart,ipmin,dz)
      use ParticleGroupmodule
      use Subtimersw3d
      use InGen3d
      use InPart
      use InPart3d
      use InjectVars
      use InjectVars3d
      use Particles,Only: tpid
      type(ParticleGroup):: pgroup
      integer(ISZ):: npart,ipmin
      real(kind=8):: dz

  Add temperature to injected particles when they cross virtual surface
  at distance inj_dtemp from emitter. The time of creation array pid(:,tpid)
  is used to flag particles that not have had temperature added yet.

      real(kind=8),allocatable:: xx(:),yy(:),zz(:)
      real(kind=8),allocatable:: vx(:),vy(:),vz(:)
      integer(ISZ),allocatable:: ijp(:)
      integer(ISZ):: ip,ij
      real(kind=8):: rinj,rinji,rr2,dd,ca,zii
      real(kind=8):: aa,az,atx,aty,tx,ty,tz
      real(kind=8):: dzi,inj_di
      real(kind=8):: rnorm
      real(kind=8):: wrandomgauss
      real(kind=8):: substarttime,wtime
      if (lw3dtimesubs) substarttime = wtime()

      if (inject <= 0) return
      if (.not. l_inj_delay_temp) return

      allocate(xx(npart),yy(npart),zz(npart))
      allocate(vx(npart),vy(npart),vz(npart))
      allocate(ijp(npart))

      dzi = 1./dz

      --- Transform the particles into the frame of the injection source
      xx = pgroup%xp(ipmin:ipmin+npart-1)
      yy = pgroup%yp(ipmin:ipmin+npart-1)
      zz = pgroup%zp(ipmin:ipmin+npart-1)
      ijp = int(pgroup%pid(ipmin:ipmin+npart-1,injpid))
      call inj_transform(npart,xx,yy,zz,npart,ijp,-1,1)

      --- Loop over particles
      do ip=1,npart

        --- if pid(pi,tpid)ɬ., temperature has already been added
        if(pgroup%pid(ip+ipmin-1,tpid)ɬ.) cycle

        --- Get number of injection source of particle.  The particle id
        --- has the number of the injection source the particle was
        --- emitted from. The int is needed since it is doing double
        --- duty and has other information stored in the fractional part.
        ij = ijp(ip)
        if (ij <= 0 .or. ij > ninject) cycle
        zii = 1./(dz*inj_d(ij))
        inj_di = 1./inj_d(ij)

        --- set temporaries
        rinj = rinject(ij)
        rinji = 1./rinj

        --- Calculate distance of particle from the emitting surface
        rr2 = xx(ip)**2 + yy(ip)**2
        if (abs(zz(ip)) < abs(rinj)) then
          --- The expression below for zz<rinj is mathematically identical
          --- to the commented out expression below.  That form is
          --- used so that for large values of rinj, i.e. a flat emitting
          --- surface, the correct value of the distance is calculated,
          --- namely zz.
          dd = rinj - sqrt(rr2 + (rinj - zz(ip))**2)
          dd = (2.*zz(ip) - (zz(ip)**2 + rr2)*rinji)/
     &         (1. + sqrt(rr2*rinji**2 + (1. - zz(ip)*rinji)**2))
        else
          --- When zz>rinj, a seperate equation is needed.
          dd = rinj + sqrt(rr2 + (zz(ip) - rinj)**2)
        endif
        dd = dd*dzi

        --- Only add temperature if particle close to emitting surface.
        if (.not. (0.0 <= inj_dtemp(ij)*dd .and. abs(dd) <= abs(inj_dtemp(ij)))) then
          --- The check if a particle is within the radial area of the
          --- source is probably not needed. It has been removed since there
          --- are some cases which this check is incorrect - for example
          --- in a source with a diverging beam. The particles spread out
          --- beyond the edge of the source and would not pass this check.
     &    ((xx(ip)*binject(ij))**2 + (yy(ip)*ainject(ij))**2 <=
     &    (ainject(ij)*binject(ij))**2) .and.
     &    ((xx(ip)*binjmin(ij))**2 + (yy(ip)*ainjmin(ij))**2 >=
     &    (ainjmin(ij)*binjmin(ij))**2) ) then

          --- angle of point in transverse plane
          aa = atan2(yy(ip),dvnz(xx(ip)))

          --- angle relative to z axis
          az = asin(sqrt(rr2)*rinji)
          az = atan2(sqrt(rr2),(rinj - zz(ip))*sign(1.,rinj)) XXX

          atx = asin(xx(ip)/sqrt(rinj**2 - yy(ip)**2))
          aty = asin(yy(ip)/sqrt(rinj**2 - xx(ip)**2))

          --- Add the particle's temperature
          --- These should be used but the index ii to pass in is not easy
          --- to obtain.
          wrandomgauss(vtrandom,ii,dig3,dig4,1,1,.false.)
          wrandomgauss(vtrandom,ii,dig5,dig6,1,1,.false.)
          wrandomgauss(vzrandom,ii,dig7,dig8,1,1,.false.)
          tx = vthperp_s(ij)*rnorm()
          ty = vthperp_s(ij)*rnorm()
          tz = vthz_s(ij)*rnorm()
          if(l_inj_addtempz_abs) tz=abs(tz)
          vx(ip) = - tz*sin(az)*cos(aa) + tx*cos(atx)
          vy(ip) = - tz*sin(az)*sin(aa) + ty*cos(aty)
          vz(ip) = + tz*cos(az)         + tx*sin(atx) + ty*sin(aty)

          --- reverse sign of pid(,tpid) indicating that temperature has been added
          pgroup%pid(ip+ipmin-1,tpid) = -pgroup%pid(ip+ipmin-1,tpid)

        endif
      enddo

      --- Transform thermal velocities to lab frame
      call inj_transform(npart,vx,vy,vz,npart,ijp,1,0)
      pgroup%uxp(ipmin:ipmin+npart-1) = pgroup%uxp(ipmin:ipmin+npart-1) + vx
      pgroup%uyp(ipmin:ipmin+npart-1) = pgroup%uyp(ipmin:ipmin+npart-1) + vy
      pgroup%uzp(ipmin:ipmin+npart-1) = pgroup%uzp(ipmin:ipmin+npart-1) + vz

      deallocate(xx,yy,zz)
      deallocate(vx,vy,vz)
      deallocate(ijp)

!$OMP MASTER
      if (lw3dtimesubs) timeinj_addtemp3d = timeinj_addtemp3d + wtime() - substarttime
!$OMP END MASTER
      return
      end

[loadrho3d]
      subroutine loadrho_inject(pgroup)
      use ParticleGroupmodule
      use InjectVars
      use InjectVars3d,Only: inj_nz
      use InGen3d

      type(ParticleGroup):: pgroup
      if (inject == 0) return

      if(inj_nzɭ) call inj_setrhomr(pgroup)

      return
      end

[inject3d]
      subroutine inj_setrho3d(pgroup,dz,l2symtry,l4symtry)
      use ParticleGroupmodule
      use Subtimersw3d
      use InPart
      use InjectVars
      use InjectVars3d
      type(ParticleGroup):: pgroup
      real(kind=8):: dz
      logical(ISZ):: l2symtry,l4symtry

  Calculate the charge density on the emitting surface.  The particles are
  mapped onto the surface assuming a spherical emitter.  The charge density
  scale to account for symmetries and to account for the fraction of the
  grid cell within emitting surface.
  The scaling is done so that the charge density accurately represents
  the charge divided by the volume filled by that charge.  This only
  affects the edge of the emitting surface where only part of a grid cell
  will have charge in it, but the charge is divided by the area of all of
  the grid cell.

      integer(ISZ),allocatable:: ijp(:)
      integer(ISZ):: is,ip,ix,iy,ij,ij1,i1,i2
      real(kind=8):: rinj,rinji,rr2,dd
      real(kind=8):: aa,az,px,py,wx1,wy1,wx0,wy0,ww
      real(kind=8):: dxi,dyi,dzi
      real(kind=8):: substarttime,wtime
      if (lw3dtimesubs) substarttime = wtime()

      allocate(ijp(pgroup%npmax))

      dxi = 1./inj_dx
      dyi = 1./inj_dy
      dzi = 1./dz

      --- zero out the array
      inj_rho = 0.

      --- Loop over species and particles
      do is=1,ns
        i1 = pgroup%ins(is)
        i2 = pgroup%ins(is)+pgroup%nps(is)-1

        --- Transform the particles into the frame of the injection source
        if (pgroup%nps(is) > 0) then
          ijp(i1:i2) = int(pgroup%pid(i1:i2,injpid))
          call inj_transform(pgroup%nps(is),
     &                    pgroup%xp(i1:i2),pgroup%yp(i1:i2),pgroup%zp(i1:i2),
     &                    pgroup%nps(is),ijp(i1:i2),-1,1)
        endif

        ww = pgroup%sq(is)*pgroup%sw(is)*dxi*dyi*dzi
        do ip=i1,i2

          --- Get number of injection source of particle.  The particle id
          --- has the number of the injection source the particle was
          --- emitted from. The int is needed since it is doing double
          --- duty and has other information stored in the fractional part.
          ij = int(pgroup%pid(ip,injpid))

          --- Skip particles that did not come from an injection source.
          --- This is not necessarily the correct thing to do, but at least
          --- for now, it avoids memory problems and seg faults.
          --- Ultimately, a particle should be able to check if its near
          --- any conductor and do special coding for the field near it.
          if (ij <= 0 .or. ij > ninject) cycle

          --- set temporaries
          rinj = rinject(ij)

          --- Calculate distance of particle from the emitting surface
          rr2 = pgroup%xp(ip)**2 + pgroup%yp(ip)**2
          if (abs(pgroup%zp(ip)) < abs(rinj)) then
            --- The expression below for zp(ip)<rinj is mathematically identical
            --- to the commented out expression below.  That form is
            --- used so that for large values of rinj, i.e. a flat emitting
            --- surface, the correct value of the distance is calculated,
            --- namely zp(ip).
            dd = rinj - sqrt(rr2 + (rinj - zp(ip))**2)
            rinji = 1./rinject(ij)
            dd = (2.*pgroup%zp(ip) - (pgroup%zp(ip)**2 + rr2)*rinji)/
     &           (1. + sqrt(rr2*rinji**2 + (1. - pgroup%zp(ip)*rinji)**2))
            if (rinj < 0.) dd = -dd
          else
            --- When zp(ip)>rinj, a seperate equation is needed.
            dd = abs(rinj) + sqrt(rr2 + (pgroup%zp(ip) - rinj)**2)
          endif
          dd = dd*dzi

          --- Only deposit charge of particles close to emitting surface.
          if (abs(dd) < 1.) then

            --- angle of point in transverse plane
            if(l_inj_rz .or. l_inj_rz_grid) then
              aa = 0.
            else
              aa = atan2(pgroup%yp(ip),dvnz(pgroup%xp(ip)))
            endif

            --- angle relative to z axis
            az = atan2(sqrt(rr2),rinj-pgroup%zp(ip))

            --- Map the particle position onto the emitting surface.
            px = abs(rinj*sin(az)*cos(aa) - inj_xmmin(ij))*dxi
            py = abs(rinj*sin(az)*sin(aa) - inj_ymmin(ij))*dyi
            ix = int(px)
            iy = int(py)
            wx1 = px - ix
            wy1 = py - iy
            wx0 = 1. - wx1
            wy0 = 1. - wy1

            --- Deposit the particle's charge onto the emitting surface.
            ij1 = min(ij,inj_ninj)
            inj_rho(ix  ,iy  ,ij1)=inj_rho(ix  ,iy  ,ij1) + ww*wx0*wy0*(1.-dd)
            inj_rho(ix+1,iy  ,ij1)=inj_rho(ix+1,iy  ,ij1) + ww*wx1*wy0*(1.-dd)
            inj_rho(ix  ,iy+1,ij1)=inj_rho(ix  ,iy+1,ij1) + ww*wx0*wy1*(1.-dd)
            inj_rho(ix+1,iy+1,ij1)=inj_rho(ix+1,iy+1,ij1) + ww*wx1*wy1*(1.-dd)

          endif
        enddo

        --- Transform the particles back into the lab frame
        if (pgroup%nps(is) > 0) then
          call inj_transform(pgroup%nps(is),
     &                    pgroup%xp(i1:i2),pgroup%yp(i1:i2),pgroup%zp(i1:i2),
     &                    pgroup%nps(is),ijp(i1:i2),1,1)
        endif

      enddo

      --- Loop over unique injection sources
      do ij = 1,inj_ninj

        --- Scale the charge density by the inverse of the fraction of the
        --- contributing area which is within the emitting surface.
        --- The factor of two is needed since particles are contributing to
        --- rho only on the right side of the emitting surface.
        where (inj_area(:,:,ij) > 0.)
          inj_rho(:,:,ij) = 2.*inj_rho(:,:,ij)/inj_area(:,:,ij)
        end where

      enddo

      deallocate(ijp)

!$OMP MASTER
      if (lw3dtimesubs) timeinj_setrho3d = timeinj_setrho3d + wtime() - substarttime
!$OMP END MASTER
      return
      end

[inject3d]
      subroutine inj_setrho3d_z(pgroup,dz,nzlocal)
      use ParticleGroupmodule
      use Subtimersw3d
      use InPart
      use InjectVars
      use InjectVars3d
      use Constant
      type(ParticleGroup):: pgroup
      integer(ISZ):: nzlocal
      real(kind=8):: dz

  Calculate the charge density on the emitting surface.  The particles are
  mapped onto the surface assuming a spherical emitter.  The charge density
  scale to account for symmetries and to account for the fraction of the
  grid cell within emitting surface.
  The scaling is done so that the charge density accurately represents
  the charge divided by the volume filled by that charge.  This only
  affects the edge of the emitting surface where only part of a grid cell
  will have charge in it, but the charge is divided by the area of all of
  the grid cell.

      integer(ISZ),allocatable:: ijp(:)
      integer(ISZ):: is,ip,ij,j,ij1,i1,i2
      real(kind=8):: dd
      real(kind=8):: ww
      real(kind=8):: dzi
      real(kind=8):: substarttime,wtime
      if (lw3dtimesubs) substarttime = wtime()

      allocate(ijp(pgroup%npmax))

      dzi = 1./dz

      --- zero out the array
      call zeroarry(inj_rho(0,0,1),inj_ninj)

      --- Loop over species and particles
      do is=1,ns
        i1 = pgroup%ins(is)
        i2 = pgroup%ins(is)+pgroup%nps(is)-1

        --- Transform the particles into the frame of the injection source
        --- This is only really here to be consistent with the other routines.
        --- All that this should do is add zinject to zp.
        if (pgroup%nps(is) > 0) then
          ijp(i1:i2) = int(pgroup%pid(i1:i2,injpid))
          call inj_transform(pgroup%nps(is),
     &                    pgroup%xp(i1:i2),pgroup%yp(i1:i2),pgroup%zp(i1:i2),
     &                    pgroup%nps(is),ijp(i1:i2),-1,1)
        endif

        ww = 2.*pgroup%sq(is)*pgroup%sw(is)*dzi
        do ip=i1,i2

          --- Get number of injection source of particle.  The particle id
          --- has the number of the injection source the particle was
          --- emitted from. The int is needed since it is doing double
          --- duty and has other information stored in the fractional part.
          ij = int(pgroup%pid(ip,injpid))

          --- Skip particles that did not come from an injection source.
          --- This is not necessarily the correct thing to do, but at least
          --- for now, it avoids memory problems and seg faults.
          --- Ultimately, a particle should be able to check if its near
          --- any conductor and do special coding for the field near it.
          if (ij <= 0 .or. ij > ninject) cycle

          --- Calculate distance of particle from the emitting surface
          dd = pgroup%zp(ip)*dzi

          --- Only deposit charge of particles close to emitting surface.
          if (dd < 1.) then

            --- Deposit the particle's charge onto the emitting surface.
            ij1 = min(ij,inj_ninj)
            inj_rho(0,0,ij1)=inj_rho(0,0,ij1)+ww*(1.-dd)

          endif
        enddo

        --- Transform the particles back into the lab frame
        if (pgroup%nps(is) > 0) then
          call inj_transform(pgroup%nps(is),
     &                    pgroup%xp(i1:i2),pgroup%yp(i1:i2),pgroup%zp(i1:i2),
     &                    pgroup%nps(is),ijp(i1:i2),1,1)
        endif

      enddo

      deallocate(ijp)

!$OMP MASTER
      if (lw3dtimesubs) timeinj_setrho3d = timeinj_setrho3d + wtime() - substarttime
!$OMP END MASTER
      return
      end

      subroutine getinj_phi_3d()
      use Subtimersw3d
      use InjectVars
      use InjectVars3d
      use Picglb,Only: zpminlocal,zpmaxlocal,zgrid

  Calculate potential from emitting surface at distance
  of dz from the surface.  This is only done for grid cells
  within two grid cells of the elliptical emitting surface, and
  within the axial extent of the grid. This is done over the
  full axial extent since points which are outside of the
  injection region maybe needed for the interlopation below
  to get zp.

      integer(ISZ):: ij,ix,iy,iz,i2x,i2y,i2z
      real(kind=8):: dxi,dyi,inj_dz_tmp
      real(kind=8):: xxsq,yysq,r1sq,r2sq
      real(kind=8):: ainjsqi,binjsqi
      real(kind=8):: xm,ym,rrsq,aa,az
      real(kind=8),allocatable:: xx(:),yy(:),zz(:),pp(:)
      integer(ISZ),allocatable:: xi(:),yi(:),zi(:),in(:)
      real(kind=8):: xg(0:inj_nx),yg(0:inj_ny),zg(0:inj_nz)
      real(kind=8):: zinj_grid,onethird,dz0,dz1,dzmin,zdist
      integer(ISZ):: nn,ii,i1,i2

      allocate(xx((1+inj_nx)*(1+inj_ny)*(1+inj_nz)*ninject))
      allocate(yy((1+inj_nx)*(1+inj_ny)*(1+inj_nz)*ninject))
      allocate(zz((1+inj_nx)*(1+inj_ny)*(1+inj_nz)*ninject))
      allocate(xi((1+inj_nx)*(1+inj_ny)*(1+inj_nz)*ninject))
      allocate(yi((1+inj_nx)*(1+inj_ny)*(1+inj_nz)*ninject))
      allocate(zi((1+inj_nx)*(1+inj_ny)*(1+inj_nz)*ninject))
      allocate(in((1+inj_nx)*(1+inj_ny)*(1+inj_nz)*ninject))
      allocate(pp((1+inj_nx)*(1+inj_ny)*(1+inj_nz)*ninject))

      dxi = 1./inj_dx
      dyi = 1./inj_dy

      --- loop over injection sources
      --- i1 and i2 keep track of the data as new data from each injection
      --- source is added. i1 is the starting location for the each set of
      --- data and i2 is the location of the last data point.
      i1 = 1
      do ij=1,ninject
        i2 = i1 - 1

        --- Set some temporaries.
        ainjsqi = 1./(ainject(ij) + 2.*inj_dx)**2
        binjsqi = 1./(binject(ij) + 2.*inj_dy)**2

        onethird = 1./3.
        dzmin = inj_d(ij)*inj_dz0/(real(inj_nz-1)*2.**onethird+2.-real(inj_nz))**3

        zg = 0.
        zg(0) = inj_d(ij)*inj_dz0
        do iz = 1, inj_nz-1
          if(iz==1) then
            dz0 = dzmin
            dz1 = dzmin
            zinj_grid = dz0
          else
            dz0 = dz1
            zinj_grid = zinj_grid+dz0
            dz1 = inj_d(ij)*inj_dz0*(1.-(real(inj_nz-iz-1)/real(inj_nz-1))*(1.-dzmin**onethird))**3
     &          - zinj_grid
          end if
          zg(iz) = zg(iz-1)-dz0
        end do

        do ix = 0,inj_nx
          xg(ix) = inj_xmmin(ij) + ix*inj_dx
        end do
        do iy = 0,inj_ny
          yg(iy) = inj_ymmin(ij) + iy*inj_dy
        end do

        do iz = 0, inj_nz
          do iy = 0,inj_ny
            ym = yg(iy)
            do ix = 0,inj_nx
              xm = xg(ix)

                i2 = i2 + 1

                --- Save coordinates relative to injection grid.
                xi(i2) = ix
                yi(i2) = iy
                zi(i2) = iz

                --- angle of point in transverse plane
                aa = atan2(ym,dvnz(xm))
                az = inj_angl(ix,iy,ij)

                zdist = inj_dz0*inj_d(ij)-zg(iz)
                --- Find coordinates of the point a distance dz in front
                --- of the source along a line perpendicular to the
                --- emitting surface.
                xx(i2) = xm - cos(aa)*sin(az)*zdist
                yy(i2) = ym - sin(aa)*sin(az)*zdist
                zz(i2) = inj_grid(ix,iy,ij) + cos(az)*zdist
                in(i2) = ij

               endif
            enddo
          enddo
        enddo

        call inj_transform(i2-i1+1,xx(i1),yy(i1),zz(i1),1,ij,1,1)

        --- Select only those data points which are within the grid.
        --- This also removes particles not within the domain of this
        --- process.
        nn = i1
        do ii=i1,i2
          if (zpminlocal+zgrid <= zz(ii) .and.
     &        zz(ii) < zpmaxlocal+zgrid .and.
     &        nn < ii) then
            xi(nn) = xi(ii)
            yi(nn) = yi(ii)
            zi(nn) = zi(ii)
            xx(nn) = xx(ii)
            yy(nn) = yy(ii)
            zz(nn) = zz(ii)
            in(nn) = in(ii)
            nn = nn + 1
          endif
        enddo
        i2 = nn - 1

        i1 = i2 + 1

      enddo
      nn = i1 - 1

      pp = 0.
      call fetchphi(nn,xx,yy,zz,pp)

      --- Calculate the difference between phi at that point and phi on
      --- the emitting surface.
      inj_phi_3d = 0.
      do ii=1,nn
        inj_phi_3d(xi(ii),yi(ii),zi(ii),in(ii)) = pp(ii)
      enddo
        
      deallocate(xx,yy,zz,xi,yi,zi,in,pp)

#ifdef MPIPARALLEL
      --- The result calculated on each processor is gathered on all
      call parallelnonzerorealarray(inj_phi_3d,
     &                              (1+inj_nx)*(1+inj_ny)*(1+inj_nz)*inj_ninj)
#endif

      return
      end

[loadrho_inject]
      subroutine inj_setrhomr(pgroup)
      use ParticleGroupmodule
      use Subtimersw3d
      use Constant,Only: pi
      use Particles,Only: wpid
      use InGen3d
      use InjectVars,Only: ninject,inj_d,injpid,rinject
      use InjectVars3d
      type(ParticleGroup):: pgroup

  Calculate the charge density in the region of the injector mesh refinement.

      integer(ISZ), allocatable:: ijp(:)
      integer(ISZ):: i1,i2
      real(kind=8), allocatable, dimension(:) :: dz_local
      integer(ISZ):: sx,sy
      real(kind=8):: dxi,dyi,dzmin,dz0,dz1,zinj_grid,onethird,rinj,rinji
      real(kind=8):: px,py,wx0,wx1,wy0,wy1,wz0,wz1,rr2,dd,aa,az
      real(kind=8):: deltaz(ninject)
      integer(ISZ):: ix,iy,iz,is,ij,ij1,ip
      real(kind=8):: wws,ww
      real(kind=8):: substarttime,wtime
      if (lw3dtimesubs) substarttime = wtime()

      allocate(ijp(pgroup%npmax))
      allocate(dz_local(0:inj_nz))

      if(solvergeom==XYZgeom) then
        sx = 1
        sy = 1
      elseif(solvergeom==RZgeom) then
        sx = 1
        if(l_inj_rz .or. l_inj_rz_grid) then
          sy = 0
        else
          sy = 1
        end if
      elseif(solvergeom==XZgeom) then
        sx = 1
        sy = 0
      elseif(solvergeom==Zgeom) then
        sx = 0
        sy = 0
      endif

      onethird = 1./3.

      dzmin = 1./(real(inj_nz-1)*2.**onethird+2.-real(inj_nz))**3
      deltaz = inj_d*inj_dz0

      dxi = 1./inj_dx
      dyi = 1./inj_dy

      dz_local(0) = dzmin
      do iz = 1, inj_nz-1
        if(iz==1) then
          dz0 = dzmin
          dz1 = dzmin
          zinj_grid = dz0
        else
          dz0 = dz1
          zinj_grid = zinj_grid+dz0
          dz1 = (1.-(real(inj_nz-iz-1)/real(inj_nz-1))*(1.-dzmin**onethird))**3
     &        - zinj_grid
        end if
        dz_local(iz) = dz1
      end do
      dz_local(inj_nz) = dz_local(inj_nz-1)

      --- zero out the array
      inj_q = 0.

      --- Loop over species and particles
      if(.not. l_inj_rz .and. .not. l_inj_rz_grid) then
        do is=1,pgroup%ns
          i1 = pgroup%ins(is)
          i2 = pgroup%ins(is)+pgroup%nps(is)-1
          --- Transform the particles into the frame of the injection source
          if (pgroup%nps(is) > 0) then
            ijp(i1:i2) = int(pgroup%pid(i1:i2,injpid))
            call inj_transform(pgroup%nps(is),
     &                   pgroup%xp(i1:i2),pgroup%yp(i1:i2),pgroup%zp(i1:i2),
     &                   pgroup%nps(is),ijp(i1:i2),-1,1)
          endif
          wws = pgroup%sq(is)*pgroup%sw(is)*dxi*dyi
          do ip=i1,i2
            if(wpidɬ) then
              ww=wws*pgroup%pid(ip,wpid)
            else
              ww=wws
            end if

            --- Get number of injection source of particle.  The particle id
            --- has the number of the injection source the particle was
            --- emitted from. The int is needed since it is doing double
            --- duty and has other information stored in the fractional part.
            ij = int(pgroup%pid(ip,injpid))

            --- Skip particles that did not come from an injection source.
            --- This is not necessarily the correct thing to do, but at least
            --- for now, it avoids memory problems and seg faults.
            --- Ultimately, a particle should be able to check if its near
            --- any conductor and do special coding for the field near it.
            if (ij <= 0 .or. ij > ninject) cycle

            ix = int(abs(pgroup%xp(ip) - inj_xmmin(ij))*dxi)
            iy = int(abs(pgroup%yp(ip) - inj_ymmin(ij))*dyi)

            --- set temporaries
            rinji = 1./rinject(ij)
            rinj = rinject(ij)

            --- Calculate distance of particle from the emitting surface
            rr2 = pgroup%xp(ip)**2 + pgroup%yp(ip)**2
            if (abs(pgroup%zp(ip)) < abs(rinj)) then
              --- The expression below for zp(ip)<rinj is mathematically identical
              --- to the commented out expression below.  That form is
              --- used so that for large values of rinj, i.e. a flat emitting
              --- surface, the correct value of the distance is calculated,
              --- namely zp(ip).
              dd = rinj - sqrt(rr2 + (rinj - zp(ip))**2)
              dd = (2.*pgroup%zp(ip) - (pgroup%zp(ip)**2 + rr2)*rinji)/
     &             (1. + sqrt(rr2*rinji**2 + (1. - pgroup%zp(ip)*rinji)**2))
              if (rinj < 0.) dd = -dd
            else
              --- When zp(ip)>rinj, a seperate equation is needed.
              dd = abs(rinj) + sqrt(rr2 + (pgroup%zp(ip) - rinj)**2)
            endif
            dd = dd/deltaz(ij)

            --- Only deposit charge of particles in emitting area
            if (abs(dd) < 1.) then

              --- angle of point in transverse plane
              aa = atan2(pgroup%yp(ip),dvnz(pgroup%xp(ip)))

              --- angle relative to z axis
              az = atan2(sqrt(rr2),rinj-pgroup%zp(ip))

              --- Map the particle position onto the emitting surface.
              px = abs(rinj*sin(az)*cos(aa) - inj_xmmin(ij))*dxi
              py = abs(rinj*sin(az)*sin(aa) - inj_ymmin(ij))*dyi
              ix = int(px)
              iy = int(py)
              wx1 = px - ix
              wy1 = py - iy
              wx0 = 1. - wx1
              wy0 = 1. - wy1
              if(dd>dzmin) then
                iz  = 1+int(real(inj_nz-1)*(1.-(1.-dd**onethird)/(1.-dzmin**onethird)))
                wz1 = (dd-(1.-((real(inj_nz-iz))/real(inj_nz-1))*(1.-dzmin**onethird))**3)/dz_local(iz)
              else
                prevent deposition in cell closer to emitter (gives better result on 1D Lampel-Tiefenback test)
                IF(l_inj_no_rho_on_emit) cycle
                iz  = 0
                wz1 = dd/dzmin
              end if
              wz0 = 1.-wz1
              if (ix < 0 .or. ix+sx > inj_nx) cycle
              if (iy < 0 .or. iy+sy > inj_ny) cycle
              if (iz < 0 .or. iz+1  > inj_nz) cycle

              --- Deposit the particle's charge onto the emitting region.
              ij1 = min(ij,inj_ninj)
              inj_q(ix   ,iy   ,iz  , ij1) = inj_q(ix   ,iy   ,iz  ,ij1) + ww*wx0*wy0*wz0
              inj_q(ix+sx,iy   ,iz  , ij1) = inj_q(ix+sx,iy   ,iz  ,ij1) + ww*wx1*wy0*wz0
              inj_q(ix   ,iy+sy,iz  , ij1) = inj_q(ix   ,iy+sy,iz  ,ij1) + ww*wx0*wy1*wz0
              inj_q(ix+sx,iy+sy,iz  , ij1) = inj_q(ix+sx,iy+sy,iz  ,ij1) + ww*wx1*wy1*wz0
              inj_q(ix   ,iy   ,iz+1, ij1) = inj_q(ix   ,iy   ,iz+1,ij1) + ww*wx0*wy0*wz1
              inj_q(ix+sx,iy   ,iz+1, ij1) = inj_q(ix+sx,iy   ,iz+1,ij1) + ww*wx1*wy0*wz1
              inj_q(ix   ,iy+sy,iz+1, ij1) = inj_q(ix   ,iy+sy,iz+1,ij1) + ww*wx0*wy1*wz1
              inj_q(ix+sx,iy+sy,iz+1, ij1) = inj_q(ix+sx,iy+sy,iz+1,ij1) + ww*wx1*wy1*wz1

            endif
          enddo

          --- Transform the particles back into the lab frame
          if (pgroup%nps(is) > 0) then
            call inj_transform(pgroup%nps(is),
     &                    pgroup%xp(i1:i2),pgroup%yp(i1:i2),pgroup%zp(i1:i2),
     &                    pgroup%nps(is),ijp(i1:i2),1,1)
          endif
        enddo

        if (solvergeom/=Zgeom) then
          --- Loop over unique injection sources
         do ij1 = 1,inj_ninj

          --- Scale the charge density by the inverse of the fraction of the/
          --- contributing area which is within the emitting surface.
            do iy=0,inj_ny
              do ix=0,inj_nx
                if (inj_area(ix,iy,ij1) > 0.)
     &            inj_q(ix,iy,:,ij1) = inj_q(ix,iy,:,ij1)/inj_area(ix,iy,ij1)
              enddo
            enddo
          enddo
        end if

      else !l_inj_rz=.true.
        do is=1,pgroup%ns
          i1 = pgroup%ins(is)
          i2 = pgroup%ins(is)+pgroup%nps(is)-1
          --- Transform the particles into the frame of the injection source
          if (pgroup%nps(is) > 0) then
            ijp(i1:i2) = int(pgroup%pid(i1:i2,injpid))
            call inj_transform(pgroup%nps(is),
     &                    pgroup%xp(i1:i2),pgroup%yp(i1:i2),pgroup%zp(i1:i2),
     &                    pgroup%nps(is),ijp(i1:i2),-1,1)
          endif
          wws = pgroup%sq(is)*pgroup%sw(is)
          do ip=i1,i2
            if(wpidɬ) then
              ww=wws*pgroup%pid(ip,wpid)
            else
              ww=wws
            end if
            rr2 = pgroup%xp(ip)**2 + pgroup%yp(ip)**2
            ix = int(sqrt(rr2)*dxi)

            --- Get number of injection source of particle.  The particle id
            --- has the number of the injection source the particle was
            --- emitted from. The int is needed since it is doing double
            --- duty and has other information stored in the fractional part.
            ij = int(pgroup%pid(ip,injpid))

            --- Skip particles that did not come from an injection source.
            --- This is not necessarily the correct thing to do, but at least
            --- for now, it avoids memory problems and seg faults.
            --- Ultimately, a particle should be able to check if its near
            --- any conductor and do special coding for the field near it.
            if (ij <= 0 .or. ij > ninject) cycle

            --- set temporaries
            rinji = 1./rinject(ij)
            rinj = rinject(ij)

            --- Calculate distance of particle from the emitting surface
            if (abs(pgroup%zp(ip)) < abs(rinj)) then
              --- The expression below for zp(ip)<rinj is mathematically identical
              --- to the commented out expression below.  That form is
              --- used so that for large values of rinj, i.e. a flat emitting
              --- surface, the correct value of the distance is calculated,
              --- namely zp(ip).
              dd = rinj - sqrt(rr2 + (rinj - zp(ip))**2)
              dd = (2.*pgroup%zp(ip) - (pgroup%zp(ip)**2 + rr2)*rinji)/
     &             (1. + sqrt(rr2*rinji**2 + (1. - pgroup%zp(ip)*rinji)**2))
            else
              --- When zp(ip)>rinj, a seperate equation is needed.
              dd = abs(rinj) + sqrt(rr2 + (pgroup%zp(ip) - rinj)**2)
            endif
            dd = dd/deltaz(ij)

            --- Only deposit charge of particles in emitting area
            if (abs(dd) < 1.) then

              --- angle relative to z axis
              az = atan2(sqrt(rr2),rinj-pgroup%zp(ip))

              --- Map the particle position onto the emitting surface.
              px = abs(rinj*sin(az))*dxi
              ix = int(px)
              wx1 = px - ix
              wx0 = 1. - wx1
              if(dd>dzmin) then
                iz  = 1+int(real(inj_nz-1)*(1.-(1.-dd**onethird)/(1.-dzmin**onethird)))
                wz1 = (dd-(1.-((real(inj_nz-iz))/real(inj_nz-1))*(1.-dzmin**onethird))**3)/dz_local(iz)
              else
                prevent deposition in cell closer to emitter (gives better result on 1D Lampel-Tiefenback test)
                IF(l_inj_no_rho_on_emit) cycle
                iz  = 0
                wz1 = dd/dzmin
              end if
              wz0 = 1.-wz1
              if (ix < 0 .or. ix+sx > inj_nx) cycle
              if (iz < 0 .or. iz+1  > inj_nz) cycle

              --- Deposit the particle's charge onto the emitting region.
              ij1 = min(ij,inj_ninj)
              inj_q(ix   ,0   ,iz  , ij1) = inj_q(ix   ,0   ,iz  ,ij1) + ww*wx0*wz0
              inj_q(ix+sx,0   ,iz  , ij1) = inj_q(ix+sx,0   ,iz  ,ij1) + ww*wx1*wz0
              inj_q(ix   ,0   ,iz+1, ij1) = inj_q(ix   ,0   ,iz+1,ij1) + ww*wx0*wz1
              inj_q(ix+sx,0   ,iz+1, ij1) = inj_q(ix+sx,0   ,iz+1,ij1) + ww*wx1*wz1

            endif
          enddo

          --- Transform the particles back into the lab frame
          if (pgroup%nps(is) > 0) then
            call inj_transform(pgroup%nps(is),
     &                      pgroup%xp(i1:i2),pgroup%yp(i1:i2),pgroup%zp(i1:i2),
     &                      pgroup%nps(is),ijp(i1:i2),1,1)
          endif
        enddo

        --- Loop over unique injection sources
        do ij1 = 1,inj_ninj
          --- Scale the charge density by the inverse of the fraction of the
          --- contributing area which is within the emitting surface.
          do ix=0,inj_nx
            if (inj_area(ix,0,ij1) > 0.)
     &        inj_q(ix,0,:,ij1) = inj_q(ix,0,:,ij1) / inj_area(ix,0,ij1)
            if(ix==0) then
              inj_q(ix,0,:,ij1) = inj_q(ix,0,:,ij1) / (pi*0.25*inj_dx**2)
            else
              inj_q(ix,0,:,ij1) = inj_q(ix,0,:,ij1) / (2.*pi*ix*inj_dx**2)
            end if
          enddo
        enddo

      end if

      --- assign charge deposited on emitter surface (slice 0) to slice 1
      IF(.not. l_inj_no_rho_on_emit) then
        inj_q(:,:,1,:) = inj_q(:,:,1,:) + inj_q(:,:,0,:)
        inj_q(:,:,0,:) = 0.
      end if

#ifdef MPIPARALLEL
      --- The result calculated on each processor is summed on all
        call parallelsumrealarray(inj_q,
     &                            (1+inj_nx)*(1+inj_ny)*(1+inj_nz)*ninject)
#endif

      deallocate(ijp)
      deallocate(dz_local)

!$OMP MASTER
      if (lw3dtimesubs) timeinj_setrhomr = timeinj_setrhomr + wtime() - substarttime
!$OMP END MASTER
      return
      end

[getinj_phi]
      subroutine getinj_phi_mr(nn,xxp,yyp,zzp,xi,yi)
      use Subtimersw3d
      use InPart
      use InGen
      use InGen3d
      use InjectVars
      use InjectVars3d
      use InMesh3d
      use Constant
      use Fields3d
      use Fields3dParticles
      use Picglb
      use Picglb3d
      integer(ISZ), intent(in) :: nn
      real(kind=8), dimension(nn), intent(in) :: xxp, yyp, zzp
      integer(ISZ), dimension(nn), intent(in) :: xi, yi

  Calculate the phi in the emitting region by doing 1-d tridiagonal solves
  of Poisson's equation along lines normal to the emitting surface.

      integer(ISZ):: i,is,ip,ix,iy,iz,ij,ij1,irhs,nrhs
      integer(ISZ):: nrhs4,kd,ldab,ldb,n,info
      real(kind=8):: rinj,rinji,rr2,dd,deltaz(ninject)
      real(kind=8):: aa,az,px,py,wx1,wy1,wz1,wx0,wy0,wz0,ww,wws,dz0,dz1,zinj_grid
      real(kind=8):: dxi,dyi,dzmin,ainjsqi,binjsqi,rrsq,onethird,ext,eyt,ezt,atx,aty
      real(kind=8), allocatable, dimension(:,:) :: rhs, ab
      real(kind=8), allocatable, dimension(:) :: ex_tmp, ey_tmp, ez_tmp
      real(kind=8), allocatable, dimension(:) :: bx_tmp, by_tmp, bz_tmp
      real(kind=8), allocatable, dimension(:) :: dz_local
      integer(ISZ):: allocerror
      real(kind=8):: substarttime,wtime
      if (lw3dtimesubs) substarttime = wtime()

      allocate(ex_tmp(nn),ey_tmp(nn),ez_tmp(nn))
      allocate(bx_tmp(nn),by_tmp(nn),bz_tmp(nn))
      allocate(dz_local(0:inj_nz))

      onethird = 1./3.

      dzmin = 1./(real(inj_nz-1)*2.**onethird+2.-real(inj_nz))**3
      deltaz = inj_d*inj_dz0

      dxi = 1./inj_dx
      dyi = 1./inj_dy

      dz_local(0) = dzmin
      do iz = 1, inj_nz-1
        if(iz==1) then
          dz0 = dzmin
          dz1 = dzmin
          zinj_grid = dz0
        else
          dz0 = dz1
          zinj_grid = zinj_grid+dz0
          dz1 = (1.-(real(inj_nz-iz-1)/real(inj_nz-1))*(1.-dzmin**onethird))**3
     &        - zinj_grid
        end if
        dz_local(iz) = dz1
      end do
      dz_local(inj_nz) = dz_local(inj_nz-1)

      --- initialize inj_phi_3d
      inj_phi_3d = 0.
      --- getinj_phi_3d should not be called since the values that
      --- would be calculated are never used and are incorrect. These
      --- incorrect values block inj_phi_3d from being updated in
      --- the call to parallelnonzerorealarray below.
      call getinj_phi_3d()

      allocate(rhs(1:inj_nz-1,nn),ab(2,1:inj_nz-1),stat=allocerror)
      if (allocerror /= 0) then
        print*,"getinj_phi_mr: allocation error ",allocerror,
     &         ": could not allocate rhs and ab to shape ",inj_nz,nn
        call kaboom("getinj_phi_mr: allocation error")
        return
      endif

      do ij = 1, inj_ninj
        nrhs = nn
        --- set temporaries
        rinji = 1./rinject(ij)
        rinj = rinject(ij)
        do iz = 1, inj_nz-1
          if(iz==1) then
            dz0 = dzmin*deltaz(ij)
            dz1 = dzmin*deltaz(ij)
            zinj_grid = dz0
          else
            dz0 = dz1
            zinj_grid = zinj_grid+dz0
            dz1 = deltaz(ij)*(1.-(real(inj_nz-iz-1)/real(inj_nz-1))*(1.-dzmin**onethird))**3
     &          - zinj_grid
          end if
          ab(1,iz) = -1./dz0
          ab(2,iz) = 1./dz0+1./dz1
          do irhs = 1, nrhs
            ix = xi(irhs)
            iy = yi(irhs)
            if (l_inj_use_rho_with_mr) then
              rhs(iz,irhs) = inj_q(ix,iy,iz,ij)/eps0
            else
              rhs(iz,irhs) = 0.
            endif
          end do
          if(iz==1) then
            do irhs = 1, nrhs
              ix = xi(irhs)
              iy = yi(irhs)
              inj_phi_3d(ix,iy,iz-1,ij) = vinject(ij)
              rhs(iz,irhs) = rhs(iz,irhs) + inj_phi_3d(ix,iy,iz-1,ij)/dz0
            end do
          end if
          if(iz==inj_nz-1) then
            do irhs = 1, nrhs
              ix = xi(irhs)
              iy = yi(irhs)
              inj_phi_3d(ix,iy,iz+1,ij) = vinject(ij)-inj_phi(ix,iy,ij)
              rhs(iz,irhs) = rhs(iz,irhs) + inj_phi_3d(ix,iy,iz+1,ij)/dz1
            end do
          end if
        end do
        kd = 1
        ldab = 1+kd
        ldb = inj_nz-1
        n = inj_nz-1
        nrhs4 = nrhs
#ifdef CYGWIN
        call dpbsv_('u',n,kd,nrhs4,ab,ldab,rhs,ldb,info)
#else
        call dpbsv('u',n,kd,nrhs4,ab,ldab,rhs,ldb,info)
#endif
        if(info/=0) then
          write(0,*) 'ERROR in subroutine getinj_phi_mr at exit of call to LAPACK routine dpbsv'
          write(0,*) 'Info /= 0, info = ',info
          write(0,*) 'Stop'
          call kaboom("getinj_phi_mr: error in call to dpbsv")
          return
        end if
        do iz = 1, inj_nz-1
          do irhs = 1, nrhs
            ix = xi(irhs)
            iy = yi(irhs)
            inj_phi_3d(ix,iy,iz,ij) = rhs(iz,irhs)
          end do
        end do

#ifdef MPIPARALLEL
      --- The result calculated on each processor is gathered on all
        call parallelnonzerorealarray(inj_phi_3d,
     &                                (1+inj_nx)*(1+inj_ny)*(1+inj_nz)*inj_ninj)
#endif

        call inj_transform(nrhs,xxp,yyp,zzp,1,ij,-1,1)

        inj_ex_3d = 0.
        inj_ey_3d = 0.
        inj_ez_3d = 0.

        do iz = 1, inj_nz-1
          if(iz==1) then
            dz0 = dzmin*deltaz(ij)
            dz1 = dzmin*deltaz(ij)
            zinj_grid = dz0
          else
            dz0 = dz1
            zinj_grid = zinj_grid+dz0
            dz1 = deltaz(ij)*(1.-(real(inj_nz-iz-1)/real(inj_nz-1))*(1.-dzmin**onethird))**3
     &          - zinj_grid
          end if
          do irhs = 1, nrhs
            ix = xi(irhs)
            iy = yi(irhs)

            --- computes components of electric field in source curved coordinates
            ezt = (inj_phi_3d(ix,iy,iz-1,ij)-inj_phi_3d(ix,iy,iz+1,ij))/(dz0+dz1)
            if(linj_eperp .and. ixɬ .and. ix<inj_nx) then
              ext = 0.5*(inj_phi_3d(ix-1,iy,iz,ij)-inj_phi_3d(ix+1,iy,iz,ij))*dxi
            else
              ext = 0.
            end if
            if(linj_eperp .and. iyɬ .and. iy<inj_ny) then
              eyt = 0.5*(inj_phi_3d(ix,iy-1,iz,ij)-inj_phi_3d(ix,iy+1,iz,ij))*dyi
            else
              eyt = 0.
            end if

            --- angle of point in transverse plane
            aa = atan2(yyp(irhs),dvnz(xxp(irhs)))

            --- angle relative to z axis
            rr2 = xxp(irhs)**2 + yyp(irhs)**2
            az = asin(sqrt(rr2)*rinji)
          az = atan2(sqrt(rr2),(rinj - zz(ip))*sign(1.,rinj)) XXX

            atx = asin(xxp(irhs)/sqrt(rinj**2 - yyp(irhs)**2))
            aty = asin(yyp(irhs)/sqrt(rinj**2 - xxp(irhs)**2))

            --- transform field vector from source coordinates to lab coordinates
            inj_ex_3d(ix,iy,iz,ij) = - ezt*sin(az)*cos(aa) + ext*cos(atx)
            inj_ey_3d(ix,iy,iz,ij) = - ezt*sin(az)*sin(aa) + eyt*cos(aty)
            inj_ez_3d(ix,iy,iz,ij) = ezt*cos(az) + ext*sin(atx) + eyt*sin(aty)
          end do
          inj_ez_3d(:,:,0,ij) = inj_ez_3d(:,:,1,ij)
        end do
        call inj_transform(nrhs,xxp,yyp,zzp,1,ij,1,1)
        call inj_transform(nrhs,inj_ex_3d,inj_ey_3d,inj_ez_3d,1,ij,1,0)
         inj_phi_3d = inj_phi_tmp
            --- Obtain the self-field from the electrostatic potential
        call fetche3dfrompositions(0,1,nn,xxp,yyp,zzp,ex_tmp,ey_tmp,ez_tmp,
     &                                                bx_tmp,by_tmp,bz_tmp)

        dd = inj_d(ij)*dzmin
        if(dd>dzmin) then
          iz  = 1+int(real(inj_nz-1)*(1.-(1.-dd**onethird)/(1.-dzmin**onethird)))
          wz1 = (dd-(1.-((real(inj_nz-iz))/real(inj_nz-1))*(1.-dzmin**onethird))**3)/dz_local(iz)
           iz = int(real(inj_nz) - log(1./dd)/log(2.))
           wz1 = dd*2.**(inj_nz-iz)-1.
        else
          iz = 0
          wz1 = dd/dzmin
        end if
        wz0 = 1.-wz1
        do irhs = 1, nrhs
          ix = xi(irhs)
          iy = yi(irhs)
          inj_ex_3d(ix,iy,inj_nz,ij) = ex_tmp(irhs)
          inj_ey_3d(ix,iy,inj_nz,ij) = ey_tmp(irhs)
          inj_ez_3d(ix,iy,inj_nz,ij) = ez_tmp(irhs)
          inj_phi(ix,iy,ij) = vinject(ij) - (inj_phi_3d(ix,iy,iz,ij)*wz0+inj_phi_3d(ix,iy,iz+1,ij)*wz1)
        end do
      end do

#ifdef MPIPARALLEL
      --- The result calculated on each processor is gathered on all
        call parallelnonzerorealarray(inj_ex_3d,
     &                                (1+inj_nx)*(1+inj_ny)*(1+inj_nz)*inj_ninj)
        call parallelnonzerorealarray(inj_ey_3d,
     &                                (1+inj_nx)*(1+inj_ny)*(1+inj_nz)*inj_ninj)
        call parallelnonzerorealarray(inj_ez_3d,
     &                                (1+inj_nx)*(1+inj_ny)*(1+inj_nz)*inj_ninj)
#endif

      deallocate(rhs,ab)
      deallocate(ex_tmp,ey_tmp,ez_tmp)
      deallocate(bx_tmp,by_tmp,bz_tmp)
      deallocate(dz_local)

!$OMP MASTER
      if (lw3dtimesubs) timegetinj_phi_mr = timegetinj_phi_mr + wtime() - substarttime
!$OMP END MASTER
      return
      end

[inj_sete3d]
      subroutine inj_sete_3darray(npart,xp,yp,zp,pid,dz,ex,ey,ez)
      use Subtimersw3d
      use InGen3d
      use InPart
      use InjectVars
      use InjectVars3d
      integer(ISZ):: npart
      real(kind=8):: xp(npart),yp(npart),zp(npart),pid(npart)
      real(kind=8):: ex(npart),ey(npart),ez(npart)
      real(kind=8):: dz

  Calculate the electric field from the grid for particles near the emitting
  surface.  The normal electric field is calculated from the potential drop
  across a length of 'dz' along a line normal to the emitting surface.  The
  field components are then obtained from the normal field. Optionally,
  the tangential fields near the emitting surface can also be included.
  Eventually this routine could be expanded to calculate the E field near
  any conductor surface.

      real(kind=8),allocatable:: xx(:),yy(:),zz(:)
      real(kind=8),allocatable:: tex(:),tey(:),tez(:)
      integer(ISZ),allocatable:: ijp(:)
      integer(ISZ):: ip,ix,iy,iz,ij
      real(kind=8):: rinj,rinji,rr2,wr,dd,ca,dzmin,deltaz(ninject)
      real(kind=8):: aa,az,atx,aty,px,py,pz,wx,wy,wz,en
      real(kind=8):: dxi,dyi
      real(kind=8):: sx,sy,s0,s1,s2,s3,s4,s5,s6,s7
      real(kind=8):: fourthirds,onethird
      real(kind=8):: substarttime,wtime
      integer(ISZ):: spreadx,spready
      real(kind=8):: dz0, dz1, zinj_grid
      real(kind=8), dimension(0:inj_nz) :: dz_local
      if (lw3dtimesubs) substarttime = wtime()

      allocate(xx(npart),yy(npart),zz(npart))
      allocate(tex(npart),tey(npart),tez(npart))
      allocate(ijp(npart))

      if(solvergeom==XYZgeom) then
        spreadx = 1
        spready = 1
      elseif(solvergeom==RZgeom) then
        spreadx = 1
        if(l_inj_rz .or. l_inj_rz_grid) then
          spready = 0
        else
          spready = 1
        end if
      elseif(solvergeom==XZgeom) then
        spreadx = 1
        spready = 0
      elseif(solvergeom==Zgeom) then
        spreadx = 0
        spready = 0
      end if
      tex = 0.
      tey = 0.
      tez = 0.

      dxi = 1./inj_dx
      dyi = 1./inj_dy
      fourthirds = 4./3.
      onethird = 1./3.

      dzmin = 1./(real(inj_nz-1)*2.**onethird+real(2-inj_nz))**3
       dzmin = 1./(2.**(inj_nz-1))
      deltaz(:) = inj_d(1)*inj_dz0

      dz_local(0) = dzmin
      do iz = 1, inj_nz-1
        if(iz==1) then
          dz0 = dzmin
          dz1 = dzmin
          zinj_grid = dz0
        else
          dz0 = dz1
          dz1 = 2.*dz1
          zinj_grid = zinj_grid+dz0
          dz1 = (1.-(real(inj_nz-iz-1)/real(inj_nz-1))*(1.-dzmin**onethird))**3
     &        - zinj_grid
        end if
        dz_local(iz) = dz1
      end do
      dz_local(inj_nz) = dz_local(inj_nz-1)

      --- Transform the particles into the frame of the injection source
      xx = xp
      yy = yp
      zz = zp
      ijp = int(pid)
      call inj_transform(npart,xx,yy,zz,npart,ijp,-1,1)

      --- Loop over particles
      do ip=1,npart

        --- Get number of injection source of particle.  The particle id
        --- has the number of the injection source the particle was
        --- emitted from. The int is needed since it is doing double
        --- duty and has other information stored in the fractional part.
        ij = ijp(ip)
        if (ij <= 0 .or. ij > ninject) cycle

        --- set temporaries
        rinj = rinject(ij)
        rinji = 1./rinj

        --- Calculate distance of particle from the emitting surface
        rr2 = xx(ip)**2 + yy(ip)**2
        if (abs(zz(ip)) < abs(rinj)) then
          --- The expression below for zz<rinj is mathematically identical
          --- to the commented out expression below.  That form is
          --- used so that for large values of rinj, i.e. a flat emitting
          --- surface, the correct value of the distance is calculated,
          --- namely zz.
          dd = rinj - sqrt(rr2 + (rinj - zz(ip))**2)
          dd = (2.*zz(ip) - (zz(ip)**2 + rr2)*rinji)/
     &         (1. + sqrt(rr2*rinji**2 + (1. - zz(ip)*rinji)**2))
        else
          --- When zz>rinj, a seperate equation is needed.
          dd = abs(rinj) + sqrt(rr2 + (zz(ip) - rinj)**2)
        endif
        dd = dd/deltaz(ij)

        --- Only calculate E-field if particle close to emitting surface.
         if (0.0 <= inj_d(ij)*dd .and. abs(dd) <= 0.5) then
        if (0.0 <= inj_d(ij)*dd .and. abs(dd) <= 1.) then
          --- The check if a particle is within the radial area of the
          --- source is probably not needed. It has been removed since there
          --- are some cases which this check is incorrect - for example
          --- in a source with a diverging beam. The particles spread out
          --- beyond the edge of the source and would not pass this check.
     &    ((xx(ip)*binject(ij))**2 + (yy(ip)*ainject(ij))**2 <=
     &    (ainject(ij)*binject(ij))**2) .and.
     &    ((xx(ip)*binjmin(ij))**2 + (yy(ip)*ainjmin(ij))**2 >=
     &    (ainjmin(ij)*binjmin(ij))**2) ) then

          --- angle of point in transverse plane
          aa = atan2(yy(ip),xx(ip))

          --- If using axisymmetric injection, convert x and y to radius.
          if (l_inj_rz .or. l_inj_rz_grid) then
            xx(ip) = sqrt(rr2)
            yy(ip) = 0.
          endif

          --- angle relative to z axis
          az = asin(sqrt(rr2)*rinji)
          az = atan2(sqrt(rr2),(rinj - zz(ip))*sign(1.,rinj)) XXX

          --- Find coordinates of the point on the secondary surface in front
          --- of the emitting surface along a line perpendicular to the
          --- emitting surface.
          wr = 1./(1. - deltaz(ij)*dd*rinji)
          px = abs(rinj*sin(az)*cos(aa) - inj_xmmin(ij))*dxi
          py = abs(rinj*sin(az)*sin(aa) - inj_ymmin(ij))*dyi
          px = abs(xx(ip)*wr - inj_xmmin(ij))*dxi
          py = abs(yy(ip)*wr - inj_ymmin(ij))*dyi

          if (spreadx*px < 0. .or. (px+1)*spreadx > inj_nx .or.
     &        spready*py < 0. .or. (py+1)*spready > inj_ny) cycle

          ix = spreadx*int(px)
          iy = spready*int(py)
          wx = spreadx*(px - ix)
          wy = spready*(py - iy)
          if(dd>dzmin) then
            iz = 1+int(real(inj_nz-1)*(1.-(1.-dd**onethird)/(1.-dzmin**onethird)))
            wz = (dd-(1.-((real(inj_nz)-real(iz))/real(inj_nz-1))*(1.-dzmin**onethird))**3)/dz_local(iz)
             iz = int(real(inj_nz) - log(1./dd)/log(2.))
             wz = dd*2.**(inj_nz-iz)-1.
          else
            iz = 0
            wz = dd/dzmin
          end if


          ex(ip) = 0.
          ey(ip) = 0.
          ez(ip) = 0.

          s0 = (1.-wx)*(1.-wy)*(1.-wz)
          s1 =     wx *(1.-wy)*(1.-wz)
          s2 = (1.-wx)*    wy *(1.-wz)
          s3 =     wx *    wy *(1.-wz)
          s4 = (1.-wx)*(1.-wy)*    wz
          s5 =     wx *(1.-wy)*    wz
          s6 = (1.-wx)*    wy *    wz
          s7 =     wx *    wy *    wz 

          if(l_inj_rz .or. l_inj_rz_grid) then
            en = inj_ex_3d(ix        ,iy        ,iz  ,ij)*s0 +
     &           inj_ex_3d(ix+spreadx,iy        ,iz  ,ij)*s1 +
     &           inj_ex_3d(ix        ,iy        ,iz+1,ij)*s4 +
     &           inj_ex_3d(ix+spreadx,iy        ,iz+1,ij)*s5
            tex(ip) = en*cos(aa)
            tey(ip) = en*sin(aa)
          else
            sx = 1.
            sy = 1.
            if (xx(ip) < inj_xmmin(ij)) sx = -1.
            if (yy(ip) < inj_ymmin(ij)) sy = -1.

            tex(ip) = sx*(inj_ex_3d(ix        ,iy        ,iz  ,ij)*s0 +
     &                    inj_ex_3d(ix+spreadx,iy        ,iz  ,ij)*s1 +
     &                    inj_ex_3d(ix        ,iy+spready,iz  ,ij)*s2 +
     &                    inj_ex_3d(ix+spreadx,iy+spready,iz  ,ij)*s3 +
     &                    inj_ex_3d(ix        ,iy        ,iz+1,ij)*s4 +
     &                    inj_ex_3d(ix+spreadx,iy        ,iz+1,ij)*s5 +
     &                    inj_ex_3d(ix        ,iy+spready,iz+1,ij)*s6 +
     &                    inj_ex_3d(ix+spreadx,iy+spready,iz+1,ij)*s7 )

            tey(ip) = sy*(inj_ey_3d(ix        ,iy        ,iz  ,ij)*s0 +
     &                    inj_ey_3d(ix+spreadx,iy        ,iz  ,ij)*s1 +
     &                    inj_ey_3d(ix        ,iy+spready,iz  ,ij)*s2 +
     &                    inj_ey_3d(ix+spreadx,iy+spready,iz  ,ij)*s3 +
     &                    inj_ey_3d(ix        ,iy        ,iz+1,ij)*s4 +
     &                    inj_ey_3d(ix+spreadx,iy        ,iz+1,ij)*s5 +
     &                    inj_ey_3d(ix        ,iy+spready,iz+1,ij)*s6 +
     &                    inj_ey_3d(ix+spreadx,iy+spready,iz+1,ij)*s7 )
          end if

          tez(ip) =     inj_ez_3d(ix        ,iy        ,iz  ,ij)*s0 +
     &                  inj_ez_3d(ix+spreadx,iy        ,iz  ,ij)*s1 +
     &                  inj_ez_3d(ix        ,iy+spready,iz  ,ij)*s2 +
     &                  inj_ez_3d(ix+spreadx,iy+spready,iz  ,ij)*s3 +
     &                  inj_ez_3d(ix        ,iy        ,iz+1,ij)*s4 +
     &                  inj_ez_3d(ix+spreadx,iy        ,iz+1,ij)*s5 +
     &                  inj_ez_3d(ix        ,iy+spready,iz+1,ij)*s6 +
     &                  inj_ez_3d(ix+spreadx,iy+spready,iz+1,ij)*s7

        endif
      enddo

      call inj_transform(npart,tex,tey,tez,npart,ijp,1,0)
      ex = ex + tex
      ey = ey + tey
      ez = ez + tez

      deallocate(xx,yy,zz)
      deallocate(tex,tey,tez)
      deallocate(ijp)

!$OMP MASTER
      if (lw3dtimesubs) timeinj_sete_3darray = timeinj_sete_3darray + wtime() - substarttime
!$OMP END MASTER
      return
      end

[inject3d]
      subroutine gettinj_phi()
      use Subtimersw3d
      use Constant
      use InMesh3d
      use InjectVars
      use InjectVars3d
      use Picglb,Only: zpminlocal,zpmaxlocal,zgrid

  Calculate potential drop from transverse emitting surface at distance
  of dx from the surface.

      integer(ISZ):: ij,iz,ith
      real(kind=8):: aa,rsign,aoverb
      real(kind=8):: xx((1+nttinjmax)*(1+nztmax)*ntinj)
      real(kind=8):: yy((1+nttinjmax)*(1+nztmax)*ntinj)
      real(kind=8):: zz((1+nttinjmax)*(1+nztmax)*ntinj)
      integer(ISZ):: ti((1+nttinjmax)*(1+nztmax)*ntinj)
      integer(ISZ):: zi((1+nttinjmax)*(1+nztmax)*ntinj)
      integer(ISZ):: in((1+nttinjmax)*(1+nztmax)*ntinj)
      real(kind=8):: pp((1+nttinjmax)*(1+nztmax)*ntinj)
      integer(ISZ):: nn,ii,i1,i2
      real(kind=8):: substarttime,wtime
      if (lw3dtimesubs) substarttime = wtime()

      if (ntinj == 0) return

      if (tinject < 1 .or. tinject > 3) return

      --- loop over injection sources
      --- i1 and i2 keep track of the data as new data from each injection
      --- source is added. i1 is the starting location for the each set of
      --- data and i2 is the location of the last data point.
      i1 = 1
      do ij=1,ntinj
        i2 = i1 - 1

        do iz=0,nztinj(ij)
          do ith=0,nttinj(ij)

            i2 = i2 + 1

            aa = ith*2.*pi/nttinj(ij)
            if (ltinj_outward(ij)) then
              rsign = +1.
            else
              rsign = -1.
            endif
            aoverb = atinjectz(iz,ij)/btinjectz(iz,ij)
            xx(i2) = xtinject(ij) + (atinjectz(iz,ij) + rsign*inj_dx)*cos(aa)
            yy(i2) = ytinject(ij) + (btinjectz(iz,ij) + rsign*inj_dx*aoverb)*sin(aa)
            zz(i2) = ztinjmn(ij) + iz*dztinj(ij)

            --- Save coordinates relative to injection grid.
            zi(i2) = iz
            ti(i2) = ith
            in(i2) = ij

          enddo
        enddo

        --- Select only those data points which are within the grid.
        --- This also removes particles not within the domain of this
        --- process.
        nn = i1
        do ii=i1,i2
          if (zpminlocal+zgrid <= zz(ii) .and.
     &        zz(ii) <= zpmaxlocal+zgrid) then
            if (nn < ii) then
              zi(nn) = zi(ii)
              ti(nn) = ti(ii)
              xx(nn) = xx(ii)
              yy(nn) = yy(ii)
              zz(nn) = zz(ii)
              in(nn) = in(ii)
            endif
            nn = nn + 1
          endif
        enddo
        i2 = nn - 1

        i1 = i2 + 1

      enddo
      nn = i1 - 1

      pp = 0.
      call fetchphi(nn,xx,yy,zz,pp)

      --- Calculate the difference between phi at that point and phi on
      --- the emitting surface.
      tinj_phi = 0.
      do ii=1,nn
        tinj_phi(ti(ii),zi(ii),in(ii)) = vtinject(in(ii)) - pp(ii)
      enddo
        
#ifdef MPIPARALLEL
      --- The result calculated on each processor is gathered on all
      call parallelnonzerorealarray(tinj_phi,(1+nttinjmax)*(1+nztmax)*ntinj)
#endif


!$OMP MASTER
      if (lw3dtimesubs) timegettinj_phi = timegettinj_phi + wtime() - substarttime
!$OMP END MASTER
      return
      end

[inj_sete] [inject3d]
      subroutine tinj_sete3d(npart,xp,yp,zp,pid,ex,ey,ez)
      use Subtimersw3d
      use Constant
      use InGen3d
      use InMesh3d
      use Picglb3d
      use InPart
      use InjectVars
      use InjectVars3d
      integer(ISZ):: npart
      real(kind=8):: xp(npart),yp(npart),zp(npart),pid(npart)
      real(kind=8):: ex(npart),ey(npart),ez(npart)

  Calculate the electric field from the grid for particles near the transverse
  emitting surfaces.  The normal electric field is calculated from the potential drop
  across a length of 'dx' along a line normal to the emitting surface.  The
  field components are then obtained from the normal field.
  Eventually this routine could be expanded to calculate the E field near
  any conductor surface.

      real(kind=8):: xx,yy,zz
      integer(ISZ):: ijp
      integer(ISZ):: ip,iz,ith,ij
      real(kind=8):: ainj,binj,ainjp1,binjp1,aoverb
      real(kind=8):: p1x,p1y,p2x,p2y
      real(kind=8):: dr,rr,dd,rsign,dzi
      real(kind=8):: aa,wt,wz,en
      real(kind=8):: sx,sy
      real(kind=8):: fourthirds,onethird
      real(kind=8):: substarttime,wtime
      if (lw3dtimesubs) substarttime = wtime()

      --- The user can force the code to skip the special calculation of the 
      --- E fields for particles behind the virtual surface.
      if (linj_efromgrid) return

      fourthirds = 4./3.
      onethird = 1./3.

      --- Loop over particles
      do ip=1,npart

        --- Get number of injection source of particle.  The particle id
        --- has the number of the injection source the particle was
        --- emitted from. The int is needed since it is doing double
        --- duty and has other information stored in the fractional part.
        ij = int(pid(ip)) - ninject
        if (ij <= 0 .or. ij > ntinj) cycle

        xx = xp(ip) - xtinject(ij)
        yy = yp(ip) - ytinject(ij)
        zz = zp(ip) - ztinjmn(ij)

        iz = zz/dztinj(ij)
        if (iz < 0. .or. iz >= nztinj(ij)) cycle
        wz = zz/dztinj(ij) - iz
        
        ainj = atinjectz(iz,ij)
        binj = btinjectz(iz,ij)
        ainjp1 = atinjectz(iz+1,ij)
        binjp1 = btinjectz(iz+1,ij)
        aoverb = ainj/binj

        if(.not.l_inj_rz) then
          aa = atan2(yy*aoverb,xx)
        else
          aa = 0.
        endif
        if (aa < 0.) aa = aa + 2*pi
        ith = aa/(2.*pi)*nttinj(ij)

        if (ltinj_outward(ij)) then
          rsign = +1.
        else
          rsign = -1.
        endif

        --- Calculate distance of particle from the emitting surface
        rr = sqrt(xx**2 + (yy*aoverb)**2)
        p1x = (ainj*(1.-wz) + ainjp1*wz)*cos(aa)
        p1y = (binj*(1.-wz) + binjp1*wz)*sin(aa)
        p2x = p1x + rsign*inj_dx*cos(aa)
        p2y = p1y + rsign*inj_dx*aoverb*sin(aa)
        dr = sqrt((p2x - p1x)**2 + (p2y - p1y)**2)
        dd = rsign*(rr - ainj)/dr

        --- Only calculate E-field if particle close to emitting surface.
        if (0.0 <= dd .and. dd <= 1.) then

          --- Find coordinates of the point on the secondary surface in front
          --- of the emitting surface along a line perpendicular to the
          --- emitting surface.
          wt = aa/(2.*pi)*nttinj(ij) - ith

          --- Calculate the normal electric field from the potential drop in
          --- front of the emitting surface. The normal field given by the
          --- Child-Langmuir solution (for planar surfaces) is used.
          en = (tinj_phi(ith  ,iz  ,ij)*(1.-wt)*(1.-wz) +
     &          tinj_phi(ith+1,iz  ,ij)*    wt *(1.-wz) +
     &          tinj_phi(ith  ,iz+1,ij)*(1.-wt)*    wz  +
     &          tinj_phi(ith+1,iz+1,ij)*    wt *    wz   )/dr
          if (linj_enormcl) then
            --- Scale the normal E field to match the Child-Langmuir solution
            en = en*fourthirds*dd**onethird
          endif

          --- Set the particle's electric field based off of the normal
          --- electric fields.
          if(l_inj_rz) aa = atan2(yy*aoverb,dvnz(xx))
          ex(ip) = -en*cos(aa)
          ey(ip) = -en*sin(aa)
          if (linj_eperp) then
            ez(ip) = dd*((tinj_phi(ith  ,iz  ,ij)*(1.-wt)*(1.-wz) +
     &                    tinj_phi(ith+1,iz  ,ij)*    wt *(1.-wz) +
     &                    tinj_phi(ith  ,iz+1,ij)*(1.-wt)*    wz  +
     &                    tinj_phi(ith+1,iz+1,ij)*    wt *    wz) -
     &                   (tinj_phi(ith  ,iz+1,ij)*(1.-wt)*(1.-wz) +
     &                    tinj_phi(ith+1,iz+1,ij)*    wt *(1.-wz) +
     &                    tinj_phi(ith  ,iz+2,ij)*(1.-wt)*    wz  +
     &                    tinj_phi(ith+1,iz+2,ij)*    wt *    wz))/dztinj(ij)
          else
            ez(ip) =  0.
          endif

        endif
      enddo

!$OMP MASTER
      if (lw3dtimesubs) timetinj_sete3d = timetinj_sete3d + wtime() - substarttime
!$OMP END MASTER
      return
      end

      real(kind=8) function ellipseperimeter(a,b)
      use Constant
      real(kind=8):: a,b
  Use several terms of Gauss-Kummer series to get good accuracy.
      real(kind=8):: h,p

      h = (a-b)**2/(a+b)**2
      p = pi*(a+b)*(1 + h/4 + h**2/64 + h**3/256 + 25*h**4/16384 + 49*h**5/65536)
      ellipseperimeter = p

      return
      end

      subroutine createparticlesincells(nx,ny,nz,rnn,exgrid,eygrid,ezgrid,
     &                                  condid,lcylindrical,
     &                                  dx,dy,dz,nn,xx,yy,zz,ex,ey,ez,pp)
      use Constant
      integer(ISZ):: nx,ny,nz,nn
      real(kind=8):: rnn(0:nx,0:ny,0:nz)
      real(kind=8):: exgrid(0:nx+1,0:ny,0:nz)
      real(kind=8):: eygrid(0:nx,0:ny+1,0:nz)
      real(kind=8):: ezgrid(0:nx,0:ny,0:nz+1)
      real(kind=8):: condid(0:nx,0:ny,0:nz)
      logical(ISZ):: lcylindrical
      real(kind=8):: dx,dy,dz
      real(kind=8):: xx(nn),yy(nn),zz(nn)
      real(kind=8):: ex(nn),ey(nn),ez(nn)
      real(kind=8):: pp(nn)

      integer(ISZ):: ix,iy,iz
      integer(ISZ):: ii,itot
      real(kind=8):: xshift,yshift,zshift
      real(kind=8):: xscale,yscale,zscale
      real(kind=8):: xmin,ymin,zmin,xmax,ymax,zmax
      real(kind=8):: dxtemp,dytemp,dztemp
      real(kind=8):: rr,rsq1,rsq2,tt
      real(kind=8):: wranf

      xmin = 0.
      ymin = 0.
      zmin = 0.
      xmax = dx*nx
      ymax = dy*ny
      zmax = dz*nz

      if (nx > 0) then
        dxtemp = dx
      else
        dxtemp = 0.
      endif
      if (ny > 0) then
        dytemp = dy
      else
        dytemp = 0.
      endif
      if (nz > 0) then
        dztemp = dz
      else
        dztemp = 0.
      endif

      itot = 1
      do iz=0,nz
        do iy=0,ny
          do ix=0,nx

            --- The shift and scale are to keep particles within the grid.
            xshift = 0.5
            if (ix == 0) xshift = 0.
            yshift = 0.5
            if (iy == 0) yshift = 0.
            zshift = 0.5
            if (iz == 0) zshift = 0.

            xscale = 1.0
            if (ix == 0 .or. ix == nx) xscale = 0.5
            yscale = 1.0
            if (iy == 0 .or. iy == ny) yscale = 0.5
            zscale = 1.0
            if (iz == 0 .or. iz == nz) zscale = 0.5
            
            if (lcylindrical) then

              if (ix == 0) then
                rsq1 = 0.
              else
                rsq1 = (dxtemp*(ix - xshift))**2
              endif
              rsq2 = (dxtemp*(ix - xshift + xscale))**2

              do ii=1,int(rnn(ix,iy,iz))

                rr = sqrt(rsq1 + wranf()*(rsq2 - rsq1))
                tt = 2.*pi*wranf()
                xx(itot) = rr*cos(tt)
                yy(itot) = rr*sin(tt)
                zz(itot) = dztemp*(iz - zshift + zscale*wranf())
                if ((                       rr       < xmax) .and.
     &              (zmin <= zz(itot) .and. zz(itot) < zmax)) then
                  ex(itot) = 0.5*(exgrid(ix,iy,iz) + exgrid(ix+1,iy,iz))
                  ey(itot) = 0.5*(eygrid(ix,iy,iz) + eygrid(ix,iy+1,iz))
                  ez(itot) = 0.5*(ezgrid(ix,iy,iz) + ezgrid(ix,iy,iz+1))
                  pp(itot) = condid(ix,iy,iz)
                  itot = itot + 1
                endif
              enddo

            else

              do ii=1,int(rnn(ix,iy,iz))
                xx(itot) = dxtemp*(ix - xshift + xscale*wranf())
                yy(itot) = dytemp*(iy - yshift + yscale*wranf())
                zz(itot) = dztemp*(iz - zshift + zscale*wranf())
                if ((xmin <= xx(itot) .and. xx(itot) < xmax .or. nx == 0) .and.
     &              (ymin <= yy(itot) .and. yy(itot) < ymax .or. ny == 0) .and.
     &              (zmin <= zz(itot) .and. zz(itot) < zmax .or. nz == 0)) then
                  ex(itot) = 0.5*(exgrid(ix,iy,iz) + exgrid(ix+1,iy,iz))
                  ey(itot) = 0.5*(eygrid(ix,iy,iz) + eygrid(ix,iy+1,iz))
                  ez(itot) = 0.5*(ezgrid(ix,iy,iz) + ezgrid(ix,iy,iz+1))
                  pp(itot) = condid(ix,iy,iz)
                  itot = itot + 1
                endif
              enddo

            endif

          enddo
        enddo
      enddo

      return
      end