top.F



[accumulate_temperature] [addpart] [alotlostpart] [alotpart] [chcklostpart] [chckpart] [checkparticlegroup] [checkz_arrays] [chgparticlesdts] [cigar] [clearpart] [copyarry] [copygrouptogroup] [copypart] [derivqty] [finalize_temperature] [fnice] [gammaadv] [gapfield] [get_zmmnts_stations] [getbeamcom] [getcrossingparticles] [getextrapolatedparticles] [getlabwn] [getnsndtsforsubcycling] [gett] [getvzofz] [getzmmnt] [getzmmnt_weights] [impact_ion] [initlabwn] [inlist] [isdefmpiparallel] [load1d] [load2d] [logicalsheath] [nextpid] [oneiftrue] [partbndwithdata] [particleboundarieswithdata] [particlecopysorteddata] [particlemovesorteddata] [particlesortbyindex] [particlesortxyzwithcopy] [particlesortxyzwithcopynew] [particlesortyzwithcopy] [percurr] [periz] [processlostpart] [psumx] [r2rev] [reorgparticles] [reset_temperature] [rm] [rma] [rnorm] [rnormdig] [rnrev] [rnrevarray] [semitransparent_disc] [set_tslice_locator] [setcurr] [seteb_in_boosted_frame] [setegap] [setgamma] [setregulartgrid] [setu_in_boosted_frame3d] [setu_in_uzboosted_frame3d] [setupImplicit] [setupSelfB] [setupSubcycling] [setuppgroup] [shftlostpart] [shftpart] [shftpartwork] [shiftparticlegroup] [shrinkpart] [species] [sphere4] [sphere4f] [stckyz] [sumarry] [topinit] [topvers] [wrandom] [wrandomgauss] [writarry] [wtime] [wtimeoff] [wtimeon] [wtremain] [xparticleboundaries] [yparticleboundaries] [zeroarry] [zparticleboundaries]

#include top.h
 @(#) File TOP.F, version $Revision: 3.268 $, $Date: 2012/01/20 21:32:41 $
 # Copyright (c) 1990-1998, The Regents of the University of California.
 # All rights reserved.  See LEGAL.LLNL for full text and disclaimer.
   Main source file for the package TOP of the PIC code WARP.
   It handles global version control, and loads computed global variables.
   Alex Friedman, LLNL, (510)422-0827
   David P. Grote, LLNL, (510)423-7194

      subroutine topinit
      use Io
      use Ch_var
      use InGen

   Called at first reference to package.

   Echo the package version

      call topvers (STDOUT)

   Initialize text output file, print versions of all packages
      if (warpout > -1) then
        call outfile (warpout, "Warp PIC output files")
        call topvers (warpout)
        call w3dvers (warpout)
        call wrzvers (warpout)
        call envvers (warpout)
        call f3dvers (warpout)
        call frzvers (warpout)
      endif

      return
      end

[topinit]
      subroutine topvers (iout)
      use TOPversion
   Echoes code version,etc. to output files when they're created
      integer(ISZ):: iout
      call printpkgversion(iout,"Main package TOP",verstop)
      return
      end

[stptcl3d] [stptclrz]
      subroutine cigar(np,zunifrm,zpunifrm,z,zp,perpscal,straight,
     &                 szp,phi,zpm,zpms)
      use Constant
      integer(ISZ):: np
      real(kind=8):: straight
      real(kind=8):: zunifrm(np),zpunifrm(np),z(np),zp(np),perpscal(np)
      real(kind=8):: szp(np),phi(np),zpm(np),zpms(np)

      This is a self-contained subroutine which calculates the normalized
      velocities and positions for a "neuffer-type" distribution[1],
      consisting of a uniform central region and quadratic end caps.
      The line charge in the end caps is assumed symmetric.
      The output transverse pseudo-radius can be used with your favorite
      algorithm to generate the transverse distribution.  If i/epsilon
      scaling is assumed for the current in the end caps, a single
      solution of the envelope equations can be used to de-scale the
      transverse variables.
 
                                               I. Haber
 
      Cubic solver of  x**3+a2*x**2+a1*x+a0=0 from Abromiwitz and Stegun p. 17
      greatly simplified for the case needed here.
      This subroutine is also vectorized?
                                              D.P.Grote
 
   input arguments:
 
       zuniform   a floating point number on the interval (0.,1.) which is
                  mapped into the longitudinal position on a beam bunch in
                  interval (0.,1.)
       zpunifrm   a floating point number in the interval (-.5,.5) which is
                  mapped into a z velocity appropriate to the z position on
                  the bunch.
       straight   the fraction of the beam length occupied by a uniform
                  line charge section at the bunch center.
 
   output arguments:
 
       z          longitudinal position of a particle in z relative to a bunch
                  on the interval (0.,1.).
       zp         longitudinal velocity at the position z, normalized to the
                  maximum velocity at the bunch center.
       perpscal   the sqrt of line charge at position z which can be used to
                  scale transverse positions and velocities.
 
   others:
 
       szp, phi, zpm, zpms
                  all scratch arrays of size np
 
   reference:
 
      David Neuffer, "longitudinal motion in high current ion beams - a
      self-consistent phase space distribution with an envelope equation,"
      IEEE Trans. Nucl. Sci. vol ns-26, June 1979, p. 3031.
 
 
 
      initialization section.  the logic only works if local
      storage is non-volatile.
 
      real(kind=8):: pion2,oneonpi,zs,zend,zendr,zmap,zl,zendi
      real(kind=8):: third,root3
      integer(ISZ):: i

      pion2 = pi*0.5
      oneonpi = 1./pi
      zs = straight
      zend = (1.-zs)*0.5
      zendr = 1.-zend
      zmap = zs+(4./3.)*zend
      zl = zend/3.
      zendi = 1./dvnz(zend)

  initialize stuff for cubic solver
      third = 1./3.
      root3 = sqrt(3.)
      a1 = -3.
      a2 = 0.
      q = a1*third-a2**2/9.0

      do i=1,np
 
      map the center of the beam into the uniform region
 
          z(i) = zl + zunifrm(i)*zmap
          zpm(i) = 0.
 
      solve a cubic to map the two parabolic end caps
 
          if(z(i) < zend) then
   Solve for the third cubic root
             t = q**3+r**2ɘ always, so three real(kind=8):: roots
             phi(i) = atan2(sqrt(1.-(1.5*(z(i)-zend)*zendi)**2),
     &                     -1.5*(z(i)-zend)*zendi)*third
             zpm(i) = -cos(phi(i))+root3*sin(phi(i))
             z(i) = (1.+zpm(i))*zend
          elseif(z(i) > zendr) then
   Solve for the third cubic root
             t = q**3+r**2ɘ always, so three real(kind=8):: roots
             phi(i) = atan2(sqrt(1.-(1.5*(z(i)-zendr)*zendi)**2),
     &                      -1.5*(z(i)-zendr)*zendi)*third
             zpm(i) = -cos(phi(i))+root3*sin(phi(i))
             z(i) = zpm(i)*zend+zendr
          endif
 
       newton-raphson iteration to solve for velocity mapping of the
       interval -0.5 < zpunifrm < +0.5 .
 
          zpms(i) = 1.0-zpm(i)*zpm(i)
          perpscal(i) = sqrt(zpms(i))
 
      initial guess (phi is a temp for zp to allow in place calculation)
 
          phi(i) = pion2*perpscal(i)*zpunifrm(i)
 
       The four iterations of the loop are expanded for vectorization.
       Original expression before reducing number of divides.
          phi(i) = phi(i) - ((phi(i)*szp(i)+zpms(i)*asin(phi(i)/perpscal(i)))/
     &            (zpms(i)*pi) - zpunifrm(i))/(twoonpi/zpms(i)*szp(i))

          if (phi(i) .ne. 0.) then
            szp(i) = sqrt(zpms(i)-phi(i)**2)
            phi(i) = phi(i) - ((phi(i)*szp(i)+zpms(i)*asin(phi(i)/perpscal(i)))*
     &               oneonpi - zpunifrm(i)*zpms(i))*pion2/szp(i)

            szp(i) = sqrt(zpms(i)-phi(i)**2)
            phi(i) = phi(i) - ((phi(i)*szp(i)+zpms(i)*asin(phi(i)/perpscal(i)))*
     &               oneonpi - zpunifrm(i)*zpms(i))*pion2/szp(i)

            szp(i) = sqrt(zpms(i)-phi(i)**2)
            phi(i) = phi(i) - ((phi(i)*szp(i)+zpms(i)*asin(phi(i)/perpscal(i)))*
     &               oneonpi - zpunifrm(i)*zpms(i))*pion2/szp(i)

            szp(i) = sqrt(zpms(i)-phi(i)**2)
            phi(i) = phi(i) - ((phi(i)*szp(i)+zpms(i)*asin(phi(i)/perpscal(i)))*
     &               oneonpi - zpunifrm(i)*zpms(i))*pion2/szp(i)
          endif

          zp(i) = phi(i)

      enddo

      return
      end

[derivqty] [wxygen]
      subroutine species
      use InPart
      use Constant
      use Beam_acc

  Sets up species related quantities which were not set by the user, based
  on the rest of the user's input.    


      integer(ISZ):: is

      First, set parameters for whole beam (for envelope solver).  If
      they are unset, set to parameters of species 1.

      --- Beam size in X
      if (a0 == 0. .and. a0_s(1) /= LARGEPOS) a0 = a0_s(1)
      --- Beam divergence in X
      if (ap0 == 0. .and. ap0_s(1) /= LARGEPOS) ap0 = ap0_s(1)
      --- Beam size in Y
      if (b0 == 0. .and. b0_s(1) /= LARGEPOS) b0 = b0_s(1)
      --- Beam divergence in Y
      if (bp0 == 0. .and. bp0_s(1) /= LARGEPOS) bp0 = bp0_s(1)
      --- Beam centroid in X
      if (x0 == 0. .and. x0_s(1) /= LARGEPOS) x0 = x0_s(1)
      --- Beam centroid angle in X
      if (xp0 == 0. .and. xp0_s(1) /= LARGEPOS) xp0 = xp0_s(1)
      --- Beam centroid in Y
      if (y0 == 0. .and. y0_s(1) /= LARGEPOS) y0 = y0_s(1)
      --- Beam centroid angle in Y
      if (yp0 == 0. .and. yp0_s(1) /= LARGEPOS) yp0 = yp0_s(1)
      --- Atomic number
      if (aion == 0. .and. aion_s(1) /= LARGEPOS) aion = aion_s(1)
      --- Particle energy
      if (ekin == 0. .and. ekin_s(1) /= LARGEPOS) ekin = ekin_s(1)
      --- Emittances
      if (emit == 0. .and. emit_s(1) /= LARGEPOS) emit = emit_s(1)
      if (emitx == 0. .and. emitx_s(1) /= LARGEPOS) emitx = emitx_s(1)
      if (emity == 0. .and. emity_s(1) /= LARGEPOS) emity = emity_s(1)
      --- Normalized emittances
      if (emitn == 0. .and. emitn_s(1) /= LARGEPOS) emitn = emitn_s(1)
      if (emitnx == 0. .and. emitnx_s(1) /= LARGEPOS) emitnx = emitnx_s(1)
      if (emitny == 0. .and. emitny_s(1) /= LARGEPOS) emitny = emitny_s(1)
      --- Current
      if (ibeam == 0. .and. ibeam_s(1) /= LARGEPOS) ibeam = ibeam_s(1)
      --- Charge state
      if (zion == 0. .and. zion_s(1) /= LARGEPOS) zion = zion_s(1)
      --- Axial velocity
      if (vbeam == 0. .and. vbeam_s(1) /= LARGEPOS) vbeam = vbeam_s(1)
      --- Axial velocity tilt
      if (vtilt == 0. .and. vtilt_s(1) /= LARGEPOS) vtilt = vtilt_s(1)
      --- Transverse thermal spread
      if (vthperp == 0. .and. vthperp_s(1) /= LARGEPOS) vthperp = vthperp_s(1)
      --- Axial thermal spread
      if (vthz == 0. .and. vthz_s(1) /= LARGEPOS) vthz = vthz_s(1)
      --- Minimum initial z of beam
      if (zimin == 0. .and. zimin_s(1) /= LARGEPOS) zimin = zimin_s(1)
      --- Maximum initial z of beam
      if (zimax == 0. .and. zimax_s(1) /= LARGEPOS) zimax = zimax_s(1)
      --- Fractional length of uniform part of beam
      if (straight == 0. .and. straight_s(1) /= LARGEPOS) straight = straight_s(1)

      Now, set parameters for species.  If they are unset, set to the whole
      beam parameters. Also, always reset the values of species number one
      since the single species input parameters are kept consistent with
      species one parameters. This alleviates many of the problems of having a
      seperate variable for input and for internal use.

      --- Make sure that space has been allocated for the arrays.
      if (ns > 1) then
        call gchange("InPart",0)
      endif

      do is=1,ns
        --- Beam size in X
        if (a0_s(is) == LARGEPOS .or. is == 1) then
          a0_s(is) = a0
        endif
        --- Beam divergence in X
        if (ap0_s(is) == LARGEPOS .or. is == 1) then
          ap0_s(is) = ap0
        endif
        --- Beam size in Y
        if (b0_s(is) == LARGEPOS .or. is == 1) then
          b0_s(is) = b0
        endif
        --- Beam divergence in Y
        if (bp0_s(is) == LARGEPOS .or. is == 1) then
          bp0_s(is) = bp0
        endif
        --- Beam centroid in X
        if (x0_s(is) == LARGEPOS .or. is == 1) then
          x0_s(is) = x0
        endif
        --- Beam centroid angle in X
        if (xp0_s(is) == LARGEPOS .or. is == 1) then
          xp0_s(is) = xp0
        endif
        --- Beam centroid in Y
        if (y0_s(is) == LARGEPOS .or. is == 1) then
          y0_s(is) = y0
        endif
        --- Beam centroid angle in Y
        if (yp0_s(is) == LARGEPOS .or. is == 1) then
          yp0_s(is) = yp0
        endif
        --- Atomic number
        if (aion_s(is) == LARGEPOS .or. is == 1) then
          aion_s(is) = aion
        endif
        --- Particle energy
        if (ekin_s(is) == LARGEPOS .or. is == 1) then
          ekin_s(is) = ekin
        endif
        --- Emittance
        if (emit_s(is) == LARGEPOS .or. is == 1) then
          emit_s(is) = emit
        endif
        if (emitx_s(is) == LARGEPOS .or. is == 1) then
          emitx_s(is) = emitx
        endif
        if (emity_s(is) == LARGEPOS .or. is == 1) then
          emity_s(is) = emity
        endif
        --- Normalized emittance
        if (emitn_s(is) == LARGEPOS .or. is == 1) then
          emitn_s(is) = emitn
        endif
        if (emitnx_s(is) == LARGEPOS .or. is == 1) then
          emitnx_s(is) = emitnx
        endif
        if (emitny_s(is) == LARGEPOS .or. is == 1) then
          emitny_s(is) = emitny
        endif
        --- Current
        if (ibeam_s(is) == LARGEPOS .or. is == 1) then
          ibeam_s(is) = ibeam
        endif
        --- Charge state
        if (zion_s(is) == LARGEPOS .or. is == 1) then
          zion_s(is) = zion
        endif
        --- Axial velocity
        if (vbeam_s(is) == LARGEPOS .or. is == 1) then
          vbeam_s(is) = vbeam
        endif
        --- Axial velocity tilt
        if (vtilt_s(is) == LARGEPOS .or. is == 1) then
          vtilt_s(is) = vtilt
        endif
        --- Transverse thermal spread
        if (vthperp_s(is) == LARGEPOS .or. is == 1) then
          vthperp_s(is) = vthperp
        endif
        --- Axial thermal spread
        if (vthz_s(is) == LARGEPOS .or. is == 1) then
          vthz_s(is) = vthz
        endif
        --- Minimum initial z of beam
        if (zimin_s(is) == LARGEPOS .or. is == 1) then
          zimin_s(is) = zimin
        endif
        --- Maximum initial z of beam
        if (zimax_s(is) == LARGEPOS .or. is == 1) then
          zimax_s(is) = zimax
        endif
        --- Fractional length of uniform part of beam
        if (straight_s(is) == LARGEPOS .or. is == 1) then
          straight_s(is) = straight
        endif
      enddo

      return
      end

[cirgen] [envgen] [hergen] [w3dgen] [wrzgen] [wxygen]
      subroutine derivqty
      use InPart
      use Constant
      use Beam_acc

   Sets globally derived quantities that need computation.


      integer(ISZ):: is
      real(kind=8):: ke,u

      Set species data 
      call species  

      Set constants that are derived from one another
      --- Magnetic constant = 4*pi*1.e-7
      mu0 = 4.*pi*1.e-7
      --- Conversion factor from joules to eV is just echarge
      jperev = echarge
      --- Epsilon_0 calculated from speed of light and mu_0
      eps0 = 1./(mu0*clight*clight)

      --- Compute gammabar and vbeam or ekin (beam kinetic energy) from
      --- whichever of the two quantities, vbeam or ekin, the user has
      --- chosen to set.  (If ekin is set, then vbeam = 0, and vice-versa.)
      --- Include both relativistic and nonrelativistic cases.
      --- The do loop covers each species and what follows covers the
      --- beam in general.
      if (lrelativ) then

        --- loop over species
        do is=1,ns
          if (aion_s(is) .ne. 0.) then
            if (vbeam_s(is) == 0.) then

              --- Beam energy in units of mc**2
              ke = jperev*ekin_s(is)/dvnz(aion_s(is)*amu*clight**2)
              gammabar = 1. + ke

              --- The expression for vbeam was rewritten to avoid the
              --- problem of taking the difference of nearly equal numbers
              --- (i.e.  (1-1/gammabar) where gammabar ~ 1).
              --- vbeam_s(is) = clight * sqrt(1.-1./gammbar**2)
              vbeam_s(is) = clight * sqrt((2*ke+ke**2)/gammabar**2)

            elseif(ekin_s(is) == 0.) then

              u = (vbeam_s(is)/clight)**2
              gammabar = 1. / sqrt (1. - u)

              --- The expression for ekin was rewritten to avoid the
              --- problem of taking the difference of nearly equal numbers.
              --- ekin = (aion_s(is)*amu*clight**2)*(gammabar - 1.)/jperev
              ekin = (aion_s(is)*amu*clight**2)*(u/(sqrt(1.-u)+1.-u))/jperev

            endif
          endif

        enddo

        --- for the beam in general
        if (vbeam == 0.) then
          ke = jperev * ekin / dvnz(aion * amu * clight**2)
          gammabar = 1. + ke
          vbeam = clight * sqrt((2*ke+ke**2)/gammabar**2)
        elseif(ekin == 0.) then
          u = (vbeam/clight)**2
          gammabar = 1. / sqrt (1. - u)
          --- ekin = (aion * amu * clight**2) * (gammabar - 1.) / jperev
          ekin = (aion * amu * clight**2)*(u/(sqrt(1.-u)+1.-u))/jperev
        endif

      else

        --- non-relativistic
        --- Note that in the expression for vbeam, amu is outside of the dvnz
        --- macro since it causes a loss of accuracy since amu is so small.
        --- This makes the assumption that amu would never be set to zero
        --- (it should never even be changed and should in fact be truly a
        --- constant).

        gammabar = 1.

        --- loop over species
        do is=1,ns
          if (aion_s(is) .ne. 0.) then
            if (vbeam_s(is) == 0.) then
              vbeam_s(is) = sqrt(2.*ekin_s(is)*jperev/dvnz(aion_s(is))/amu)
            elseif(ekin_s(is) == 0.) then
              ekin_s(is) = 0.5*(aion_s(is)*amu*vbeam_s(is)**2)/jperev
            endif
          endif
        enddo

        --- for the beam in general
        if (vbeam == 0.) then
          vbeam = sqrt(2.*ekin*jperev/dvnz(aion)/amu)
        elseif(ekin == 0.) then
          ekin = 0.5*(aion*amu*vbeam**2)/jperev
        endif

      endif

      --- Convert between emittance and normalized emittance
      --- and set the x- and y- plane emittances.  Here a somewhat
      --- confusing case structure is employed.  Note that ouside of
      --- envelope matching routines, the code only employs emitx and emity.

      --- Set x- and y-plane emittances from whole beam emittances if
      --- either is set, or from the x- and y- plane normalized emittances.
      --- This is done this way so that the user can later change emit for
      --- example and have emitx and emity changed appropriately when
      --- this routine is called again.
      if (emit .ne. 0.) then
        emitx = emit
        emity = emit
      elseif (emitn .ne. 0) then
        emitx = emitn*clight/dvnz(vbeam*gammabar)
        emity = emitn*clight/dvnz(vbeam*gammabar)
      elseif (emitnx .ne. 0. .or. emitny .ne. 0.) then
        emitx = emitnx*clight/dvnz(vbeam*gammabar)
        emity = emitny*clight/dvnz(vbeam*gammabar)
      endif

      --- Loop over species with the same emittance construction above.
      do is=1,ns

        --- Set relativistic gamma factor for this species.
        if (lrelativ) gammabar = 1./sqrt(1.-(vbeam_s(is)/clight)**2) 

        --- (See comment above.)
        if (emit_s(is) .ne. 0.) then
          emitx_s(is) = emit_s(is)
          emity_s(is) = emit_s(is)
        elseif (emitn_s(is) .ne. 0) then
          emitx_s(is) = emitn_s(is)*clight/dvnz(vbeam_s(is)*gammabar)
          emity_s(is) = emitn_s(is)*clight/dvnz(vbeam_s(is)*gammabar)
        elseif (emitnx_s(is) .ne. 0. .or. emitny_s(is) .ne. 0.) then
          emitx_s(is) = emitnx_s(is)*clight/dvnz(vbeam_s(is)*gammabar)
          emity_s(is) = emitny_s(is)*clight/dvnz(vbeam_s(is)*gammabar)
        endif

      enddo

      --- Reset relativistic gamma factor for the whole beam. For the
      --- non-relativistic case, it has already been set to 1.
      if (lrelativ) gammabar = 1./sqrt(1.-(vbeam/clight)**2)

      --- compute geometric factor needed for wave speed and (possibly) 
      --- ears calculation 
      if (rwall > 0. .and. gfactor == 0.) then
         gfactor = log(rwall**2/dvnz(a0*b0))
      endif

      return
      end

[getzmmnt_weights] [padvnc3d] [padvncrz] [padvncxy] [step3d] [stepxy] [wrzgen]
      subroutine getzmmnt(np,xp,yp,zp,uxp,uyp,uzp,gaminv,q,m,w,dt,dtscale,
     &                    itask,nplive,
     &                    uxpo,uypo,uzpo,is,isid,ismax,maxp,minp,zmmnts0,zmmnts)
      use Constant
      use Beam_acc
      use InDiag
      use Z_Moments
      use Win_Moments
      use Moments
      use Picglb
      use ExtPart
      use Timers
      use Particles, only: wpid
      integer(ISZ):: np,itask,nplive,is,isid,ismax
      real(kind=8):: q,m,w,dt,dtscale
      real(kind=8):: xp(np), yp(np), zp(np)
      real(kind=8):: uxp(np), uyp(np), uzp(np), gaminv(np)
      real(kind=8):: uxpo(np), uypo(np), uzpo(np)
      real(kind=8):: maxp(6,0:nszmmnt),minp(6,0:nszmmnt)
      real(kind=8):: zmmnts0(NUMZMMNT,0:nszmmnt)
      real(kind=8):: zmmnts(0:nzmmnt,NUMZMMNT,0:nszmmnt)

   Sets moments for species 1 in as a function of z.
   Interpolation is done to grid centers
   Note that lost particles are to the left of the z grid.
   Note: "window zero" includes all particles, and no extrapolation is done
   Note: zbar and zsqbar calculations are meaningless except for 
         window zero (WE SHOULD DELETE ?)
   Note: Vectorized over moments.
         particle moment calculations are vectorized over particles
         moments are summed into a 2-D array, vectorized over moments
         then they are dumped into the seperate 1-D arrays in itask=3
   Three parts
   When itask=1  zeros out all moments
        itask=2  sums moments from particles
        itask=3  divides by number of particles, calculates emittances and rms

   Note that the moments are scaled by the dtscale factor. This accounts for
   the fact that particles with larger dtscale would normally contribute more
   if nominal dt would be used since they would spend proportionately more
   time in any location.

      real(kind=8),allocatable:: pp(:,:)
      integer(ISZ),allocatable:: iz(:)
      integer(ISZ):: im,izm,iw,ip,icell,i,js
      real(kind=8):: dti,dtip1,wz1,wz0,zwin
      real(kind=8):: oneondt,clighti,vzi
      real(kind=8):: pnumi
      real(kind=8):: delxsq,delxxp,delxpsq,delxvx,delvxsq
      real(kind=8):: delysq,delyyp,delypsq,delyvy,delvysq
      real(kind=8):: delxy,delxyp
      real(kind=8):: delyxp,delxpyp,delxvy,delyvx,delvxvy
      real(kind=8):: delzsq,delvzsq,delzvz
      real(kind=8):: tr,tg,th
      real(kind=8):: gamma
      real(kind=8):: timetemp,wtime
      real(kind=8):: zmmntscopy(NUMZMMNT)
#ifdef J90
      real(kind=8):: pmmnts(NUMZMMNT,np)
#endif

      timetemp = wtime()

      if (ifzmmnt == 0) return

      oneondt = 1./dvnz(dt)

      --- If lspeciesmoments is true, then calculate the moments for each
      --- species separately. The moments for all species combined will be
      --- done afterwards. Otherwise, only calculate the combined moments.
      if (lspeciesmoments) then
        --- Note that species index for these arrays is zero based.
        js = isid - 1
      else
        js = 0
      endif

      if (itask == 1 .and. .not. laccumulate_zmoments) then

         if (lspeciesmoments) then
           --- Check if the moments are to be calculated separately for
           --- each species. If so, check if nszmmnt already has been set
           --- appropriately. If not, set it and allocate the arrays.
           --- If only one species, then don't have separate species data.
           if (nszmmnt < ismax .and. ismax > 1) then
             nszmmnt = ismax
             call gchange("Z_Moments",0)
           endif
         else
           if (nszmmnt /= 0) then
             nszmmnt = 0
             call gchange("Z_Moments",0)
           endif
         endif

         --- This is a very kludgy thing to do. This assumes that this
         --- getzmmnt routine will always be called with the temp arrays
         --- passed in when itask==1. The only reason the work arrays are
         --- passed in anyway is because of the OpenMP code which requires
         --- the work arrays to be thread private when called with itask==2.
         tempmaxp = -LARGEPOS
         tempminp = +LARGEPOS
         tempzmmnts0 = 0.
         tempzmmnts = 0.
         if (nepwin > 0) nep = 0
         if(wpidɬ) nplive = 0
         zmomentscalculated = .false.
         zmomentscalculated(nszmmnt) = .true.
      endif

      if (itask == 2) then

        allocate(pp(0:18,np),iz(np))

        --- Save the mass and weight
        zmmntsq(js) = q
        zmmntsm(js) = m
        zmmntsw(js) = w

        --- Set flag that moments for this species has been calculated.
        zmomentscalculated(js) = .true.

        --- Set maximum and minimum particle coordinate variables
        --- set multiplier so lost particles don't contribute
        do ip = 1, np
          maxp(1,js) = max(maxp(1,js), xp(ip))
          minp(1,js) = min(minp(1,js), xp(ip))
          maxp(2,js) = max(maxp(2,js), yp(ip))
          minp(2,js) = min(minp(2,js), yp(ip))
          maxp(3,js) = max(maxp(3,js), zp(ip))
          minp(3,js) = min(minp(3,js), zp(ip))
          if (l_momentum_moments .or. .not. lrelativ) then
            maxp(4,js) = max(maxp(4,js), uxp(ip))
            minp(4,js) = min(minp(4,js), uxp(ip))
            maxp(5,js) = max(maxp(5,js), uyp(ip))
            minp(5,js) = min(minp(5,js), uyp(ip))
            maxp(6,js) = max(maxp(6,js), uzp(ip))
            minp(6,js) = min(minp(6,js), uzp(ip))
          else
            maxp(4,js) = max(maxp(4,js), gaminv(ip)*uxp(ip))
            minp(4,js) = min(minp(4,js), gaminv(ip)*uxp(ip))
            maxp(5,js) = max(maxp(5,js), gaminv(ip)*uyp(ip))
            minp(5,js) = min(minp(5,js), gaminv(ip)*uyp(ip))
            maxp(6,js) = max(maxp(6,js), gaminv(ip)*uzp(ip))
            minp(6,js) = min(minp(6,js), gaminv(ip)*uzp(ip))
          endif

          --- Set flag so particles out of range don't contribute
          ---   not added to total moments
          ---   weights set to zero for z moments
          pp(0,ip) = 1.
          if (ifzmmnt == 1) then
            if ((zp(ip) - zbeam) < zmmntmin .or.
     &          (zp(ip) - zbeam) > zmmntmax) pp(0,ip) = 0.
          else
            if ((zp(ip) - zbeam) <  zmmntmin .or.
     &          (zp(ip) - zbeam) >= zmmntmax) pp(0,ip) = 0.
          endif

        enddo

        --- Accumate global moments first.

        --- Used to add in dtscale factor below.
        if (dtscale /= 1.) zmmntscopy = zmmnts0(:,js)

 -------------------------------------------------------------------------
#ifdef J90
        --- This method is faster for vectorized machines, primarily the J90.
        --- put particle moments into pmmnts array for total moments
        if (l_momentum_moments .or. .not. lrelativ) then
          do ip=1,np
            vzi = 1./(uzp(ip)+SMALLPOS)
            pmmnts( 1,ip) = 1.
            pmmnts( 2,ip) = xp(ip)
            pmmnts( 3,ip) = yp(ip)
            pmmnts( 4,ip) = zp(ip)
            pmmnts( 7,ip) = uxp(ip)
            pmmnts( 8,ip) = uyp(ip)
            pmmnts( 9,ip) = uzp(ip)
            pmmnts(10,ip) = xp(ip)*yp(ip)
            pmmnts(12,ip) = xp(ip)**2
            pmmnts(13,ip) = yp(ip)**2
            pmmnts(14,ip) = zp(ip)**2
            pmmnts(17,ip) = (uxp(ip))**2
            pmmnts(18,ip) = (uyp(ip))**2
            pmmnts(19,ip) = (uzp(ip))**2
            pmmnts(22,ip) = zp(ip)*uzp(ip)
            pmmnts(25,ip) = xp(ip)*uzp(ip)
            pmmnts(26,ip) = yp(ip)*uzp(ip)
            pmmnts(27,ip) = uxp(ip)*uzp(ip)
            pmmnts(28,ip) = uyp(ip)*uzp(ip)
            pmmnts( 5,ip) = uxp(ip)*vzi
            pmmnts( 6,ip) = uyp(ip)*vzi
            pmmnts(11,ip) = uxp(ip)*uyp(ip)*vzi**2
            pmmnts(15,ip) = (uxp(ip)*vzi)**2
            pmmnts(16,ip) = (uyp(ip)*vzi)**2
            pmmnts(20,ip) = xp(ip)*uxp(ip)*vzi
            pmmnts(21,ip) = yp(ip)*uyp(ip)*vzi
            pmmnts(23,ip) = xp(ip)*uyp(ip)*vzi
            pmmnts(24,ip) = yp(ip)*uxp(ip)*vzi
            pmmnts(29,ip) = xp(ip)*uxp(ip)
            pmmnts(30,ip) = yp(ip)*uyp(ip)
            pmmnts(31,ip) = xp(ip)*uyp(ip)
            pmmnts(32,ip) = yp(ip)*uxp(ip)
            pmmnts(33,ip) = uxp(ip)*uyp(ip)
          enddo
        else
          do ip=1,np
            vzi = 1./(uzp(ip)+SMALLPOS)
            pmmnts( 1,ip) = 1.
            pmmnts( 2,ip) = xp(ip)
            pmmnts( 3,ip) = yp(ip)
            pmmnts( 4,ip) = zp(ip)
            pmmnts( 7,ip) = uxp(ip)*gaminv(ip)
            pmmnts( 8,ip) = uyp(ip)*gaminv(ip)
            pmmnts( 9,ip) = uzp(ip)*gaminv(ip)
            pmmnts(10,ip) = xp(ip)*yp(ip)
            pmmnts(12,ip) = xp(ip)**2
            pmmnts(13,ip) = yp(ip)**2
            pmmnts(14,ip) = zp(ip)**2
            pmmnts(17,ip) = (gaminv(ip)*uxp(ip))**2
            pmmnts(18,ip) = (gaminv(ip)*uyp(ip))**2
            pmmnts(19,ip) = (gaminv(ip)*uzp(ip))**2
            pmmnts(22,ip) = zp(ip)*gaminv(ip)*uzp(ip)
            pmmnts(25,ip) = xp(ip)*uzp(ip)*gaminv(ip)
            pmmnts(26,ip) = yp(ip)*uzp(ip)*gaminv(ip)
            pmmnts(27,ip) = uxp(ip)*uzp(ip)*gaminv(ip)**2
            pmmnts(28,ip) = uyp(ip)*uzp(ip)*gaminv(ip)**2
            pmmnts( 5,ip) = uxp(ip)*vzi
            pmmnts( 6,ip) = uyp(ip)*vzi
            pmmnts(11,ip) = uxp(ip)*uyp(ip)*vzi**2
            pmmnts(15,ip) = (uxp(ip)*vzi)**2
            pmmnts(16,ip) = (uyp(ip)*vzi)**2
            pmmnts(20,ip) = xp(ip)*uxp(ip)*vzi
            pmmnts(21,ip) = yp(ip)*uyp(ip)*vzi
            pmmnts(23,ip) = xp(ip)*uyp(ip)*vzi
            pmmnts(24,ip) = yp(ip)*uxp(ip)*vzi
            pmmnts(29,ip) = xp(ip)*uxp(ip)*gaminv(ip)
            pmmnts(30,ip) = yp(ip)*uyp(ip)*gaminv(ip)
            pmmnts(31,ip) = xp(ip)*uyp(ip)*gaminv(ip)
            pmmnts(32,ip) = yp(ip)*uxp(ip)*gaminv(ip)
            pmmnts(33,ip) = uxp(ip)*uyp(ip)*gaminv(ip)**2
          enddo
        endif

        --- sum total moments
        --- Timings show that this loop runs much faster if the inner loop is
        --- completely unrolled.  It is faster on both HP workstations and
        --- CRAY C90.  WARNING: if NUMZMMNT changes, this loop needs to be
        --- adjusted to reflect that change.  Note that the similar loops
        --- below are still faster in their present form.
        do ip=1,np
          do im=1,NUMZMMNT
            zmmnts0(im) = zmmnts0(im) + pmmnts(ip,im)
          enddo
          zmmnts0( 1,js) = zmmnts0( 1,js) + pmmnts( 1,ip)
          zmmnts0( 2,js) = zmmnts0( 2,js) + pmmnts( 2,ip)
          zmmnts0( 3,js) = zmmnts0( 3,js) + pmmnts( 3,ip)
          zmmnts0( 4,js) = zmmnts0( 4,js) + pmmnts( 4,ip)
          zmmnts0( 5,js) = zmmnts0( 5,js) + pmmnts( 5,ip)
          zmmnts0( 6,js) = zmmnts0( 6,js) + pmmnts( 6,ip)
          zmmnts0( 7,js) = zmmnts0( 7,js) + pmmnts( 7,ip)
          zmmnts0( 8,js) = zmmnts0( 8,js) + pmmnts( 8,ip)
          zmmnts0( 9,js) = zmmnts0( 9,js) + pmmnts( 9,ip)
          zmmnts0(10,js) = zmmnts0(10,js) + pmmnts(10,ip)
          zmmnts0(11,js) = zmmnts0(11,js) + pmmnts(11,ip)
          zmmnts0(12,js) = zmmnts0(12,js) + pmmnts(12,ip)
          zmmnts0(13,js) = zmmnts0(13,js) + pmmnts(13,ip)
          zmmnts0(14,js) = zmmnts0(14,js) + pmmnts(14,ip)
          zmmnts0(15,js) = zmmnts0(15,js) + pmmnts(15,ip)
          zmmnts0(16,js) = zmmnts0(16,js) + pmmnts(16,ip)
          zmmnts0(17,js) = zmmnts0(17,js) + pmmnts(17,ip)
          zmmnts0(18,js) = zmmnts0(18,js) + pmmnts(18,ip)
          zmmnts0(19,js) = zmmnts0(19,js) + pmmnts(19,ip)
          zmmnts0(20,js) = zmmnts0(20,js) + pmmnts(20,ip)
          zmmnts0(21,js) = zmmnts0(21,js) + pmmnts(21,ip)
          zmmnts0(22,js) = zmmnts0(22,js) + pmmnts(22,ip)
          zmmnts0(23,js) = zmmnts0(23,js) + pmmnts(23,ip) 
          zmmnts0(24,js) = zmmnts0(24,js) + pmmnts(24,ip) 
          zmmnts0(25,js) = zmmnts0(25,js) + pmmnts(25,ip) 
          zmmnts0(26,js) = zmmnts0(26,js) + pmmnts(26,ip) 
          zmmnts0(27,js) = zmmnts0(27,js) + pmmnts(27,ip) 
          zmmnts0(28,js) = zmmnts0(28,js) + pmmnts(28,ip) 
          zmmnts0(29,js) = zmmnts0(29,js) + pmmnts(29,ip) 
          zmmnts0(30,js) = zmmnts0(30,js) + pmmnts(30,ip) 
          zmmnts0(31,js) = zmmnts0(31,js) + pmmnts(31,ip) 
          zmmnts0(32,js) = zmmnts0(32,js) + pmmnts(32,ip) 
          zmmnts0(33,js) = zmmnts0(33,js) + pmmnts(33,ip) 
        enddo

#else

 -------------------------------------------------------------------------
        --- This method is faster for RISC and pentium machines.
        --- sum particle moments into total moments
        if (l_momentum_moments .or. .not. lrelativ) then
          do ip=1,np
            vzi = 1./(uzp(ip)+SMALLPOS)
            zmmnts0( 1,js) = zmmnts0( 1,js) + 1.
            zmmnts0( 2,js) = zmmnts0( 2,js) + xp(ip)
            zmmnts0( 3,js) = zmmnts0( 3,js) + yp(ip)
            zmmnts0( 4,js) = zmmnts0( 4,js) + zp(ip)
            zmmnts0( 7,js) = zmmnts0( 7,js) + uxp(ip)
            zmmnts0( 8,js) = zmmnts0( 8,js) + uyp(ip)
            zmmnts0( 9,js) = zmmnts0( 9,js) + uzp(ip)
            zmmnts0(10,js) = zmmnts0(10,js) + xp(ip)*yp(ip)
            zmmnts0(12,js) = zmmnts0(12,js) + xp(ip)**2
            zmmnts0(13,js) = zmmnts0(13,js) + yp(ip)**2
            zmmnts0(14,js) = zmmnts0(14,js) + zp(ip)**2
            zmmnts0(17,js) = zmmnts0(17,js) + (uxp(ip))**2
            zmmnts0(18,js) = zmmnts0(18,js) + (uyp(ip))**2
            zmmnts0(19,js) = zmmnts0(19,js) + (uzp(ip))**2
            zmmnts0(22,js) = zmmnts0(22,js) + zp(ip)*uzp(ip)
            zmmnts0(25,js) = zmmnts0(25,js) + xp(ip)*uzp(ip)
            zmmnts0(26,js) = zmmnts0(26,js) + yp(ip)*uzp(ip)
            zmmnts0(27,js) = zmmnts0(27,js) + uxp(ip)*uzp(ip)
            zmmnts0(28,js) = zmmnts0(28,js) + uyp(ip)*uzp(ip)
            zmmnts0( 5,js) = zmmnts0( 5,js) + uxp(ip)*vzi
            zmmnts0( 6,js) = zmmnts0( 6,js) + uyp(ip)*vzi
            zmmnts0(11,js) = zmmnts0(11,js) + uxp(ip)*uyp(ip)*vzi**2
            zmmnts0(15,js) = zmmnts0(15,js) + (uxp(ip)*vzi)**2
            zmmnts0(16,js) = zmmnts0(16,js) + (uyp(ip)*vzi)**2
            zmmnts0(20,js) = zmmnts0(20,js) + xp(ip)*uxp(ip)*vzi
            zmmnts0(21,js) = zmmnts0(21,js) + yp(ip)*uyp(ip)*vzi
            zmmnts0(23,js) = zmmnts0(23,js) + xp(ip)*uyp(ip)*vzi
            zmmnts0(24,js) = zmmnts0(24,js) + yp(ip)*uxp(ip)*vzi
            zmmnts0(29,js) = zmmnts0(29,js) + xp(ip)*uxp(ip)
            zmmnts0(30,js) = zmmnts0(30,js) + yp(ip)*uyp(ip)
            zmmnts0(31,js) = zmmnts0(31,js) + xp(ip)*uyp(ip)
            zmmnts0(32,js) = zmmnts0(32,js) + yp(ip)*uxp(ip)
            zmmnts0(33,js) = zmmnts0(33,js) + uxp(ip)*uyp(ip)
          enddo
        else
          do ip=1,np
            vzi = 1./(uzp(ip)+SMALLPOS)
            zmmnts0( 1,js) = zmmnts0( 1,js) + 1.
            zmmnts0( 2,js) = zmmnts0( 2,js) + xp(ip)
            zmmnts0( 3,js) = zmmnts0( 3,js) + yp(ip)
            zmmnts0( 4,js) = zmmnts0( 4,js) + zp(ip)
            zmmnts0( 7,js) = zmmnts0( 7,js) + uxp(ip)*gaminv(ip)
            zmmnts0( 8,js) = zmmnts0( 8,js) + uyp(ip)*gaminv(ip)
            zmmnts0( 9,js) = zmmnts0( 9,js) + uzp(ip)*gaminv(ip)
            zmmnts0(10,js) = zmmnts0(10,js) + xp(ip)*yp(ip)
            zmmnts0(12,js) = zmmnts0(12,js) + xp(ip)**2
            zmmnts0(13,js) = zmmnts0(13,js) + yp(ip)**2
            zmmnts0(14,js) = zmmnts0(14,js) + zp(ip)**2
            zmmnts0(17,js) = zmmnts0(17,js) + (gaminv(ip)*uxp(ip))**2
            zmmnts0(18,js) = zmmnts0(18,js) + (gaminv(ip)*uyp(ip))**2
            zmmnts0(19,js) = zmmnts0(19,js) + (gaminv(ip)*uzp(ip))**2
            zmmnts0(22,js) = zmmnts0(22,js) + zp(ip)*gaminv(ip)*uzp(ip)
            zmmnts0(25,js) = zmmnts0(25,js) + xp(ip)*uzp(ip)*gaminv(ip)
            zmmnts0(26,js) = zmmnts0(26,js) + yp(ip)*uzp(ip)*gaminv(ip)
            zmmnts0(27,js) = zmmnts0(27,js) + uxp(ip)*uzp(ip)*gaminv(ip)**2
            zmmnts0(28,js) = zmmnts0(28,js) + uyp(ip)*uzp(ip)*gaminv(ip)**2
            zmmnts0( 5,js) = zmmnts0( 5,js) + uxp(ip)*vzi
            zmmnts0( 6,js) = zmmnts0( 6,js) + uyp(ip)*vzi
            zmmnts0(11,js) = zmmnts0(11,js) + uxp(ip)*uyp(ip)*vzi**2
            zmmnts0(15,js) = zmmnts0(15,js) + (uxp(ip)*vzi)**2
            zmmnts0(16,js) = zmmnts0(16,js) + (uyp(ip)*vzi)**2
            zmmnts0(20,js) = zmmnts0(20,js) + xp(ip)*uxp(ip)*vzi
            zmmnts0(21,js) = zmmnts0(21,js) + yp(ip)*uyp(ip)*vzi
            zmmnts0(23,js) = zmmnts0(23,js) + xp(ip)*uyp(ip)*vzi
            zmmnts0(24,js) = zmmnts0(24,js) + yp(ip)*uxp(ip)*vzi
            zmmnts0(29,js) = zmmnts0(29,js) + xp(ip)*uxp(ip)*gaminv(ip)
            zmmnts0(30,js) = zmmnts0(30,js) + yp(ip)*uyp(ip)*gaminv(ip)
            zmmnts0(31,js) = zmmnts0(31,js) + xp(ip)*uyp(ip)*gaminv(ip)
            zmmnts0(32,js) = zmmnts0(32,js) + yp(ip)*uxp(ip)*gaminv(ip)
            zmmnts0(33,js) = zmmnts0(33,js) + uxp(ip)*uyp(ip)*gaminv(ip)**2
          enddo
        endif

 -------------------------------------------------------------------------
#endif

        --- Adds in dtscale factor efficiently. Somewhat kludgy though.
        if (dtscale /= 1.)
     &    zmmnts0(:,js) = zmmntscopy + (zmmnts0(:,js) - zmmntscopy)*dtscale


        --- Now, accumulate moments onto Z grid.
        if (ifzmmnt == 2) then

          --- Extrapolate quantities onto grid points so all particles
          ---   contributing to a moment are at same z.
          --- dti and dtip1 calculated by assuming constant Vz and 
          ---   computing times at which z crosses the two nearest grid points.
          --- V extrapolated using current and previous values of V
          ---   (this avoids having to use E and B to extrapolate V).
          --- X extrapolated assuming constant V (uses V at particle position).
          --- After extrapolation, data is linearly weighted onto grid by
          ---   particle's z location.  This has the effect that the less
          ---   accurately extrapolated particles, which are far from the
          ---   grid point, are weighted less, hopefully helping smoothness.

          --- extrapolate to two nearest cells
          do ip=1,np
            if (pp(0,ip) /= 0.) then
              vzi = 1./(uzp(ip)*gaminv(ip)+SMALLPOS)
              iz(ip)  = (zp(ip) - zbeam - zmmntmin)*dzmi
              if (iz(ip) == nzmmnt) iz(ip) = nzmmnt - 1
              dti = (iz(ip)*dzm-zp(ip)+zbeam+zmmntmin)*vzi
              dti = max(-zmmntdtextmax*dt,min(zmmntdtextmax*dt,dti))
              pp(3,ip) = uxp(ip)*(1. + dti*oneondt) - uxpo(ip)*dti*oneondt
              pp(7,ip) = uyp(ip)*(1. + dti*oneondt) - uypo(ip)*dti*oneondt
              pp(11,ip) = uzp(ip)*(1. + dti*oneondt) - uzpo(ip)*dti*oneondt
              pp(1,ip) = xp(ip) + uxp(ip)*dti*gaminv(ip)
              pp(5,ip) = yp(ip) + uyp(ip)*dti*gaminv(ip)
              pp(9,ip) = zp(ip) + uzp(ip)*dti*gaminv(ip)
              pp(9,ip) = iz(ip)*dzm + zbeam + zmmntmin

              if (l_momentum_moments .or. .not. lrelativ) then
                pp(18,ip) = 1.
              else
                pp(18,ip) = 1./
     &          sqrt(1. + (pp(3,ip)**2 + pp(7,ip)**2 + pp(11,ip)**2)/clight**2)
              endif

              dtip1=((iz(ip)+1)*dzm-zp(ip)+zbeam+zmmntmin)*vzi
              dtip1 = max(-zmmntdtextmax*dt,min(zmmntdtextmax*dt,dtip1))
              pp(4,ip) = uxp(ip)*(1. + dtip1*oneondt) - uxpo(ip)*dtip1*oneondt
              pp(8,ip) = uyp(ip)*(1. + dtip1*oneondt) - uypo(ip)*dtip1*oneondt
              pp(12,ip) = uzp(ip)*(1. + dtip1*oneondt) - uzpo(ip)*dtip1*oneondt
              pp(2,ip) = xp(ip) + uxp(ip)*dtip1*gaminv(ip)
              pp(6,ip) = yp(ip) + uyp(ip)*dtip1*gaminv(ip)
              pp(10,ip) = zp(ip) + uzp(ip)*dtip1*gaminv(ip)
              pp(10,ip) = (iz(ip)+1)*dzm + zbeam + zmmntmin

              if (l_momentum_moments .or. .not. lrelativ) then
                pp(13,ip) = 1.
              else
                pp(13,ip) = 1./
     &          sqrt(1. + (pp(4,ip)**2 + pp(8,ip)**2 + pp(12,ip)**2)/clight**2)
              endif

              pp(14,ip) = (1. - (zp(ip)-zbeam-zmmntmin)*dzmi + iz(ip))*dtscale
              pp(15,ip) = ((zp(ip) - zbeam - zmmntmin)*dzmi - iz(ip))*dtscale
              pp(16,ip) = dti
              pp(17,ip) = dtip1
            else
              iz(ip) = 0
              pp(:,ip) = 0.
            endif
          enddo

 -------------------------------------------------------------------------
#ifdef J90
        --- This method is faster for vectorized machines, primarily the J90.
          --- put particle moments into pmmnts array for zmmnts(iz,)
          if (l_momentum_moments .or. .not. lrelativ) then
            do ip=1,np
              vzi = 1./(pp(11,ip)+SMALLPOS)
              wz0 = pp(14,ip)
              pmmnts( 1,ip) = wz0
              pmmnts( 2,ip) = pp(1,ip)*wz0
              pmmnts( 3,ip) = pp(5,ip)*wz0
              pmmnts( 4,ip) = pp(9,ip)*wz0
              pmmnts( 7,ip) = pp(3,ip)*wz0
              pmmnts( 8,ip) = pp(7,ip)*wz0
              pmmnts( 9,ip) = pp(11,ip)*wz0
              pmmnts(10,ip) = pp(1,ip)*pp(5,ip)*wz0
              pmmnts(12,ip) = pp(1,ip)**2*wz0
              pmmnts(13,ip) = pp(5,ip)**2*wz0
              pmmnts(14,ip) = pp(9,ip)**2*wz0
              pmmnts(17,ip) = (pp(3,ip))**2*wz0
              pmmnts(18,ip) = (pp(7,ip))**2*wz0
              pmmnts(19,ip) = (pp(11,ip))**2*wz0
              pmmnts(22,ip) = pp(9,ip)*pp(11,ip)*wz0
              pmmnts(25,ip) = pp(1,ip)*pp(11,ip)*wz0
              pmmnts(26,ip) = pp(5,ip)*pp(11,ip)*wz0
              pmmnts(27,ip) = pp(3,ip)*pp(11,ip)*wz0
              pmmnts(28,ip) = pp(7,ip)*pp(11,ip)*wz0
              pmmnts( 5,ip) = pp(3,ip)*vzi*wz0
              pmmnts( 6,ip) = pp(7,ip)*vzi*wz0
              pmmnts(11,ip) = pp(3,ip)*pp(7,ip)*vzi**2*wz0
              pmmnts(15,ip) = (pp(3,ip)*vzi)**2*wz0
              pmmnts(16,ip) = (pp(7,ip)*vzi)**2*wz0
              pmmnts(20,ip) = pp(1,ip)*pp(3,ip)*vzi*wz0
              pmmnts(21,ip) = pp(5,ip)*pp(7,ip)*vzi*wz0
              pmmnts(23,ip) = pp(1,ip)*pp(7,ip)*vzi*wz0
              pmmnts(24,ip) = pp(5,ip)*pp(3,ip)*vzi*wz0
              pmmnts(29,ip) = pp(1,ip)*pp(3,ip)*wz0
              pmmnts(30,ip) = pp(5,ip)*pp(7,ip)*wz0
              pmmnts(31,ip) = pp(1,ip)*pp(7,ip)*wz0
              pmmnts(32,ip) = pp(5,ip)*pp(3,ip)*wz0
              pmmnts(33,ip) = pp(3,ip)*pp(7,ip)*wz0
            enddo
          else
            do ip=1,np
              vzi = 1./(pp(11,ip)+SMALLPOS)
              wz0 = pp(14,ip)
              pmmnts( 1,ip) = wz0
              pmmnts( 2,ip) = pp(1,ip)*wz0
              pmmnts( 3,ip) = pp(5,ip)*wz0
              pmmnts( 4,ip) = pp(9,ip)*wz0
              pmmnts( 7,ip) = pp(3,ip)*pp(18,ip)*wz0
              pmmnts( 8,ip) = pp(7,ip)*pp(18,ip)*wz0
              pmmnts( 9,ip) = pp(11,ip)*pp(18,ip)*wz0
              pmmnts(10,ip) = pp(1,ip)*pp(5,ip)*wz0
              pmmnts(12,ip) = pp(1,ip)**2*wz0
              pmmnts(13,ip) = pp(5,ip)**2*wz0
              pmmnts(14,ip) = pp(9,ip)**2*wz0
              pmmnts(17,ip) = (pp(18,ip)*pp(3,ip))**2*wz0
              pmmnts(18,ip) = (pp(18,ip)*pp(7,ip))**2*wz0
              pmmnts(19,ip) = (pp(18,ip)*pp(11,ip))**2*wz0
              pmmnts(22,ip) = pp(9,ip)*pp(11,ip)*pp(18,ip)*wz0
              pmmnts(25,ip) = pp(1,ip)*pp(11,ip)*pp(18,ip)*wz0
              pmmnts(26,ip) = pp(5,ip)*pp(11,ip)*pp(18,ip)*wz0
              pmmnts(27,ip) = pp(3,ip)*pp(11,ip)*pp(18,ip)**2*wz0
              pmmnts(28,ip) = pp(7,ip)*pp(11,ip)*pp(18,ip)**2*wz0
              pmmnts( 5,ip) = pp(3,ip)*vzi*wz0
              pmmnts( 6,ip) = pp(7,ip)*vzi*wz0
              pmmnts(11,ip) = pp(3,ip)*pp(7,ip)*vzi**2*wz0
              pmmnts(15,ip) = (pp(3,ip)*vzi)**2*wz0
              pmmnts(16,ip) = (pp(7,ip)*vzi)**2*wz0
              pmmnts(20,ip) = pp(1,ip)*pp(3,ip)*vzi*wz0
              pmmnts(21,ip) = pp(5,ip)*pp(7,ip)*vzi*wz0
              pmmnts(23,ip) = pp(1,ip)*pp(7,ip)*vzi*wz0
              pmmnts(24,ip) = pp(5,ip)*pp(3,ip)*vzi*wz0
              pmmnts(29,ip) = pp(1,ip)*pp(3,ip)*pp(18,ip)*wz0
              pmmnts(30,ip) = pp(5,ip)*pp(7,ip)*pp(18,ip)*wz0
              pmmnts(31,ip) = pp(1,ip)*pp(7,ip)*pp(18,ip)*wz0
              pmmnts(32,ip) = pp(5,ip)*pp(3,ip)*pp(18,ip)*wz0
              pmmnts(33,ip) = pp(3,ip)*pp(7,ip)*pp(18,ip)**2*wz0
            enddo
          endif

          --- deposit moments into array
          do ip=1,np
            do im=1,NUMZMMNT
              zmmnts(iz(ip),im,js) = zmmnts(iz(ip),im,js) + pmmnts(im,ip)
            enddo
          enddo

          --- put particle moments into pmmnts array for zmmnts(iz+1,)
          if (l_momentum_moments .or. .not. lrelativ) then
            do ip=1,np
              vzi = 1./(pp(12,ip)+SMALLPOS)
              wz1 = pp(15,ip)
              pmmnts( 1,ip) = wz1
              pmmnts( 2,ip) = pp(2,ip)*wz1
              pmmnts( 3,ip) = pp(6,ip)*wz1
              pmmnts( 4,ip) = pp(10,ip)*wz1
              pmmnts( 7,ip) = pp(4,ip)*wz1
              pmmnts( 8,ip) = pp(8,ip)*wz1
              pmmnts( 9,ip) = pp(12,ip)*wz1
              pmmnts(10,ip) = pp(2,ip)*pp(6,ip)*wz1
              pmmnts(12,ip) = pp(2,ip)**2*wz1
              pmmnts(13,ip) = pp(6,ip)**2*wz1
              pmmnts(14,ip) = pp(10,ip)**2*wz1
              pmmnts(17,ip) = (pp(13,ip)*pp(4,ip))**2*wz1
              pmmnts(18,ip) = (pp(13,ip)*pp(8,ip))**2*wz1
              pmmnts(19,ip) = (pp(13,ip)*pp(12,ip))**2*wz1
              pmmnts(22,ip) = pp(10,ip)*pp(12,ip)*wz1
              pmmnts(25,ip) = pp(2,ip)*pp(12,ip)*wz1
              pmmnts(26,ip) = pp(6,ip)*pp(12,ip)*wz1
              pmmnts(27,ip) = pp(4,ip)*pp(12,ip)*wz1
              pmmnts(28,ip) = pp(8,ip)*pp(12,ip)*wz1
              pmmnts( 5,ip) = pp(4,ip)*vzi*wz1
              pmmnts( 6,ip) = pp(8,ip)*vzi*wz1
              pmmnts(11,ip) = pp(4,ip)*pp(8,ip)*vzi**2*wz1
              pmmnts(15,ip) = (pp(4,ip)*vzi)**2*wz1
              pmmnts(16,ip) = (pp(8,ip)*vzi)**2*wz1
              pmmnts(20,ip) = pp(2,ip)*pp(4,ip)*vzi*wz1
              pmmnts(21,ip) = pp(6,ip)*pp(8,ip)*vzi*wz1
              pmmnts(23,ip) = pp(2,ip)*pp(8,ip)*vzi*wz1
              pmmnts(24,ip) = pp(6,ip)*pp(4,ip)*vzi*wz1
              pmmnts(29,ip) = pp(2,ip)*pp(4,ip)*wz1
              pmmnts(30,ip) = pp(6,ip)*pp(8,ip)*wz1
              pmmnts(31,ip) = pp(2,ip)*pp(8,ip)*wz1
              pmmnts(32,ip) = pp(6,ip)*pp(4,ip)*wz1
              pmmnts(33,ip) = pp(4,ip)*pp(8,ip)*wz1
            enddo
          else
            do ip=1,np
              vzi = 1./(pp(12,ip)+SMALLPOS)
              wz1 = pp(15,ip)
              pmmnts( 1,ip) = wz1
              pmmnts( 2,ip) = pp(2,ip)*wz1
              pmmnts( 3,ip) = pp(6,ip)*wz1
              pmmnts( 4,ip) = pp(10,ip)*wz1
              pmmnts( 7,ip) = pp(4,ip)*pp(13,ip)*wz1
              pmmnts( 8,ip) = pp(8,ip)*pp(13,ip)*wz1
              pmmnts( 9,ip) = pp(12,ip)*pp(13,ip)*wz1
              pmmnts(10,ip) = pp(2,ip)*pp(6,ip)*wz1
              pmmnts(12,ip) = pp(2,ip)**2*wz1
              pmmnts(13,ip) = pp(6,ip)**2*wz1
              pmmnts(14,ip) = pp(10,ip)**2*wz1
              pmmnts(17,ip) = (pp(13,ip)*pp(4,ip))**2*wz1
              pmmnts(18,ip) = (pp(13,ip)*pp(8,ip))**2*wz1
              pmmnts(19,ip) = (pp(13,ip)*pp(12,ip))**2*wz1
              pmmnts(22,ip) = pp(10,ip)*pp(13,ip)*pp(12,ip)*wz1
              pmmnts(25,ip) = pp(2,ip)*pp(12,ip)*pp(13,ip)*wz1
              pmmnts(26,ip) = pp(6,ip)*pp(12,ip)*pp(13,ip)*wz1
              pmmnts(27,ip) = pp(4,ip)*pp(12,ip)*pp(13,ip)**2*wz1
              pmmnts(28,ip) = pp(8,ip)*pp(12,ip)*pp(13,ip)**2*wz1
              pmmnts( 5,ip) = pp(4,ip)*vzi*wz1
              pmmnts( 6,ip) = pp(8,ip)*vzi*wz1
              pmmnts(11,ip) = pp(4,ip)*pp(8,ip)*vzi**2*wz1
              pmmnts(15,ip) = (pp(4,ip)*vzi)**2*wz1
              pmmnts(16,ip) = (pp(8,ip)*vzi)**2*wz1
              pmmnts(20,ip) = pp(2,ip)*pp(4,ip)*vzi*wz1
              pmmnts(21,ip) = pp(6,ip)*pp(8,ip)*vzi*wz1
              pmmnts(23,ip) = pp(2,ip)*pp(8,ip)*vzi*wz1
              pmmnts(24,ip) = pp(6,ip)*pp(4,ip)*vzi*wz1
              pmmnts(29,ip) = pp(2,ip)*pp(4,ip)*pp(13,ip)*wz1
              pmmnts(30,ip) = pp(6,ip)*pp(8,ip)*pp(13,ip)*wz1
              pmmnts(31,ip) = pp(2,ip)*pp(8,ip)*pp(13,ip)*wz1
              pmmnts(32,ip) = pp(6,ip)*pp(4,ip)*pp(13,ip)*wz1
              pmmnts(33,ip) = pp(4,ip)*pp(8,ip)*pp(13,ip)**2*wz1
            enddo
          endif

          --- deposit moments into array
          do ip=1,np
            do im=1,NUMZMMNT
              zmmnts(iz(ip)+1,im,js) = zmmnts(iz(ip)+1,im,js) + pmmnts(im,ip)
            enddo
          enddo

#else

 -------------------------------------------------------------------------
        --- This method is faster for RISC and pentium machines.
        --- Direct accumulation into the zmmnts array.

          if (l_momentum_moments .or. .not. lrelativ) then
            do ip=1,np
              --- sum particle moments into zmmnts(iz,)
              vzi = 1./(pp(11,ip)+SMALLPOS)
              wz0 = pp(14,ip)
              i = iz(ip)
              zmmnts(i, 1,js) = zmmnts(i, 1,js) + wz0
              zmmnts(i, 2,js) = zmmnts(i, 2,js) + pp(1,ip)*wz0
              zmmnts(i, 3,js) = zmmnts(i, 3,js) + pp(5,ip)*wz0
              zmmnts(i, 4,js) = zmmnts(i, 4,js) + pp(9,ip)*wz0
              zmmnts(i, 7,js) = zmmnts(i, 7,js) + pp(3,ip)*wz0
              zmmnts(i, 8,js) = zmmnts(i, 8,js) + pp(7,ip)*wz0
              zmmnts(i, 9,js) = zmmnts(i, 9,js) + pp(11,ip)*wz0
              zmmnts(i,10,js) = zmmnts(i,10,js) + pp(1,ip)*pp(5,ip)*wz0
              zmmnts(i,12,js) = zmmnts(i,12,js) + pp(1,ip)**2*wz0
              zmmnts(i,13,js) = zmmnts(i,13,js) + pp(5,ip)**2*wz0
              zmmnts(i,14,js) = zmmnts(i,14,js) + pp(9,ip)**2*wz0
              zmmnts(i,17,js) = zmmnts(i,17,js) + (pp(3,ip))**2*wz0
              zmmnts(i,18,js) = zmmnts(i,18,js) + (pp(7,ip))**2*wz0
              zmmnts(i,19,js) = zmmnts(i,19,js) + (pp(11,ip))**2*wz0
              zmmnts(i,22,js) = zmmnts(i,22,js) + pp(9,ip)*pp(11,ip)*wz0
              zmmnts(i,25,js) = zmmnts(i,25,js) + pp(1,ip)*pp(11,ip)*wz0
              zmmnts(i,26,js) = zmmnts(i,26,js) + pp(5,ip)*pp(11,ip)*wz0
              zmmnts(i,27,js) = zmmnts(i,27,js) + pp(3,ip)*pp(11,ip)*wz0
              zmmnts(i,28,js) = zmmnts(i,28,js) + pp(7,ip)*pp(11,ip)*wz0
              zmmnts(i, 5,js) = zmmnts(i, 5,js) + pp(3,ip)*vzi*wz0
              zmmnts(i, 6,js) = zmmnts(i, 6,js) + pp(7,ip)*vzi*wz0
              zmmnts(i,11,js) = zmmnts(i,11,js) + pp(3,ip)*pp(7,ip)*vzi**2*wz0
              zmmnts(i,15,js) = zmmnts(i,15,js) + (pp(3,ip)*vzi)**2*wz0
              zmmnts(i,16,js) = zmmnts(i,16,js) + (pp(7,ip)*vzi)**2*wz0
              zmmnts(i,20,js) = zmmnts(i,20,js) + pp(1,ip)*pp(3,ip)*vzi*wz0
              zmmnts(i,21,js) = zmmnts(i,21,js) + pp(5,ip)*pp(7,ip)*vzi*wz0
              zmmnts(i,23,js) = zmmnts(i,23,js) + pp(1,ip)*pp(7,ip)*vzi*wz0
              zmmnts(i,24,js) = zmmnts(i,24,js) + pp(5,ip)*pp(3,ip)*vzi*wz0
              zmmnts(i,29,js) = zmmnts(i,29,js) + pp(1,ip)*pp(3,ip)*wz0
              zmmnts(i,30,js) = zmmnts(i,30,js) + pp(5,ip)*pp(7,ip)*wz0
              zmmnts(i,31,js) = zmmnts(i,31,js) + pp(1,ip)*pp(7,ip)*wz0
              zmmnts(i,32,js) = zmmnts(i,32,js) + pp(5,ip)*pp(3,ip)*wz0
              zmmnts(i,33,js) = zmmnts(i,33,js) + pp(3,ip)*pp(7,ip)*wz0

              --- sum particle moments into zmmnts(iz+1,,js)
              vzi = 1./(pp(12,ip)+SMALLPOS)
              wz1 = pp(15,ip)
              i = iz(ip) + 1
              zmmnts(i, 1,js) = zmmnts(i, 1,js) + wz1
              zmmnts(i, 2,js) = zmmnts(i, 2,js) + pp(2,ip)*wz1
              zmmnts(i, 3,js) = zmmnts(i, 3,js) + pp(6,ip)*wz1
              zmmnts(i, 4,js) = zmmnts(i, 4,js) + pp(10,ip)*wz1
              zmmnts(i, 7,js) = zmmnts(i, 7,js) + pp(4,ip)*wz1
              zmmnts(i, 8,js) = zmmnts(i, 8,js) + pp(8,ip)*wz1
              zmmnts(i, 9,js) = zmmnts(i, 9,js) + pp(12,ip)*wz1
              zmmnts(i,10,js) = zmmnts(i,10,js) + pp(2,ip)*pp(6,ip)*wz1
              zmmnts(i,12,js) = zmmnts(i,12,js) + pp(2,ip)**2*wz1
              zmmnts(i,13,js) = zmmnts(i,13,js) + pp(6,ip)**2*wz1
              zmmnts(i,14,js) = zmmnts(i,14,js) + pp(10,ip)**2*wz1
              zmmnts(i,17,js) = zmmnts(i,17,js) + (pp(4,ip))**2*wz1
              zmmnts(i,18,js) = zmmnts(i,18,js) + (pp(8,ip))**2*wz1
              zmmnts(i,19,js) = zmmnts(i,19,js) + (pp(12,ip))**2*wz1
              zmmnts(i,22,js) = zmmnts(i,22,js) + pp(10,ip)*pp(12,ip)*wz1
              zmmnts(i,25,js) = zmmnts(i,25,js) + pp(2,ip)*pp(12,ip)*wz1
              zmmnts(i,26,js) = zmmnts(i,26,js) + pp(6,ip)*pp(12,ip)*wz1
              zmmnts(i,27,js) = zmmnts(i,27,js) + pp(4,ip)*pp(12,ip)*wz1
              zmmnts(i,28,js) = zmmnts(i,28,js) + pp(8,ip)*pp(12,ip)*wz1
              zmmnts(i, 5,js) = zmmnts(i, 5,js) + pp(4,ip)*vzi*wz1
              zmmnts(i, 6,js) = zmmnts(i, 6,js) + pp(8,ip)*vzi*wz1
              zmmnts(i,11,js) = zmmnts(i,11,js) + pp(4,ip)*pp(8,ip)*vzi**2*wz1
              zmmnts(i,15,js) = zmmnts(i,15,js) + (pp(4,ip)*vzi)**2*wz1
              zmmnts(i,16,js) = zmmnts(i,16,js) + (pp(8,ip)*vzi)**2*wz1
              zmmnts(i,20,js) = zmmnts(i,20,js) + pp(2,ip)*pp(4,ip)*vzi*wz1
              zmmnts(i,21,js) = zmmnts(i,21,js) + pp(6,ip)*pp(8,ip)*vzi*wz1
              zmmnts(i,23,js) = zmmnts(i,23,js) + pp(2,ip)*pp(8,ip)*vzi*wz1
              zmmnts(i,24,js) = zmmnts(i,24,js) + pp(6,ip)*pp(4,ip)*vzi*wz1
              zmmnts(i,29,js) = zmmnts(i,29,js) + pp(2,ip)*pp(4,ip)*wz1
              zmmnts(i,30,js) = zmmnts(i,30,js) + pp(6,ip)*pp(8,ip)*wz1
              zmmnts(i,31,js) = zmmnts(i,31,js) + pp(2,ip)*pp(8,ip)*wz1
              zmmnts(i,32,js) = zmmnts(i,32,js) + pp(6,ip)*pp(4,ip)*wz1
              zmmnts(i,33,js) = zmmnts(i,33,js) + pp(4,ip)*pp(8,ip)*wz1

            enddo
          else
            do ip=1,np
              --- sum particle moments into zmmnts(iz,)
              vzi = 1./(pp(11,ip)+SMALLPOS)
              wz0 = pp(14,ip)
              i = iz(ip)
              zmmnts(i, 1,js) = zmmnts(i, 1,js) + wz0
              zmmnts(i, 2,js) = zmmnts(i, 2,js) + pp(1,ip)*wz0
              zmmnts(i, 3,js) = zmmnts(i, 3,js) + pp(5,ip)*wz0
              zmmnts(i, 4,js) = zmmnts(i, 4,js) + pp(9,ip)*wz0
              zmmnts(i, 7,js) = zmmnts(i, 7,js) + pp(3,ip)*pp(18,ip)*wz0
              zmmnts(i, 8,js) = zmmnts(i, 8,js) + pp(7,ip)*pp(18,ip)*wz0
              zmmnts(i, 9,js) = zmmnts(i, 9,js) + pp(11,ip)*pp(18,ip)*wz0
              zmmnts(i,10,js) = zmmnts(i,10,js) + pp(1,ip)*pp(5,ip)*wz0
              zmmnts(i,12,js) = zmmnts(i,12,js) + pp(1,ip)**2*wz0
              zmmnts(i,13,js) = zmmnts(i,13,js) + pp(5,ip)**2*wz0
              zmmnts(i,14,js) = zmmnts(i,14,js) + pp(9,ip)**2*wz0
              zmmnts(i,17,js) = zmmnts(i,17,js) + (pp(18,ip)*pp(3,ip))**2*wz0
              zmmnts(i,18,js) = zmmnts(i,18,js) + (pp(18,ip)*pp(7,ip))**2*wz0
              zmmnts(i,19,js) = zmmnts(i,19,js) + (pp(18,ip)*pp(11,ip))**2*wz0
              zmmnts(i,22,js) = zmmnts(i,22,js) + pp(9,ip)*pp(11,ip)*pp(18,ip)*wz0
              zmmnts(i,25,js) = zmmnts(i,25,js) + pp(1,ip)*pp(11,ip)*pp(18,ip)*wz0
              zmmnts(i,26,js) = zmmnts(i,26,js) + pp(5,ip)*pp(11,ip)*pp(18,ip)*wz0
              zmmnts(i,27,js) = zmmnts(i,27,js) + pp(3,ip)*pp(11,ip)*pp(18,ip)**2*wz0
              zmmnts(i,28,js) = zmmnts(i,28,js) + pp(7,ip)*pp(11,ip)*pp(18,ip)**2*wz0
              zmmnts(i, 5,js) = zmmnts(i, 5,js) + pp(3,ip)*vzi*wz0
              zmmnts(i, 6,js) = zmmnts(i, 6,js) + pp(7,ip)*vzi*wz0
              zmmnts(i,11,js) = zmmnts(i,11,js) + pp(3,ip)*pp(7,ip)*vzi**2*wz0
              zmmnts(i,15,js) = zmmnts(i,15,js) + (pp(3,ip)*vzi)**2*wz0
              zmmnts(i,16,js) = zmmnts(i,16,js) + (pp(7,ip)*vzi)**2*wz0
              zmmnts(i,20,js) = zmmnts(i,20,js) + pp(1,ip)*pp(3,ip)*vzi*wz0
              zmmnts(i,21,js) = zmmnts(i,21,js) + pp(5,ip)*pp(7,ip)*vzi*wz0
              zmmnts(i,23,js) = zmmnts(i,23,js) + pp(1,ip)*pp(7,ip)*vzi*wz0
              zmmnts(i,24,js) = zmmnts(i,24,js) + pp(5,ip)*pp(3,ip)*vzi*wz0
              zmmnts(i,29,js) = zmmnts(i,29,js) + pp(1,ip)*pp(3,ip)*pp(18,ip)*wz0
              zmmnts(i,30,js) = zmmnts(i,30,js) + pp(5,ip)*pp(7,ip)*pp(18,ip)*wz0
              zmmnts(i,31,js) = zmmnts(i,31,js) + pp(1,ip)*pp(7,ip)*pp(18,ip)*wz0
              zmmnts(i,32,js) = zmmnts(i,32,js) + pp(5,ip)*pp(3,ip)*pp(18,ip)*wz0
              zmmnts(i,33,js) = zmmnts(i,33,js) + pp(3,ip)*pp(7,ip)*pp(18,ip)**2*wz0

              --- sum particle moments into zmmnts(iz+1,,js)
              vzi = 1./(pp(12,ip)+SMALLPOS)
              wz1 = pp(15,ip)
              i = iz(ip) + 1
              zmmnts(i, 1,js) = zmmnts(i, 1,js) + wz1
              zmmnts(i, 2,js) = zmmnts(i, 2,js) + pp(2,ip)*wz1
              zmmnts(i, 3,js) = zmmnts(i, 3,js) + pp(6,ip)*wz1
              zmmnts(i, 4,js) = zmmnts(i, 4,js) + pp(10,ip)*wz1
              zmmnts(i, 7,js) = zmmnts(i, 7,js) + pp(4,ip)*pp(13,ip)*wz1
              zmmnts(i, 8,js) = zmmnts(i, 8,js) + pp(8,ip)*pp(13,ip)*wz1
              zmmnts(i, 9,js) = zmmnts(i, 9,js) + pp(12,ip)*pp(13,ip)*wz1
              zmmnts(i,10,js) = zmmnts(i,10,js) + pp(2,ip)*pp(6,ip)*wz1
              zmmnts(i,12,js) = zmmnts(i,12,js) + pp(2,ip)**2*wz1
              zmmnts(i,13,js) = zmmnts(i,13,js) + pp(6,ip)**2*wz1
              zmmnts(i,14,js) = zmmnts(i,14,js) + pp(10,ip)**2*wz1
              zmmnts(i,17,js) = zmmnts(i,17,js) + (pp(13,ip)*pp(4,ip))**2*wz1
              zmmnts(i,18,js) = zmmnts(i,18,js) + (pp(13,ip)*pp(8,ip))**2*wz1
              zmmnts(i,19,js) = zmmnts(i,19,js) + (pp(13,ip)*pp(12,ip))**2*wz1
              zmmnts(i,22,js) = zmmnts(i,22,js) + pp(10,ip)*pp(13,ip)*pp(12,ip)*wz1
              zmmnts(i,25,js) = zmmnts(i,25,js) + pp(2,ip)*pp(12,ip)*pp(13,ip)*wz1
              zmmnts(i,26,js) = zmmnts(i,26,js) + pp(6,ip)*pp(12,ip)*pp(13,ip)*wz1
              zmmnts(i,27,js) = zmmnts(i,27,js) + pp(4,ip)*pp(12,ip)*pp(13,ip)**2*wz1
              zmmnts(i,28,js) = zmmnts(i,28,js) + pp(8,ip)*pp(12,ip)*pp(13,ip)**2*wz1
              zmmnts(i, 5,js) = zmmnts(i, 5,js) + pp(4,ip)*vzi*wz1
              zmmnts(i, 6,js) = zmmnts(i, 6,js) + pp(8,ip)*vzi*wz1
              zmmnts(i,11,js) = zmmnts(i,11,js) + pp(4,ip)*pp(8,ip)*vzi**2*wz1
              zmmnts(i,15,js) = zmmnts(i,15,js) + (pp(4,ip)*vzi)**2*wz1
              zmmnts(i,16,js) = zmmnts(i,16,js) + (pp(8,ip)*vzi)**2*wz1
              zmmnts(i,20,js) = zmmnts(i,20,js) + pp(2,ip)*pp(4,ip)*vzi*wz1
              zmmnts(i,21,js) = zmmnts(i,21,js) + pp(6,ip)*pp(8,ip)*vzi*wz1
              zmmnts(i,23,js) = zmmnts(i,23,js) + pp(2,ip)*pp(8,ip)*vzi*wz1
              zmmnts(i,24,js) = zmmnts(i,24,js) + pp(6,ip)*pp(4,ip)*vzi*wz1
              zmmnts(i,29,js) = zmmnts(i,29,js) + pp(2,ip)*pp(4,ip)*pp(13,ip)*wz1
              zmmnts(i,30,js) = zmmnts(i,30,js) + pp(6,ip)*pp(8,ip)*pp(13,ip)*wz1
              zmmnts(i,31,js) = zmmnts(i,31,js) + pp(2,ip)*pp(8,ip)*pp(13,ip)*wz1
              zmmnts(i,32,js) = zmmnts(i,32,js) + pp(6,ip)*pp(4,ip)*pp(13,ip)*wz1
              zmmnts(i,33,js) = zmmnts(i,33,js) + pp(4,ip)*pp(8,ip)*pp(13,ip)**2*wz1

            enddo
          endif

 -------------------------------------------------------------------------
#endif

          --- End of Z moments accumulation
        endif

        deallocate(pp,iz)

      endif

      if (itask == 3 .and. .not. laccumulate_zmoments) then

        --- If the moments for each species was calculated separately, sum
        --- them to get the combined moments.
        if (nszmmnt > 0) then
          maxp(:,nszmmnt) = maxval(maxp(:,0:nszmmnt-1),2)
          minp(:,nszmmnt) = minval(minp(:,0:nszmmnt-1),2)
          zmmnts0(:,nszmmnt) = sum(zmmnts0(:,0:nszmmnt-1),2)
          zmmnts(:,:,nszmmnt) = sum(zmmnts(:,:,0:nszmmnt-1),3)
        endif


        clighti = 1./clight

        --- Make sure that the moments are set to have the same
        --- number of species as the z moments.
        if (nsmmnt /= nszmmnt) then
          nsmmnt = nszmmnt
          call gchange("Moments",0)
        endif

        --- Make sure that the window moments are set to have the same
        --- number of species as the z moments.
        if (nswind /= nszmmnt) then
          nswind = nszmmnt
          call gchange("Win_Moments",0)
        endif

        xmaxp  = maxp(1,:)
        xminp  = minp(1,:)
        ymaxp  = maxp(2,:)
        yminp  = minp(2,:)
        zmaxp  = maxp(3,:)
        zminp  = minp(3,:)
        vxmaxp = maxp(4,:)
        vxminp = minp(4,:)
        vymaxp = maxp(5,:)
        vyminp = minp(5,:)
        vzmaxp = maxp(6,:)
        vzminp = minp(6,:)

        --- For slave, call routine which sums moments over processors.
#ifdef MPIPARALLEL
        call parallel_sum_mmnts(zmmnts0,zmmnts)
#endif

        --- Complete the calculation of moments: divide by particle number

        --- Global moments first
        do js=0,nsmmnt
          pnumi = 1./(zmmnts0( 1,js)+SMALLPOS)
          pnum(0,js)    = zmmnts0( 1,js)
          xbar(0,js)    = zmmnts0( 2,js)*pnumi
          ybar(0,js)    = zmmnts0( 3,js)*pnumi
          zbar(0,js)    = zmmnts0( 4,js)*pnumi
          xpbar(0,js)   = zmmnts0( 5,js)*pnumi
          ypbar(0,js)   = zmmnts0( 6,js)*pnumi
          vxbar(0,js)   = zmmnts0( 7,js)*pnumi
          vybar(0,js)   = zmmnts0( 8,js)*pnumi
          vzbar(0,js)   = zmmnts0( 9,js)*pnumi
          xybar(0,js)   = zmmnts0(10,js)*pnumi
          xpypbar(0,js) = zmmnts0(11,js)*pnumi
          xsqbar(0,js)  = zmmnts0(12,js)*pnumi
          ysqbar(0,js)  = zmmnts0(13,js)*pnumi
          zsqbar(0,js)  = zmmnts0(14,js)*pnumi
          xpsqbar(0,js) = zmmnts0(15,js)*pnumi
          ypsqbar(0,js) = zmmnts0(16,js)*pnumi
          vxsqbar(0,js) = zmmnts0(17,js)*pnumi
          vysqbar(0,js) = zmmnts0(18,js)*pnumi
          vzsqbar(0,js) = zmmnts0(19,js)*pnumi
          xxpbar(0,js)  = zmmnts0(20,js)*pnumi
          yypbar(0,js)  = zmmnts0(21,js)*pnumi
          zvzbar(0,js)  = zmmnts0(22,js)*pnumi
          xypbar(0,js)  = zmmnts0(23,js)*pnumi 
          yxpbar(0,js)  = zmmnts0(24,js)*pnumi 
          xvzbar(0,js)  = zmmnts0(25,js)*pnumi
          yvzbar(0,js)  = zmmnts0(26,js)*pnumi
          vxvzbar(0,js) = zmmnts0(27,js)*pnumi
          vyvzbar(0,js) = zmmnts0(28,js)*pnumi
          xvxbar(0,js)  = zmmnts0(29,js)*pnumi
          yvybar(0,js)  = zmmnts0(30,js)*pnumi
          xvybar(0,js)  = zmmnts0(31,js)*pnumi
          yvxbar(0,js)  = zmmnts0(32,js)*pnumi
          vxvybar(0,js) = zmmnts0(33,js)*pnumi

          --- Compute second order moments with averages subtracted
          delxsq = xsqbar(0,js) - xbar(0,js)*xbar(0,js)
          delxxp = xxpbar(0,js) - xbar(0,js)*xpbar(0,js)
          delxpsq = xpsqbar(0,js) - xpbar(0,js)*xpbar(0,js)
          delxvx = xvxbar(0,js) - xbar(0,js)*vxbar(0,js)
          delvxsq = vxsqbar(0,js) - vxbar(0,js)**2

          delysq = ysqbar(0,js) - ybar(0,js)*ybar(0,js)
          delyyp = yypbar(0,js) - ybar(0,js)*ypbar(0,js)
          delypsq = ypsqbar(0,js) - ypbar(0,js)*ypbar(0,js)
          delyvy = yvybar(0,js) - ybar(0,js)*vybar(0,js)
          delvysq = vysqbar(0,js) - vybar(0,js)**2

          delxy = xybar(0,js) - xbar(0,js)*ybar(0,js)
          delxyp = xypbar(0,js) - xbar(0,js)*ypbar(0,js)
          delyxp = yxpbar(0,js) - ybar(0,js)*xpbar(0,js)
          delxpyp = xpypbar(0,js) - xpbar(0,js)*ypbar(0,js)
          delxvy = xvybar(0,js) - xbar(0,js)*vybar(0,js)
          delyvx = yvxbar(0,js) - ybar(0,js)*vxbar(0,js)
          delvxvy = vxvybar(0,js) - vxbar(0,js)*vybar(0,js)

          delzsq = zsqbar(0,js) - zbar(0,js)**2
          delvzsq = vzsqbar(0,js) - vzbar(0,js)**2
          delzvz = zvzbar(0,js) - zbar(0,js)*vzbar(0,js)

          --- Compute overall true RMS positions
          xrms(0,js) = sqrt(max(0.,delxsq))
          yrms(0,js) = sqrt(max(0.,delysq))
          zrms(0,js) = sqrt(max(0.,delzsq))
          rrms(0,js) = sqrt(xrms(0,js)**2 + yrms(0,js)**2)

          --- Compute overall true RMS velocities
          xprms(0,js) = sqrt(max(0.,delxpsq))
          yprms(0,js) = sqrt(max(0.,delypsq))
          vxrms(0,js) = sqrt(max(0.,delvxsq))
          vyrms(0,js) = sqrt(max(0.,delvysq))
          vzrms(0,js) = sqrt(max(0.,delvzsq))

          --- Compute overall emittances
          epsx(0,js) = 4.*sqrt(max(0.,delxsq*delxpsq - delxxp**2))
          epsy(0,js) = 4.*sqrt(max(0.,delysq*delypsq - delyyp**2))
          epsnx(0,js) = 4.*sqrt(max(0.,delxsq*delvxsq - delxvx**2))*clighti*1.e6
          epsny(0,js) = 4.*sqrt(max(0.,delysq*delvysq - delyvy**2))*clighti*1.e6

          epsnz(0,js) = 4.*sqrt(max(0.,delzsq*delvzsq - delzvz**2))*clighti
          epsz(0,js) = epsnz(0,js)/dvnz(vzbar(0,js)*clighti)

          --- Compute generalized emittances
          tr = 4.*((delxsq + delysq)*(delxpsq + delypsq)
     &         -(delxxp + delyyp)**2 - (delxyp - delyxp)**2)
          epsr(0,js) = sqrt(max(0.,tr))
          tg = 0.5*(epsx(0,js)**2 + epsy(0,js)**2) +
     &             16*(delxy*delxpyp - delxyp*delyxp)
          epsg(0,js) = sqrt(max(0.,tg))
          th = epsx(0,js)**2*epsy(0,js)**2 +
     &       256*((delxy*delxpyp)**2 + (delxyp*delyxp)**2 -
     &       delxsq*delysq*(delxpyp)**2 - delxsq*delypsq*(delyxp)**2 -
     &       delxpsq*delysq*(delxyp)**2 - delxpsq*delypsq*(delxy)**2 -
     &       2*delxy*delxyp*delyxp*delxpyp + 2*delxxp*delypsq*delxy*delyxp -
     &       2*delxxp*delyyp*delxy*delxpyp - 2*delxxp*delyyp*delxyp*delyxp +
     &       2*delxpsq*delyyp*delxy*delxyp + 2*delxsq*delyyp*delyxp*delxpyp +
     &       2*delxxp*delysq*delxpyp*delxyp)
          epsh(0,js) = sqrt(sqrt(max(0.,th)))

          --- Compute normalized generalized emittances
          tr = 4.*((delxsq + delysq)*(delvxsq + delvysq)
     &         -(delxvx + delyvy)**2 - (delxvy - delyvx)**2)
          epsnr(0,js) = sqrt(max(0.,tr))*clighti*1.e6
          tg = 0.5*(epsnx(0,js)**2 + epsny(0,js)**2) +
     &             16*(delxy*delvxvy - delxvy*delyvx)
          epsng(0,js) = sqrt(max(0.,tg))*clighti*1.e6
          th = epsnx(0,js)**2*epsny(0,js)**2 +
     &       256*((delxy*delvxvy)**2 + (delxvy*delyvx)**2 -
     &       delxsq*delysq*(delvxvy)**2 - delxsq*delvysq*(delyvx)**2 -
     &       delvxsq*delysq*(delxvy)**2 - delvxsq*delvysq*(delxy)**2 -
     &       2*delxy*delxvy*delyvx*delvxvy + 2*delxvx*delvysq*delxy*delyvx -
     &       2*delxvx*delyvy*delxy*delvxvy - 2*delxvx*delyvy*delxvy*delyvx +
     &       2*delvxsq*delyvy*delxy*delxvy + 2*delxsq*delyvy*delyvx*delvxvy +
     &       2*delxvx*delysq*delvxvy*delxvy)
          epsnh(0,js) = sqrt(sqrt(max(0.,th)))*clighti*1.e6

          --- RMS beam length
          bmlen(js) = sqrt(max(0., zsqbar(0,js)-zbar(0,js)**2))

          ---  Axial z momentum in vbeam frame (classical)
          pz(js) = zmmntsm(js)*zmmntsw(js)*pnum(0,js) * (vzbar(0,js)-vbeam)

          ---  Total Z kinetic energy in lab frame minus beam energy
          ---  (classical) using <vz^2>-vbeam^2;
          ekzmbe(js) = 0.5*zmmntsm(js)*zmmntsw(js)*pnum(0,js)*(vzsqbar(0,js) -
     &                 vbeam**2)

          ---  Z kinetic energy in beam frame (classical), using <(vz-vbeam)^2>
          ekzbeam(js) = 0.5*zmmntsm(js)*zmmntsw(js)*pnum(0,js)*
     &                  (vzsqbar(0,js) - 2.*vzbar(0,js)*vbeam + vbeam**2)

          ---  Perp kinetic energy 
          ekperp(js) =  0.5*zmmntsm(js)*zmmntsw(js)*pnum(0,js)*(vxsqbar(0,js)+
     &                  vysqbar(0,js))

          --- Total kinetic energy in beam frame
          ek(js) = ekzbeam(js) + ekperp(js)

        enddo

        ---  Number of live particles (integer)
        if(wpid==0) nplive = pnum(0,nswind) + 0.5


        --- Now scale the full Z moments by particle number
        if (ifzmmnt == 2) then
          do js=0,nszmmnt
            do izm = 0, nzmmnt
              pnumi = 1./(zmmnts(izm, 1,js)+SMALLPOS)
              pnumz(izm,js)    = zmmnts(izm, 1,js)
              xbarz(izm,js)    = zmmnts(izm, 2,js)*pnumi
              ybarz(izm,js)    = zmmnts(izm, 3,js)*pnumi
              zbarz(izm,js)    = zmmnts(izm, 4,js)*pnumi
              xpbarz(izm,js)   = zmmnts(izm, 5,js)*pnumi
              ypbarz(izm,js)   = zmmnts(izm, 6,js)*pnumi
              vxbarz(izm,js)   = zmmnts(izm, 7,js)*pnumi
              vybarz(izm,js)   = zmmnts(izm, 8,js)*pnumi
              vzbarz(izm,js)   = zmmnts(izm, 9,js)*pnumi
              xybarz(izm,js)   = zmmnts(izm,10,js)*pnumi
              xpypbarz(izm,js) = zmmnts(izm,11,js)*pnumi
              xsqbarz(izm,js)  = zmmnts(izm,12,js)*pnumi
              ysqbarz(izm,js)  = zmmnts(izm,13,js)*pnumi
              zsqbarz(izm,js)  = zmmnts(izm,14,js)*pnumi
              xpsqbarz(izm,js) = zmmnts(izm,15,js)*pnumi
              ypsqbarz(izm,js) = zmmnts(izm,16,js)*pnumi
              vxsqbarz(izm,js) = zmmnts(izm,17,js)*pnumi
              vysqbarz(izm,js) = zmmnts(izm,18,js)*pnumi
              vzsqbarz(izm,js) = zmmnts(izm,19,js)*pnumi
              xxpbarz(izm,js)  = zmmnts(izm,20,js)*pnumi
              yypbarz(izm,js)  = zmmnts(izm,21,js)*pnumi
              zvzbarz(izm,js)  = zmmnts(izm,22,js)*pnumi
              xypbarz(izm,js)  = zmmnts(izm,23,js)*pnumi 
              yxpbarz(izm,js)  = zmmnts(izm,24,js)*pnumi 
              xvzbarz(izm,js)  = zmmnts(izm,25,js)*pnumi
              yvzbarz(izm,js)  = zmmnts(izm,26,js)*pnumi
              vxvzbarz(izm,js) = zmmnts(izm,27,js)*pnumi
              vyvzbarz(izm,js) = zmmnts(izm,28,js)*pnumi
              xvxbarz(izm,js)  = zmmnts(izm,29,js)*pnumi
              yvybarz(izm,js)  = zmmnts(izm,30,js)*pnumi
              xvybarz(izm,js)  = zmmnts(izm,31,js)*pnumi
              yvxbarz(izm,js)  = zmmnts(izm,32,js)*pnumi
              vxvybarz(izm,js) = zmmnts(izm,33,js)*pnumi

              --- Compute second order moments with averages subtracted
              delxsq = xsqbarz(izm,js) - xbarz(izm,js)*xbarz(izm,js)
              delxxp = xxpbarz(izm,js) - xbarz(izm,js)*xpbarz(izm,js)
              delxpsq = xpsqbarz(izm,js) - xpbarz(izm,js)*xpbarz(izm,js)
              delxvx = xvxbarz(izm,js) - xbarz(izm,js)*vxbarz(izm,js)
              delvxsq = vxsqbarz(izm,js) - vxbarz(izm,js)**2

              delysq = ysqbarz(izm,js) - ybarz(izm,js)*ybarz(izm,js)
              delyyp = yypbarz(izm,js) - ybarz(izm,js)*ypbarz(izm,js)
              delypsq = ypsqbarz(izm,js) - ypbarz(izm,js)*ypbarz(izm,js)
              delyvy = yvybarz(izm,js) - ybarz(izm,js)*vybarz(izm,js)
              delvysq = vysqbarz(izm,js) - vybarz(izm,js)**2

              delxy = xybarz(izm,js) - xbarz(izm,js)*ybarz(izm,js)
              delxyp = xypbarz(izm,js) - xbarz(izm,js)*ypbarz(izm,js)
              delyxp = yxpbarz(izm,js) - ybarz(izm,js)*xpbarz(izm,js)
              delxpyp = xpypbarz(izm,js) - xpbarz(izm,js)*ypbarz(izm,js)
              delxvy = xvybarz(izm,js) - xbarz(izm,js)*vybarz(izm,js)
              delyvx = yvxbarz(izm,js) - ybarz(izm,js)*vxbarz(izm,js)
              delvxvy = vxvybarz(izm,js) - vxbarz(izm,js)*vybarz(izm,js)

              delzsq = zsqbarz(izm,js) - zbarz(izm,js)**2
              delvzsq = vzsqbarz(izm,js) - vzbarz(izm,js)**2
              delzvz = zvzbarz(izm,js) - zbarz(izm,js)*vzbarz(izm,js)

              --- Compute overall true RMS positions
              xrmsz(izm,js) = sqrt(max(0.,delxsq))
              yrmsz(izm,js) = sqrt(max(0.,delysq))
              zrmsz(izm,js) = sqrt(max(0.,delzsq))
              rrmsz(izm,js) = sqrt(xrmsz(izm,js)**2 + yrmsz(izm,js)**2)

              --- Compute overall true RMS velocities
              xprmsz(izm,js) = sqrt(max(0.,delxpsq))
              yprmsz(izm,js) = sqrt(max(0.,delypsq))
              vxrmsz(izm,js) = sqrt(max(0.,delvxsq))
              vyrmsz(izm,js) = sqrt(max(0.,delvysq))
              vzrmsz(izm,js) = sqrt(max(0.,delvzsq))

              --- Compute overall emittances
              epsxz(izm,js) = 4.*sqrt(max(0.,delxsq*delxpsq - delxxp**2))
              epsyz(izm,js) = 4.*sqrt(max(0.,delysq*delypsq - delyyp**2))
              epsnxz(izm,js) = 4.*sqrt(max(0.,delxsq*delvxsq - delxvx**2))*clighti*1.e6
              epsnyz(izm,js) = 4.*sqrt(max(0.,delysq*delvysq - delyvy**2))*clighti*1.e6

              epsnzz(izm,js) = 4.*sqrt(max(0.,delzsq*delvzsq - delzvz**2))*clighti
              epszz(izm,js) = epsnzz(izm,js)/dvnz(vzbarz(izm,js)*clighti)

              --- Compute generalized emittances
              tr = 4.*((delxsq + delysq)*(delxpsq + delypsq)
     &             -(delxxp + delyyp)**2 - (delxyp - delyxp)**2)
              epsrz(izm,js) = sqrt(max(0.,tr))
              tg = 0.5*(epsxz(izm,js)**2 + epsyz(izm,js)**2) +
     &           16*(delxy*delxpyp - delxyp*delyxp)
              epsgz(izm,js) = sqrt(max(0.,tg))
              th = epsxz(izm,js)**2*epsyz(izm,js)**2 +
     &          256*((delxy*delxpyp)**2 + (delxyp*delyxp)**2 -
     &          delxsq*delysq*(delxpyp)**2 - delxsq*delypsq*(delyxp)**2 -
     &          delxpsq*delysq*(delxyp)**2 - delxpsq*delypsq*(delxy)**2 -
     &          2*delxy*delxyp*delyxp*delxpyp + 2*delxxp*delypsq*delxy*delyxp -
     &          2*delxxp*delyyp*delxy*delxpyp - 2*delxxp*delyyp*delxyp*delyxp +
     &          2*delxpsq*delyyp*delxy*delxyp + 2*delxsq*delyyp*delyxp*delxpyp +
     &          2*delxxp*delysq*delxpyp*delxyp)
              epshz(izm,js) = sqrt(sqrt(max(0.,th)))

              --- Compute normalized generalized emittances
              tr = 4.*((delxsq + delysq)*(delvxsq + delvysq)
     &             -(delxvx + delyvy)**2 - (delxvy - delyvx)**2)
              epsnrz(izm,js) = sqrt(max(0.,tr))*clighti*1.e6
              tg = 0.5*(epsnxz(izm,js)**2 + epsnyz(izm,js)**2) +
     &           16*(delxy*delvxvy - delxvy*delyvx)
              epsngz(izm,js) = sqrt(max(0.,tg))*clighti*1.e6
              th = epsnxz(izm,js)**2*epsnyz(izm,js)**2 +
     &          256*((delxy*delvxvy)**2 + (delxvy*delyvx)**2 -
     &          delxsq*delysq*(delvxvy)**2 - delxsq*delvysq*(delyvx)**2 -
     &          delvxsq*delysq*(delxvy)**2 - delvxsq*delvysq*(delxy)**2 -
     &          2*delxy*delxvy*delyvx*delvxvy + 2*delxvx*delvysq*delxy*delyvx -
     &          2*delxvx*delyvy*delxy*delvxvy - 2*delxvx*delyvy*delxvy*delyvx +
     &          2*delvxsq*delyvy*delxy*delxvy + 2*delxsq*delyvy*delyvx*delvxvy +
     &          2*delxvx*delysq*delvxvy*delxvy)
              epsnhz(izm,js) = sqrt(sqrt(max(0.,th)))*clighti*1.e6

            enddo

            --- Save window moments.  Linearly interpolate the Z moments data
            --- to the Z window center.
            --- If the zwindow is outside the zmoments range, don't set
            --- anything.
            do iw = 1,nzwind
              zwin = 0.5*(zwindows(1,iw) + zwindows(2,iw))
              if (zmmntmin <= zwin .and. zwin < zmmntmax) then
                icell = int((zwin - zmmntmin)*dzmi)
                wz0 = (zwin - zmmntmin)*dzmi - icell
                wz1 = 1. - wz0
                pnum(iw,js)   =pnumz(icell,js)*wz1    + pnumz(icell+1,js)*wz0
                xbar(iw,js)   =xbarz(icell,js)*wz1    + xbarz(icell+1,js)*wz0
                ybar(iw,js)   =ybarz(icell,js)*wz1    + ybarz(icell+1,js)*wz0
                zbar(iw,js)   =zbarz(icell,js)*wz1    + zbarz(icell+1,js)*wz0
                xpbar(iw,js)  =xpbarz(icell,js)*wz1   + xpbarz(icell+1,js)*wz0
                ypbar(iw,js)  =ypbarz(icell,js)*wz1   + ypbarz(icell+1,js)*wz0
                vxbar(iw,js)  =vxbarz(icell,js)*wz1   + vxbarz(icell+1,js)*wz0
                vybar(iw,js)  =vybarz(icell,js)*wz1   + vybarz(icell+1,js)*wz0
                vzbar(iw,js)  =vzbarz(icell,js)*wz1   + vzbarz(icell+1,js)*wz0
                xybar(iw,js)  =xybarz(icell,js)*wz1   + xybarz(icell+1,js)*wz0
                xypbar(iw,js) =xypbarz(icell,js)*wz1  + xypbarz(icell+1,js)*wz0
                xvybar(iw,js) =xvybarz(icell,js)*wz1  + xvybarz(icell+1,js)*wz0
                yxpbar(iw,js) =yxpbarz(icell,js)*wz1  + yxpbarz(icell+1,js)*wz0
                yvxbar(iw,js) =yvxbarz(icell,js)*wz1  + yvxbarz(icell+1,js)*wz0
                xpypbar(iw,js)=xpypbarz(icell,js)*wz1 + xpypbarz(icell+1,js)*wz0
                vxvybar(iw,js)=vxvybarz(icell,js)*wz1 + vxvybarz(icell+1,js)*wz0
                xsqbar(iw,js) =xsqbarz(icell,js)*wz1  + xsqbarz(icell+1,js)*wz0
                ysqbar(iw,js) =ysqbarz(icell,js)*wz1  + ysqbarz(icell+1,js)*wz0
                zsqbar(iw,js) =zsqbarz(icell,js)*wz1  + zsqbarz(icell+1,js)*wz0
                xpsqbar(iw,js)=xpsqbarz(icell,js)*wz1 + xpsqbarz(icell+1,js)*wz0
                ypsqbar(iw,js)=ypsqbarz(icell,js)*wz1 + ypsqbarz(icell+1,js)*wz0
                vxsqbar(iw,js)=vxsqbarz(icell,js)*wz1 + vxsqbarz(icell+1,js)*wz0
                vysqbar(iw,js)=vysqbarz(icell,js)*wz1 + vysqbarz(icell+1,js)*wz0
                vzsqbar(iw,js)=vzsqbarz(icell,js)*wz1 + vzsqbarz(icell+1,js)*wz0
                xxpbar(iw,js) =xxpbarz(icell,js)*wz1  + xxpbarz(icell+1,js)*wz0
                xvxbar(iw,js) =xvxbarz(icell,js)*wz1  + xvxbarz(icell+1,js)*wz0
                yypbar(iw,js) =yypbarz(icell,js)*wz1  + yypbarz(icell+1,js)*wz0
                yvybar(iw,js) =yvybarz(icell,js)*wz1  + yvybarz(icell+1,js)*wz0
                xvzbar(iw,js) =xvzbarz(icell,js)*wz1  + xvzbarz(icell+1,js)*wz0
                yvzbar(iw,js) =yvzbarz(icell,js)*wz1  + yvzbarz(icell+1,js)*wz0
                zvzbar(iw,js) =zvzbarz(icell,js)*wz1  + zvzbarz(icell+1,js)*wz0
                vxvzbar(iw,js)=vxvzbarz(icell,js)*wz1 + vxvzbarz(icell+1,js)*wz0
                vyvzbar(iw,js)=vyvzbarz(icell,js)*wz1 + vyvzbarz(icell+1,js)*wz0
                xrms(iw,js)   =xrmsz(icell,js)*wz1    + xrmsz(icell+1,js)*wz0
                yrms(iw,js)   =yrmsz(icell,js)*wz1    + yrmsz(icell+1,js)*wz0
                zrms(iw,js)   =zrmsz(icell,js)*wz1    + zrmsz(icell+1,js)*wz0
                rrms(iw,js)   =rrmsz(icell,js)*wz1    + rrmsz(icell+1,js)*wz0
                xprms(iw,js)  =xprmsz(icell,js)*wz1   + xprmsz(icell+1,js)*wz0
                yprms(iw,js)  =yprmsz(icell,js)*wz1   + yprmsz(icell+1,js)*wz0
                epsx(iw,js)   =epsxz(icell,js)*wz1    + epsxz(icell+1,js)*wz0
                epsy(iw,js)   =epsyz(icell,js)*wz1    + epsyz(icell+1,js)*wz0
                epsz(iw,js)   =epszz(icell,js)*wz1    + epszz(icell+1,js)*wz0
                epsnx(iw,js)  =epsnxz(icell,js)*wz1   + epsnxz(icell+1,js)*wz0
                epsny(iw,js)  =epsnyz(icell,js)*wz1   + epsnyz(icell+1,js)*wz0
                epsnz(iw,js)  =epsnzz(icell,js)*wz1   + epsnzz(icell+1,js)*wz0
                epsr(iw,js)   =epsrz(icell,js)*wz1    + epsrz(icell+1,js)*wz0
                epsg(iw,js)   =epsgz(icell,js)*wz1    + epsgz(icell+1,js)*wz0
                epsh(iw,js)   =epshz(icell,js)*wz1    + epshz(icell+1,js)*wz0
                epsnr(iw,js)  =epsnrz(icell,js)*wz1   + epsnrz(icell+1,js)*wz0
                epsng(iw,js)  =epsngz(icell,js)*wz1   + epsngz(icell+1,js)*wz0
                epsnh(iw,js)  =epsnhz(icell,js)*wz1   + epsnhz(icell+1,js)*wz0
                vxrms(iw,js)  =vxrmsz(icell,js)*wz1   + vxrmsz(icell+1,js)*wz0
                vyrms(iw,js)  =vyrmsz(icell,js)*wz1   + vyrmsz(icell+1,js)*wz0
                vzrms(iw,js)  =vzrmsz(icell,js)*wz1   + vzrmsz(icell+1,js)*wz0
              endif
            enddo
          enddo

        endif

      endif

!$OMP ATOMIC
      momentstime = momentstime + (wtime() - timetemp)

      return
      end

[padvnc3d] [padvncrz] [padvncxy] [wrzgen]
      subroutine getzmmnt_weights(np,xp,yp,zp,uxp,uyp,uzp,gaminv,wp,q,m,w,dt,
     &                            dtscale,itask,nplive,
     &                    uxpo,uypo,uzpo,is,isid,ismax,maxp,minp,zmmnts0,zmmnts)
      use Constant
      use Beam_acc
      use InDiag
      use Z_Moments
      use Win_Moments
      use Moments
      use Picglb
      use ExtPart
      use Timers
      integer(ISZ):: np,itask,nplive,is,isid,ismax
      real(kind=8):: q,m,w,dt,dtscale
      real(kind=8):: xp(np), yp(np), zp(np)
      real(kind=8):: uxp(np), uyp(np), uzp(np), gaminv(np), wp(np)
      real(kind=8):: uxpo(np), uypo(np), uzpo(np)
      real(kind=8):: maxp(6,0:nszmmnt),minp(6,0:nszmmnt)
      real(kind=8):: zmmnts0(NUMZMMNT,0:nszmmnt)
      real(kind=8):: zmmnts(0:nzmmnt,NUMZMMNT,0:nszmmnt)

   Sets moments for species 1 in as a function of z.
   Interpolation is done to grid centers
   Note that lost particles are to the left of the z grid.
   Note: "window zero" includes all particles, and no extrapolation is done
   Note: zbar and zsqbar calculations are meaningless except for 
         window zero (WE SHOULD DELETE ?)
   Note: Vectorized over moments.
         particle moment calculations are vectorized over particles
         moments are summed into a 2-D array, vectorized over moments
         then they are dumped into the seperate 1-D arrays in itask=3
   Three parts
   When itask=1  zeros out all moments
        itask=2  sums moments from particles
        itask=3  divides by number of particles, calculates emittances and rms

   Note that the moments are scaled by the dtscale factor. This accounts for
   the fact that particles with larger dtscale would normally contribute more
   if nominal dt would be used since they would spend proportionately more
   time in any location.

      real(kind=8),allocatable:: pp(:,:)
      integer(ISZ),allocatable:: iz(:)
      integer(ISZ):: im,izm,iw,ip,icell,i,js
      real(kind=8):: dti,dtip1,wz1,wz0,zwin
      real(kind=8):: oneondt,clighti,vzi
      real(kind=8):: pnumi(0:nszmmnt)
      real(kind=8):: delxsq(0:nszmmnt),delxxp(0:nszmmnt),delxpsq(0:nszmmnt)
      real(kind=8):: delysq(0:nszmmnt),delyyp(0:nszmmnt),delypsq(0:nszmmnt)
      real(kind=8):: delxy(0:nszmmnt),delxyp(0:nszmmnt)
      real(kind=8):: delyxp(0:nszmmnt),delxpyp(0:nszmmnt)
      real(kind=8):: delzsq(0:nszmmnt),delvzsq(0:nszmmnt),delzvz(0:nszmmnt)
      real(kind=8):: delvxsq(0:nszmmnt),delvysq(0:nszmmnt)
      real(kind=8):: tg(0:nszmmnt),th(0:nszmmnt)
      real(kind=8):: gamma
      real(kind=8):: timetemp,wtime
      real(kind=8):: zmmntscopy(NUMZMMNT)
#ifdef J90
      real(kind=8):: pmmnts(NUMZMMNT,np)
#endif

      timetemp = wtime()

      if (ifzmmnt == 0) return

      oneondt = 1./dvnz(dt)

      --- If lspeciesmoments is true, then calculate the moments for each
      --- species separately. The moments for all species combined will be
      --- done afterwards. Otherwise, only calculate the combined moments.
      if (lspeciesmoments) then
        js = isid - 1
      else
        js = 0
      endif

      if (itask == 1 .and. .not. laccumulate_zmoments) then
        call getzmmnt(np,xp,yp,zp,uxp,uyp,uzp,gaminv,q,m,w,dt,dtscale,
     &                itask,nplive,
     &                uxpo,uypo,uzpo,is,isid,ismax,maxp,minp,zmmnts0,zmmnts)
      endif

      if (itask == 2) then

        allocate(pp(0:18,np),iz(np))

        --- Save the mass and weight
        --- Note that the charge is not needed and should be removed from the
        --- arugment list.
        zmmntsq(js) = q
        zmmntsm(js) = m
        zmmntsw(js) = w

        --- Set flag that moments for this species has been calculated.
        zmomentscalculated(js) = .true.

        --- Set maximum and minimum particle coordinate variables
        do ip = 1, np
          maxp(1,js) = max(maxp(1,js), xp(ip))
          minp(1,js) = min(minp(1,js), xp(ip))
          maxp(2,js) = max(maxp(2,js), yp(ip))
          minp(2,js) = min(minp(2,js), yp(ip))
          maxp(3,js) = max(maxp(3,js), zp(ip))
          minp(3,js) = min(minp(3,js), zp(ip))
          if (l_momentum_moments .or. .not. lrelativ) then
            maxp(4,js) = max(maxp(4,js), uxp(ip))
            minp(4,js) = min(minp(4,js), uxp(ip))
            maxp(5,js) = max(maxp(5,js), uyp(ip))
            minp(5,js) = min(minp(5,js), uyp(ip))
            maxp(6,js) = max(maxp(6,js), uzp(ip))
            minp(6,js) = min(minp(6,js), uzp(ip))
          else
            maxp(4,js) = max(maxp(4,js), gaminv(ip)*uxp(ip))
            minp(4,js) = min(minp(4,js), gaminv(ip)*uxp(ip))
            maxp(5,js) = max(maxp(5,js), gaminv(ip)*uyp(ip))
            minp(5,js) = min(minp(5,js), gaminv(ip)*uyp(ip))
            maxp(6,js) = max(maxp(6,js), gaminv(ip)*uzp(ip))
            minp(6,js) = min(minp(6,js), gaminv(ip)*uzp(ip))
          endif
          nplive = nplive + 1

          --- Set flag so particles out of range don't contribute
          ---   not added to total moments
          ---   weights set to zero for z moments
          pp(0,ip) = 1.
          if (ifzmmnt == 1) then
            if ((zp(ip) - zbeam) < zmmntmin .or.
     &          (zp(ip) - zbeam) > zmmntmax) pp(0,ip) = 0.
          else
            if ((zp(ip) - zbeam) <  zmmntmin .or.
     &          (zp(ip) - zbeam) >= zmmntmax) pp(0,ip) = 0.
          endif

        enddo

        --- Accumate global moments first.

        --- Used to add in dtscale factor below.
        if (dtscale /= 1.) zmmntscopy = zmmnts0(:,js)

 -------------------------------------------------------------------------
#ifdef J90
        --- This method is faster for vectorized machines, primarily the J90.
        --- put particle moments into pmmnts array for total moments
        if (l_momentum_moments .or. .not. lrelativ) then
          do ip=1,np
            vzi = 1./(uzp(ip)+SMALLPOS)
            pmmnts( 1,ip) = wp(ip)
            pmmnts( 2,ip) = xp(ip)*wp(ip)
            pmmnts( 3,ip) = yp(ip)*wp(ip)
            pmmnts( 4,ip) = zp(ip)*wp(ip)
            pmmnts( 7,ip) = uxp(ip)*wp(ip)
            pmmnts( 8,ip) = uyp(ip)*wp(ip)
            pmmnts( 9,ip) = uzp(ip)*wp(ip)
            pmmnts(10,ip) = xp(ip)*yp(ip)*wp(ip)
            pmmnts(12,ip) = xp(ip)**2*wp(ip)
            pmmnts(13,ip) = yp(ip)**2*wp(ip)
            pmmnts(14,ip) = zp(ip)**2*wp(ip)
            pmmnts(17,ip) = (gaminv(ip)*uxp(ip))**2*wp(ip)
            pmmnts(18,ip) = (gaminv(ip)*uyp(ip))**2*wp(ip)
            pmmnts(19,ip) = (gaminv(ip)*uzp(ip))**2*wp(ip)
            pmmnts(22,ip) = zp(ip)*uzp(ip)*wp(ip)
            pmmnts(25,ip) = xp(ip)*uzp(ip)*wp(ip)
            pmmnts(26,ip) = yp(ip)*uzp(ip)*wp(ip)
            pmmnts(27,ip) = uxp(ip)*uzp(ip)*wp(ip)
            pmmnts(28,ip) = uyp(ip)*uzp(ip)*wp(ip)
            pmmnts( 5,ip) = uxp(ip)*vzi*wp(ip)
            pmmnts( 6,ip) = uyp(ip)*vzi*wp(ip)
            pmmnts(11,ip) = uxp(ip)*uyp(ip)*vzi**2*wp(ip)
            pmmnts(15,ip) = (uxp(ip)*vzi)**2*wp(ip)
            pmmnts(16,ip) = (uyp(ip)*vzi)**2*wp(ip)
            pmmnts(20,ip) = xp(ip)*uxp(ip)*vzi*wp(ip)
            pmmnts(21,ip) = yp(ip)*uyp(ip)*vzi*wp(ip)
            pmmnts(23,ip) = xp(ip)*uyp(ip)*vzi*wp(ip)
            pmmnts(24,ip) = yp(ip)*uxp(ip)*vzi*wp(ip)
            pmmnts(29,ip) = xp(ip)*uxp(ip)*wp(ip)
            pmmnts(30,ip) = yp(ip)*uyp(ip)*wp(ip)
            pmmnts(31,ip) = xp(ip)*uyp(ip)*wp(ip)
            pmmnts(32,ip) = yp(ip)*uxp(ip)*wp(ip)
            pmmnts(33,ip) = uxp(ip)*uyp(ip)*wp(ip)
          enddo
        else
          do ip=1,np
            vzi = 1./(uzp(ip)+SMALLPOS)
            pmmnts( 1,ip) = wp(ip)
            pmmnts( 2,ip) = xp(ip)*wp(ip)
            pmmnts( 3,ip) = yp(ip)*wp(ip)
            pmmnts( 4,ip) = zp(ip)*wp(ip)
            pmmnts( 7,ip) = uxp(ip)*gaminv(ip)*wp(ip)
            pmmnts( 8,ip) = uyp(ip)*gaminv(ip)*wp(ip)
            pmmnts( 9,ip) = uzp(ip)*gaminv(ip)*wp(ip)
            pmmnts(10,ip) = xp(ip)*yp(ip)*wp(ip)
            pmmnts(12,ip) = xp(ip)**2*wp(ip)
            pmmnts(13,ip) = yp(ip)**2*wp(ip)
            pmmnts(14,ip) = zp(ip)**2*wp(ip)
            pmmnts(17,ip) = (gaminv(ip)*uxp(ip))**2*wp(ip)
            pmmnts(18,ip) = (gaminv(ip)*uyp(ip))**2*wp(ip)
            pmmnts(19,ip) = (gaminv(ip)*uzp(ip))**2*wp(ip)
            pmmnts(22,ip) = zp(ip)*gaminv(ip)*uzp(ip)*wp(ip)
            pmmnts(25,ip) = xp(ip)*uzp(ip)*gaminv(ip)*wp(ip)
            pmmnts(26,ip) = yp(ip)*uzp(ip)*gaminv(ip)*wp(ip)
            pmmnts(27,ip) = uxp(ip)*uzp(ip)*gaminv(ip)**2*wp(ip)
            pmmnts(28,ip) = uyp(ip)*uzp(ip)*gaminv(ip)**2*wp(ip)
            pmmnts( 5,ip) = uxp(ip)*vzi*wp(ip)
            pmmnts( 6,ip) = uyp(ip)*vzi*wp(ip)
            pmmnts(11,ip) = uxp(ip)*uyp(ip)*vzi**2*wp(ip)
            pmmnts(15,ip) = (uxp(ip)*vzi)**2*wp(ip)
            pmmnts(16,ip) = (uyp(ip)*vzi)**2*wp(ip)
            pmmnts(20,ip) = xp(ip)*uxp(ip)*vzi*wp(ip)
            pmmnts(21,ip) = yp(ip)*uyp(ip)*vzi*wp(ip)
            pmmnts(23,ip) = xp(ip)*uyp(ip)*vzi*wp(ip)
            pmmnts(24,ip) = yp(ip)*uxp(ip)*vzi*wp(ip)
            pmmnts(29,ip) = xp(ip)*uxp(ip)*gaminv(ip)*wp(ip)
            pmmnts(30,ip) = yp(ip)*uyp(ip)*gaminv(ip)*wp(ip)
            pmmnts(31,ip) = xp(ip)*uyp(ip)*gaminv(ip)*wp(ip)
            pmmnts(32,ip) = yp(ip)*uxp(ip)*gaminv(ip)*wp(ip)
            pmmnts(33,ip) = uxp(ip)*uyp(ip)*gaminv(ip)**2*wp(ip)
          enddo
        endif

        --- sum total moments
        --- Timings show that this loop runs much faster if the inner loop is
        --- completely unrolled.  It is faster on both HP workstations and
        --- CRAY C90.  WARNING: if NUMZMMNT changes, this loop needs to be
        --- adjusted to reflect that change.  Note that the similar loops
        --- below are still faster in their present form.
        do ip=1,np
          do im=1,NUMZMMNT
            zmmnts0(im,js) = zmmnts0(im,js) + pmmnts(ip,im)
          enddo
          zmmnts0( 1,js) = zmmnts0( 1,js) + pmmnts( 1,ip)
          zmmnts0( 2,js) = zmmnts0( 2,js) + pmmnts( 2,ip)
          zmmnts0( 3,js) = zmmnts0( 3,js) + pmmnts( 3,ip)
          zmmnts0( 4,js) = zmmnts0( 4,js) + pmmnts( 4,ip)
          zmmnts0( 5,js) = zmmnts0( 5,js) + pmmnts( 5,ip)
          zmmnts0( 6,js) = zmmnts0( 6,js) + pmmnts( 6,ip)
          zmmnts0( 7,js) = zmmnts0( 7,js) + pmmnts( 7,ip)
          zmmnts0( 8,js) = zmmnts0( 8,js) + pmmnts( 8,ip)
          zmmnts0( 9,js) = zmmnts0( 9,js) + pmmnts( 9,ip)
          zmmnts0(10,js) = zmmnts0(10,js) + pmmnts(10,ip)
          zmmnts0(11,js) = zmmnts0(11,js) + pmmnts(11,ip)
          zmmnts0(12,js) = zmmnts0(12,js) + pmmnts(12,ip)
          zmmnts0(13,js) = zmmnts0(13,js) + pmmnts(13,ip)
          zmmnts0(14,js) = zmmnts0(14,js) + pmmnts(14,ip)
          zmmnts0(15,js) = zmmnts0(15,js) + pmmnts(15,ip)
          zmmnts0(16,js) = zmmnts0(16,js) + pmmnts(16,ip)
          zmmnts0(17,js) = zmmnts0(17,js) + pmmnts(17,ip)
          zmmnts0(18,js) = zmmnts0(18,js) + pmmnts(18,ip)
          zmmnts0(19,js) = zmmnts0(19,js) + pmmnts(19,ip)
          zmmnts0(20,js) = zmmnts0(20,js) + pmmnts(20,ip)
          zmmnts0(21,js) = zmmnts0(21,js) + pmmnts(21,ip)
          zmmnts0(22,js) = zmmnts0(22,js) + pmmnts(22,ip)
          zmmnts0(23,js) = zmmnts0(23,js) + pmmnts(23,ip) 
          zmmnts0(24,js) = zmmnts0(24,js) + pmmnts(24,ip) 
          zmmnts0(25,js) = zmmnts0(25,js) + pmmnts(25,ip) 
          zmmnts0(26,js) = zmmnts0(26,js) + pmmnts(26,ip) 
          zmmnts0(27,js) = zmmnts0(27,js) + pmmnts(27,ip) 
          zmmnts0(28,js) = zmmnts0(28,js) + pmmnts(28,ip) 
          zmmnts0(29,js) = zmmnts0(29,js) + pmmnts(29,ip) 
          zmmnts0(30,js) = zmmnts0(30,js) + pmmnts(30,ip) 
          zmmnts0(31,js) = zmmnts0(31,js) + pmmnts(31,ip) 
          zmmnts0(32,js) = zmmnts0(32,js) + pmmnts(32,ip) 
          zmmnts0(33,js) = zmmnts0(33,js) + pmmnts(33,ip) 
        enddo

#else

 -------------------------------------------------------------------------
        --- This method is faster for RISC and pentium machines.
        --- sum particle moments into total moments
        if (l_momentum_moments .or. .not. lrelativ) then
          do ip=1,np
            vzi = 1./(uzp(ip)+SMALLPOS)
            zmmnts0( 1,js) = zmmnts0( 1,js) + wp(ip)
            zmmnts0( 2,js) = zmmnts0( 2,js) + xp(ip)*wp(ip)
            zmmnts0( 3,js) = zmmnts0( 3,js) + yp(ip)*wp(ip)
            zmmnts0( 4,js) = zmmnts0( 4,js) + zp(ip)*wp(ip)
            zmmnts0( 7,js) = zmmnts0( 7,js) + uxp(ip)*wp(ip)
            zmmnts0( 8,js) = zmmnts0( 8,js) + uyp(ip)*wp(ip)
            zmmnts0( 9,js) = zmmnts0( 9,js) + uzp(ip)*wp(ip)
            zmmnts0(10,js) = zmmnts0(10,js) + xp(ip)*yp(ip)*wp(ip)
            zmmnts0(12,js) = zmmnts0(12,js) + xp(ip)**2*wp(ip)
            zmmnts0(13,js) = zmmnts0(13,js) + yp(ip)**2*wp(ip)
            zmmnts0(14,js) = zmmnts0(14,js) + zp(ip)**2*wp(ip)
            zmmnts0(17,js) = zmmnts0(17,js) + (gaminv(ip)*uxp(ip))**2*wp(ip)
            zmmnts0(18,js) = zmmnts0(18,js) + (gaminv(ip)*uyp(ip))**2*wp(ip)
            zmmnts0(19,js) = zmmnts0(19,js) + (gaminv(ip)*uzp(ip))**2*wp(ip)
            zmmnts0(22,js) = zmmnts0(22,js) + zp(ip)*uzp(ip)*wp(ip)
            zmmnts0(25,js) = zmmnts0(25,js) + xp(ip)*uzp(ip)*wp(ip)
            zmmnts0(26,js) = zmmnts0(26,js) + yp(ip)*uzp(ip)*wp(ip)
            zmmnts0(27,js) = zmmnts0(27,js) + uxp(ip)*uzp(ip)*wp(ip)
            zmmnts0(28,js) = zmmnts0(28,js) + uyp(ip)*uzp(ip)*wp(ip)
            zmmnts0( 5,js) = zmmnts0( 5,js) + uxp(ip)*vzi*wp(ip)
            zmmnts0( 6,js) = zmmnts0( 6,js) + uyp(ip)*vzi*wp(ip)
            zmmnts0(11,js) = zmmnts0(11,js) + uxp(ip)*uyp(ip)*vzi**2*wp(ip)
            zmmnts0(15,js) = zmmnts0(15,js) + (uxp(ip)*vzi)**2*wp(ip)
            zmmnts0(16,js) = zmmnts0(16,js) + (uyp(ip)*vzi)**2*wp(ip)
            zmmnts0(20,js) = zmmnts0(20,js) + xp(ip)*uxp(ip)*vzi*wp(ip)
            zmmnts0(21,js) = zmmnts0(21,js) + yp(ip)*uyp(ip)*vzi*wp(ip)
            zmmnts0(23,js) = zmmnts0(23,js) + xp(ip)*uyp(ip)*vzi*wp(ip)
            zmmnts0(24,js) = zmmnts0(24,js) + yp(ip)*uxp(ip)*vzi*wp(ip)
            zmmnts0(29,js) = zmmnts0(29,js) + xp(ip)*uxp(ip)*wp(ip)
            zmmnts0(30,js) = zmmnts0(30,js) + yp(ip)*uyp(ip)*wp(ip)
            zmmnts0(31,js) = zmmnts0(31,js) + xp(ip)*uyp(ip)*wp(ip)
            zmmnts0(32,js) = zmmnts0(32,js) + yp(ip)*uxp(ip)*wp(ip)
            zmmnts0(33,js) = zmmnts0(33,js) + uxp(ip)*uyp(ip)*wp(ip)
          enddo
        else
          do ip=1,np
            vzi = 1./(uzp(ip)+SMALLPOS)
            zmmnts0( 1,js) = zmmnts0( 1,js) + wp(ip)
            zmmnts0( 2,js) = zmmnts0( 2,js) + xp(ip)*wp(ip)
            zmmnts0( 3,js) = zmmnts0( 3,js) + yp(ip)*wp(ip)
            zmmnts0( 4,js) = zmmnts0( 4,js) + zp(ip)*wp(ip)
            zmmnts0( 7,js) = zmmnts0( 7,js) + uxp(ip)*gaminv(ip)*wp(ip)
            zmmnts0( 8,js) = zmmnts0( 8,js) + uyp(ip)*gaminv(ip)*wp(ip)
            zmmnts0( 9,js) = zmmnts0( 9,js) + uzp(ip)*gaminv(ip)*wp(ip)
            zmmnts0(10,js) = zmmnts0(10,js) + xp(ip)*yp(ip)*wp(ip)
            zmmnts0(12,js) = zmmnts0(12,js) + xp(ip)**2*wp(ip)
            zmmnts0(13,js) = zmmnts0(13,js) + yp(ip)**2*wp(ip)
            zmmnts0(14,js) = zmmnts0(14,js) + zp(ip)**2*wp(ip)
            zmmnts0(17,js) = zmmnts0(17,js) + (gaminv(ip)*uxp(ip))**2*wp(ip)
            zmmnts0(18,js) = zmmnts0(18,js) + (gaminv(ip)*uyp(ip))**2*wp(ip)
            zmmnts0(19,js) = zmmnts0(19,js) + (gaminv(ip)*uzp(ip))**2*wp(ip)
            zmmnts0(22,js) = zmmnts0(22,js) + zp(ip)*gaminv(ip)*uzp(ip)*wp(ip)
            zmmnts0(25,js) = zmmnts0(25,js) + xp(ip)*uzp(ip)*gaminv(ip)*wp(ip)
            zmmnts0(26,js) = zmmnts0(26,js) + yp(ip)*uzp(ip)*gaminv(ip)*wp(ip)
            zmmnts0(27,js) = zmmnts0(27,js) + uxp(ip)*uzp(ip)*gaminv(ip)**2*wp(ip)
            zmmnts0(28,js) = zmmnts0(28,js) + uyp(ip)*uzp(ip)*gaminv(ip)**2*wp(ip)
            zmmnts0( 5,js) = zmmnts0( 5,js) + uxp(ip)*vzi*wp(ip)
            zmmnts0( 6,js) = zmmnts0( 6,js) + uyp(ip)*vzi*wp(ip)
            zmmnts0(11,js) = zmmnts0(11,js) + uxp(ip)*uyp(ip)*vzi**2*wp(ip)
            zmmnts0(15,js) = zmmnts0(15,js) + (uxp(ip)*vzi)**2*wp(ip)
            zmmnts0(16,js) = zmmnts0(16,js) + (uyp(ip)*vzi)**2*wp(ip)
            zmmnts0(20,js) = zmmnts0(20,js) + xp(ip)*uxp(ip)*vzi*wp(ip)
            zmmnts0(21,js) = zmmnts0(21,js) + yp(ip)*uyp(ip)*vzi*wp(ip)
            zmmnts0(23,js) = zmmnts0(23,js) + xp(ip)*uyp(ip)*vzi*wp(ip)
            zmmnts0(24,js) = zmmnts0(24,js) + yp(ip)*uxp(ip)*vzi*wp(ip)
            zmmnts0(29,js) = zmmnts0(29,js) + xp(ip)*uxp(ip)*gaminv(ip)*wp(ip)
            zmmnts0(30,js) = zmmnts0(30,js) + yp(ip)*uyp(ip)*gaminv(ip)*wp(ip)
            zmmnts0(31,js) = zmmnts0(31,js) + xp(ip)*uyp(ip)*gaminv(ip)*wp(ip)
            zmmnts0(32,js) = zmmnts0(32,js) + yp(ip)*uxp(ip)*gaminv(ip)*wp(ip)
            zmmnts0(33,js) = zmmnts0(33,js) + uxp(ip)*uyp(ip)*gaminv(ip)**2*wp(ip)
          enddo
        endif

 -------------------------------------------------------------------------
#endif

        --- Adds in dtscale factor efficiently. Somewhat kludgy though.
        if (dtscale /= 1.)
     &    zmmnts0(:,js) = zmmntscopy + (zmmnts0(:,js) - zmmntscopy)*dtscale


        --- Now, accumulate moments onto Z grid.
        if (ifzmmnt == 2) then

          --- Extrapolate quantities onto grid points so all particles
          ---   contributing to a moment are at same z.
          --- dti and dtip1 calculated by assuming constant Vz and 
          ---   computing times at which z crosses the two nearest grid points.
          --- V extrapolated using current and previous values of V
          ---   (this avoids having to use E and B to extrapolate V).
          --- X extrapolated assuming constant V (uses V at particle position).
          --- After extrapolation, data is linearly weighted onto grid by
          ---   particle's z location.  This has the effect that the less
          ---   accurately extrapolated particles, which are far from the
          ---   grid point, are weighted less, hopefully helping smoothness.

          --- extrapolate to two nearest cells
          do ip=1,np
            if (pp(0,ip) /= 0.) then
              vzi = 1./(uzp(ip)*gaminv(ip)+SMALLPOS)
              iz(ip)  = (zp(ip) - zbeam - zmmntmin)*dzmi
              dti = (iz(ip)*dzm-zp(ip)+zbeam+zmmntmin)*vzi
              dti = max(-zmmntdtextmax*dt,min(zmmntdtextmax*dt,dti))
              pp(3,ip) = uxp(ip)*(1. + dti*oneondt) - uxpo(ip)*dti*oneondt
              pp(7,ip) = uyp(ip)*(1. + dti*oneondt) - uypo(ip)*dti*oneondt
              pp(11,ip) = uzp(ip)*(1. + dti*oneondt) - uzpo(ip)*dti*oneondt
              pp(1,ip) = xp(ip) + uxp(ip)*dti*gaminv(ip)
              pp(5,ip) = yp(ip) + uyp(ip)*dti*gaminv(ip)
              pp(9,ip) = zp(ip) + uzp(ip)*dti*gaminv(ip)
              pp(9,ip) = iz(ip)*dzm + zbeam + zmmntmin

              if (l_momentum_moments .or. .not. lrelativ) then
                pp(18,ip) = 1.
              else
                pp(18,ip) = 1./
     &          sqrt(1. + (pp(3,ip)**2 + pp(7,ip)**2 + pp(11,ip)**2)/clight**2)
              endif

              dtip1=((iz(ip)+1)*dzm-zp(ip)+zbeam+zmmntmin)*vzi
              dtip1 = max(-zmmntdtextmax*dt,min(zmmntdtextmax*dt,dtip1))
              pp(4,ip) = uxp(ip)*(1. + dtip1*oneondt) - uxpo(ip)*dtip1*oneondt
              pp(8,ip) = uyp(ip)*(1. + dtip1*oneondt) - uypo(ip)*dtip1*oneondt
              pp(12,ip) = uzp(ip)*(1. + dtip1*oneondt) - uzpo(ip)*dtip1*oneondt
              pp(2,ip) = xp(ip) + uxp(ip)*dtip1*gaminv(ip)
              pp(6,ip) = yp(ip) + uyp(ip)*dtip1*gaminv(ip)
              pp(10,ip) = zp(ip) + uzp(ip)*dtip1*gaminv(ip)
              pp(10,ip) = (iz(ip)+1)*dzm + zbeam + zmmntmin

              if (l_momentum_moments .or. .not. lrelativ) then
                pp(13,ip) = 1.
              else
                pp(13,ip) = 1./
     &          sqrt(1. + (pp(4,ip)**2 + pp(8,ip)**2 + pp(12,ip)**2)/clight**2)
              endif

              pp(14,ip) = (1. - (zp(ip)-zbeam-zmmntmin)*dzmi + iz(ip))*wp(ip)*
     &                    dtscale
              pp(15,ip) = ((zp(ip) - zbeam - zmmntmin)*dzmi - iz(ip))*wp(ip)*
     &                    dtscale
              pp(16,ip) = dti
              pp(17,ip) = dtip1
            else
              iz(ip) = 0
              pp(:,ip) = 0.
            endif
          enddo

 -------------------------------------------------------------------------
#ifdef J90
        --- This method is faster for vectorized machines, primarily the J90.
          --- put particle moments into pmmnts array for zmmnts(iz,)
          if (l_momentum_moments .or. .not. lrelativ) then
            do ip=1,np
              vzi = 1./(pp(11,ip)+SMALLPOS)
              wz0 = pp(14,ip)
              pmmnts( 1,ip) = wz0
              pmmnts( 2,ip) = pp(1,ip)*wz0
              pmmnts( 3,ip) = pp(5,ip)*wz0
              pmmnts( 4,ip) = pp(9,ip)*wz0
              pmmnts( 7,ip) = pp(3,ip)*wz0
              pmmnts( 8,ip) = pp(7,ip)*wz0
              pmmnts( 9,ip) = pp(11,ip)*wz0
              pmmnts(10,ip) = pp(1,ip)*pp(5,ip)*wz0
              pmmnts(12,ip) = pp(1,ip)**2*wz0
              pmmnts(13,ip) = pp(5,ip)**2*wz0
              pmmnts(14,ip) = pp(9,ip)**2*wz0
              pmmnts(17,ip) = (pp(18,ip)*pp(3,ip))**2*wz0
              pmmnts(18,ip) = (pp(18,ip)*pp(7,ip))**2*wz0
              pmmnts(19,ip) = (pp(18,ip)*pp(11,ip))**2*wz0
              pmmnts(22,ip) = pp(9,ip)*pp(11,ip)*wz0
              pmmnts(25,ip) = pp(1,ip)*pp(11,ip)*wz0
              pmmnts(26,ip) = pp(5,ip)*pp(11,ip)*wz0
              pmmnts(27,ip) = pp(3,ip)*pp(11,ip)*wz0
              pmmnts(28,ip) = pp(7,ip)*pp(11,ip)*wz0
              pmmnts( 5,ip) = pp(3,ip)*vzi*wz0
              pmmnts( 6,ip) = pp(7,ip)*vzi*wz0
              pmmnts(11,ip) = pp(3,ip)*pp(7,ip)*vzi**2*wz0
              pmmnts(15,ip) = (pp(3,ip)*vzi)**2*wz0
              pmmnts(16,ip) = (pp(7,ip)*vzi)**2*wz0
              pmmnts(20,ip) = pp(1,ip)*pp(3,ip)*vzi*wz0
              pmmnts(21,ip) = pp(5,ip)*pp(7,ip)*vzi*wz0
              pmmnts(23,ip) = pp(1,ip)*pp(7,ip)*vzi*wz0
              pmmnts(24,ip) = pp(5,ip)*pp(3,ip)*vzi*wz0
              pmmnts(29,ip) = pp(1,ip)*pp(3,ip)*wz0
              pmmnts(30,ip) = pp(5,ip)*pp(7,ip)*wz0
              pmmnts(31,ip) = pp(1,ip)*pp(7,ip)*wz0
              pmmnts(32,ip) = pp(5,ip)*pp(3,ip)*wz0
              pmmnts(33,ip) = pp(3,ip)*pp(7,ip)*wz0
            enddo
          else
            do ip=1,np
              vzi = 1./(pp(11,ip)+SMALLPOS)
              wz0 = pp(14,ip)
              pmmnts( 1,ip) = wz0
              pmmnts( 2,ip) = pp(1,ip)*wz0
              pmmnts( 3,ip) = pp(5,ip)*wz0
              pmmnts( 4,ip) = pp(9,ip)*wz0
              pmmnts( 7,ip) = pp(3,ip)*pp(18,ip)*wz0
              pmmnts( 8,ip) = pp(7,ip)*pp(18,ip)*wz0
              pmmnts( 9,ip) = pp(11,ip)*pp(18,ip)*wz0
              pmmnts(10,ip) = pp(1,ip)*pp(5,ip)*wz0
              pmmnts(12,ip) = pp(1,ip)**2*wz0
              pmmnts(13,ip) = pp(5,ip)**2*wz0
              pmmnts(14,ip) = pp(9,ip)**2*wz0
              pmmnts(17,ip) = (pp(18,ip)*pp(3,ip))**2*wz0
              pmmnts(18,ip) = (pp(18,ip)*pp(7,ip))**2*wz0
              pmmnts(19,ip) = (pp(18,ip)*pp(11,ip))**2*wz0
              pmmnts(22,ip) = pp(9,ip)*pp(11,ip)*pp(18,ip)*wz0
              pmmnts(25,ip) = pp(1,ip)*pp(11,ip)*pp(18,ip)*wz0
              pmmnts(26,ip) = pp(5,ip)*pp(11,ip)*pp(18,ip)*wz0
              pmmnts(27,ip) = pp(3,ip)*pp(11,ip)*pp(18,ip)**2*wz0
              pmmnts(28,ip) = pp(7,ip)*pp(11,ip)*pp(18,ip)**2*wz0
              pmmnts( 5,ip) = pp(3,ip)*vzi*wz0
              pmmnts( 6,ip) = pp(7,ip)*vzi*wz0
              pmmnts(11,ip) = pp(3,ip)*pp(7,ip)*vzi**2*wz0
              pmmnts(15,ip) = (pp(3,ip)*vzi)**2*wz0
              pmmnts(16,ip) = (pp(7,ip)*vzi)**2*wz0
              pmmnts(20,ip) = pp(1,ip)*pp(3,ip)*vzi*wz0
              pmmnts(21,ip) = pp(5,ip)*pp(7,ip)*vzi*wz0
              pmmnts(23,ip) = pp(1,ip)*pp(7,ip)*vzi*wz0
              pmmnts(24,ip) = pp(5,ip)*pp(3,ip)*vzi*wz0
              pmmnts(29,ip) = pp(1,ip)*pp(3,ip)*pp(18,ip)*wz0
              pmmnts(30,ip) = pp(5,ip)*pp(7,ip)*pp(18,ip)*wz0
              pmmnts(31,ip) = pp(1,ip)*pp(7,ip)*pp(18,ip)*wz0
              pmmnts(32,ip) = pp(5,ip)*pp(3,ip)*pp(18,ip)*wz0
              pmmnts(33,ip) = pp(3,ip)*pp(7,ip)*pp(18,ip)**2*wz0
            enddo
          endif

          --- deposit moments into array
          do ip=1,np
            do im=1,NUMZMMNT
              zmmnts(iz(ip),im,js) = zmmnts(iz(ip),im,js) + pmmnts(im,ip)
            enddo
          enddo

          --- put particle moments into pmmnts array for zmmnts(iz+1,)
          if (l_momentum_moments .or. .not. lrelativ) then
            do ip=1,np
              vzi = 1./(pp(12,ip)+SMALLPOS)
              wz1 = pp(15,ip)
              pmmnts( 1,ip) = wz1
              pmmnts( 2,ip) = pp(2,ip)*wz1
              pmmnts( 3,ip) = pp(6,ip)*wz1
              pmmnts( 4,ip) = pp(10,ip)*wz1
              pmmnts( 7,ip) = pp(4,ip)*wz1
              pmmnts( 8,ip) = pp(8,ip)*wz1
              pmmnts( 9,ip) = pp(12,ip)*wz1
              pmmnts(10,ip) = pp(2,ip)*pp(6,ip)*wz1
              pmmnts(12,ip) = pp(2,ip)**2*wz1
              pmmnts(13,ip) = pp(6,ip)**2*wz1
              pmmnts(14,ip) = pp(10,ip)**2*wz1
              pmmnts(17,ip) = (pp(13,ip)*pp(4,ip))**2*wz1
              pmmnts(18,ip) = (pp(13,ip)*pp(8,ip))**2*wz1
              pmmnts(19,ip) = (pp(13,ip)*pp(12,ip))**2*wz1
              pmmnts(22,ip) = pp(10,ip)*pp(12,ip)*wz1
              pmmnts(25,ip) = pp(2,ip)*pp(12,ip)*wz1
              pmmnts(26,ip) = pp(6,ip)*pp(12,ip)*wz1
              pmmnts(27,ip) = pp(4,ip)*pp(12,ip)*wz1
              pmmnts(28,ip) = pp(8,ip)*pp(12,ip)*wz1
              pmmnts( 5,ip) = pp(4,ip)*vzi*wz1
              pmmnts( 6,ip) = pp(8,ip)*vzi*wz1
              pmmnts(11,ip) = pp(4,ip)*pp(8,ip)*vzi**2*wz1
              pmmnts(15,ip) = (pp(4,ip)*vzi)**2*wz1
              pmmnts(16,ip) = (pp(8,ip)*vzi)**2*wz1
              pmmnts(20,ip) = pp(2,ip)*pp(4,ip)*vzi*wz1
              pmmnts(21,ip) = pp(6,ip)*pp(8,ip)*vzi*wz1
              pmmnts(23,ip) = pp(2,ip)*pp(8,ip)*vzi*wz1
              pmmnts(24,ip) = pp(6,ip)*pp(4,ip)*vzi*wz1
              pmmnts(29,ip) = pp(2,ip)*pp(4,ip)*wz1
              pmmnts(30,ip) = pp(6,ip)*pp(8,ip)*wz1
              pmmnts(31,ip) = pp(2,ip)*pp(8,ip)*wz1
              pmmnts(32,ip) = pp(6,ip)*pp(4,ip)*wz1
              pmmnts(33,ip) = pp(4,ip)*pp(8,ip)*wz1
            enddo
          else
            do ip=1,np
              vzi = 1./(pp(12,ip)+SMALLPOS)
              wz1 = pp(15,ip)
              pmmnts( 1,ip) = wz1
              pmmnts( 2,ip) = pp(2,ip)*wz1
              pmmnts( 3,ip) = pp(6,ip)*wz1
              pmmnts( 4,ip) = pp(10,ip)*wz1
              pmmnts( 7,ip) = pp(4,ip)*pp(13,ip)*wz1
              pmmnts( 8,ip) = pp(8,ip)*pp(13,ip)*wz1
              pmmnts( 9,ip) = pp(12,ip)*pp(13,ip)*wz1
              pmmnts(10,ip) = pp(2,ip)*pp(6,ip)*wz1
              pmmnts(12,ip) = pp(2,ip)**2*wz1
              pmmnts(13,ip) = pp(6,ip)**2*wz1
              pmmnts(14,ip) = pp(10,ip)**2*wz1
              pmmnts(17,ip) = (pp(13,ip)*pp(4,ip))**2*wz1
              pmmnts(18,ip) = (pp(13,ip)*pp(8,ip))**2*wz1
              pmmnts(19,ip) = (pp(13,ip)*pp(12,ip))**2*wz1
              pmmnts(22,ip) = pp(10,ip)*pp(13,ip)*pp(12,ip)*wz1
              pmmnts(25,ip) = pp(2,ip)*pp(12,ip)*pp(13,ip)*wz1
              pmmnts(26,ip) = pp(6,ip)*pp(12,ip)*pp(13,ip)*wz1
              pmmnts(27,ip) = pp(4,ip)*pp(12,ip)*pp(13,ip)**2*wz1
              pmmnts(28,ip) = pp(8,ip)*pp(12,ip)*pp(13,ip)**2*wz1
              pmmnts( 5,ip) = pp(4,ip)*vzi*wz1
              pmmnts( 6,ip) = pp(8,ip)*vzi*wz1
              pmmnts(11,ip) = pp(4,ip)*pp(8,ip)*vzi**2*wz1
              pmmnts(15,ip) = (pp(4,ip)*vzi)**2*wz1
              pmmnts(16,ip) = (pp(8,ip)*vzi)**2*wz1
              pmmnts(20,ip) = pp(2,ip)*pp(4,ip)*vzi*wz1
              pmmnts(21,ip) = pp(6,ip)*pp(8,ip)*vzi*wz1
              pmmnts(23,ip) = pp(2,ip)*pp(8,ip)*vzi*wz1
              pmmnts(24,ip) = pp(6,ip)*pp(4,ip)*vzi*wz1
              pmmnts(29,ip) = pp(2,ip)*pp(4,ip)*pp(13,ip)*wz1
              pmmnts(30,ip) = pp(6,ip)*pp(8,ip)*pp(13,ip)*wz1
              pmmnts(31,ip) = pp(2,ip)*pp(8,ip)*pp(13,ip)*wz1
              pmmnts(32,ip) = pp(6,ip)*pp(4,ip)*pp(13,ip)*wz1
              pmmnts(33,ip) = pp(4,ip)*pp(8,ip)*pp(13,ip)**2*wz1
            enddo
          endif

          --- deposit moments into array
          do ip=1,np
            do im=1,NUMZMMNT
              zmmnts(iz(ip)+1,im,js) = zmmnts(iz(ip)+1,im,js) + pmmnts(im,ip)
            enddo
          enddo

#else

 -------------------------------------------------------------------------
        --- This method is faster for RISC and pentium machines.

          if (l_momentum_moments .or. .not. lrelativ) then
            do ip=1,np
              --- sum particle moments into zmmnts(iz,,js)
              vzi = 1./(pp(11,ip)+SMALLPOS)
              wz0 = pp(14,ip)
              i = iz(ip)
              zmmnts(i, 1,js) = zmmnts(i, 1,js) + wz0
              zmmnts(i, 2,js) = zmmnts(i, 2,js) + pp(1,ip)*wz0
              zmmnts(i, 3,js) = zmmnts(i, 3,js) + pp(5,ip)*wz0
              zmmnts(i, 4,js) = zmmnts(i, 4,js) + pp(9,ip)*wz0
              zmmnts(i, 7,js) = zmmnts(i, 7,js) + pp(3,ip)*wz0
              zmmnts(i, 8,js) = zmmnts(i, 8,js) + pp(7,ip)*wz0
              zmmnts(i, 9,js) = zmmnts(i, 9,js) + pp(11,ip)*wz0
              zmmnts(i,10,js) = zmmnts(i,10,js) + pp(1,ip)*pp(5,ip)*wz0
              zmmnts(i,12,js) = zmmnts(i,12,js) + pp(1,ip)**2*wz0
              zmmnts(i,13,js) = zmmnts(i,13,js) + pp(5,ip)**2*wz0
              zmmnts(i,14,js) = zmmnts(i,14,js) + pp(9,ip)**2*wz0
              zmmnts(i,17,js) = zmmnts(i,17,js) + (pp(18,ip)*pp(3,ip))**2*wz0
              zmmnts(i,18,js) = zmmnts(i,18,js) + (pp(18,ip)*pp(7,ip))**2*wz0
              zmmnts(i,19,js) = zmmnts(i,19,js) + (pp(18,ip)*pp(11,ip))**2*wz0
              zmmnts(i,22,js) = zmmnts(i,22,js) + pp(9,ip)*pp(11,ip)*wz0
              zmmnts(i,25,js) = zmmnts(i,25,js) + pp(1,ip)*pp(11,ip)*wz0
              zmmnts(i,26,js) = zmmnts(i,26,js) + pp(5,ip)*pp(11,ip)*wz0
              zmmnts(i,27,js) = zmmnts(i,27,js) + pp(3,ip)*pp(11,ip)*wz0
              zmmnts(i,28,js) = zmmnts(i,28,js) + pp(7,ip)*pp(11,ip)*wz0
              zmmnts(i, 5,js) = zmmnts(i, 5,js) + pp(3,ip)*vzi*wz0
              zmmnts(i, 6,js) = zmmnts(i, 6,js) + pp(7,ip)*vzi*wz0
              zmmnts(i,11,js) = zmmnts(i,11,js) + pp(3,ip)*pp(7,ip)*vzi**2*wz0
              zmmnts(i,15,js) = zmmnts(i,15,js) + (pp(3,ip)*vzi)**2*wz0
              zmmnts(i,16,js) = zmmnts(i,16,js) + (pp(7,ip)*vzi)**2*wz0
              zmmnts(i,20,js) = zmmnts(i,20,js) + pp(1,ip)*pp(3,ip)*vzi*wz0
              zmmnts(i,21,js) = zmmnts(i,21,js) + pp(5,ip)*pp(7,ip)*vzi*wz0
              zmmnts(i,23,js) = zmmnts(i,23,js) + pp(1,ip)*pp(7,ip)*vzi*wz0
              zmmnts(i,24,js) = zmmnts(i,24,js) + pp(5,ip)*pp(3,ip)*vzi*wz0
              zmmnts(i,29,js) = zmmnts(i,29,js) + pp(1,ip)*pp(3,ip)*wz0
              zmmnts(i,30,js) = zmmnts(i,30,js) + pp(5,ip)*pp(7,ip)*wz0
              zmmnts(i,31,js) = zmmnts(i,31,js) + pp(1,ip)*pp(7,ip)*wz0
              zmmnts(i,32,js) = zmmnts(i,32,js) + pp(5,ip)*pp(3,ip)*wz0
              zmmnts(i,33,js) = zmmnts(i,33,js) + pp(3,ip)*pp(7,ip)*wz0

              --- sum particle moments into zmmnts(iz+1,,js)
              vzi = 1./(pp(12,ip)+SMALLPOS)
              wz1 = pp(15,ip)
              i = iz(ip) + 1
              zmmnts(i, 1,js) = zmmnts(i, 1,js) + wz1
              zmmnts(i, 2,js) = zmmnts(i, 2,js) + pp(2,ip)*wz1
              zmmnts(i, 3,js) = zmmnts(i, 3,js) + pp(6,ip)*wz1
              zmmnts(i, 4,js) = zmmnts(i, 4,js) + pp(10,ip)*wz1
              zmmnts(i, 7,js) = zmmnts(i, 7,js) + pp(4,ip)*wz1
              zmmnts(i, 8,js) = zmmnts(i, 8,js) + pp(8,ip)*wz1
              zmmnts(i, 9,js) = zmmnts(i, 9,js) + pp(12,ip)*wz1
              zmmnts(i,10,js) = zmmnts(i,10,js) + pp(2,ip)*pp(6,ip)*wz1
              zmmnts(i,12,js) = zmmnts(i,12,js) + pp(2,ip)**2*wz1
              zmmnts(i,13,js) = zmmnts(i,13,js) + pp(6,ip)**2*wz1
              zmmnts(i,14,js) = zmmnts(i,14,js) + pp(10,ip)**2*wz1
              zmmnts(i,17,js) = zmmnts(i,17,js) + (pp(13,ip)*pp(4,ip))**2*wz1
              zmmnts(i,18,js) = zmmnts(i,18,js) + (pp(13,ip)*pp(8,ip))**2*wz1
              zmmnts(i,19,js) = zmmnts(i,19,js) + (pp(13,ip)*pp(12,ip))**2*wz1
              zmmnts(i,22,js) = zmmnts(i,22,js) + pp(10,ip)*pp(12,ip)*wz1
              zmmnts(i,25,js) = zmmnts(i,25,js) + pp(2,ip)*pp(12,ip)*wz1
              zmmnts(i,26,js) = zmmnts(i,26,js) + pp(6,ip)*pp(12,ip)*wz1
              zmmnts(i,27,js) = zmmnts(i,27,js) + pp(4,ip)*pp(12,ip)*wz1
              zmmnts(i,28,js) = zmmnts(i,28,js) + pp(8,ip)*pp(12,ip)*wz1
              zmmnts(i, 5,js) = zmmnts(i, 5,js) + pp(4,ip)*vzi*wz1
              zmmnts(i, 6,js) = zmmnts(i, 6,js) + pp(8,ip)*vzi*wz1
              zmmnts(i,11,js) = zmmnts(i,11,js) + pp(4,ip)*pp(8,ip)*vzi**2*wz1
              zmmnts(i,15,js) = zmmnts(i,15,js) + (pp(4,ip)*vzi)**2*wz1
              zmmnts(i,16,js) = zmmnts(i,16,js) + (pp(8,ip)*vzi)**2*wz1
              zmmnts(i,20,js) = zmmnts(i,20,js) + pp(2,ip)*pp(4,ip)*vzi*wz1
              zmmnts(i,21,js) = zmmnts(i,21,js) + pp(6,ip)*pp(8,ip)*vzi*wz1
              zmmnts(i,23,js) = zmmnts(i,23,js) + pp(2,ip)*pp(8,ip)*vzi*wz1
              zmmnts(i,24,js) = zmmnts(i,24,js) + pp(6,ip)*pp(4,ip)*vzi*wz1
              zmmnts(i,29,js) = zmmnts(i,29,js) + pp(2,ip)*pp(4,ip)*wz1
              zmmnts(i,30,js) = zmmnts(i,30,js) + pp(6,ip)*pp(8,ip)*wz1
              zmmnts(i,31,js) = zmmnts(i,31,js) + pp(2,ip)*pp(8,ip)*wz1
              zmmnts(i,32,js) = zmmnts(i,32,js) + pp(6,ip)*pp(4,ip)*wz1
              zmmnts(i,33,js) = zmmnts(i,33,js) + pp(4,ip)*pp(8,ip)*wz1

            enddo
          else
            do ip=1,np
              --- sum particle moments into zmmnts(iz,,js)
              vzi = 1./(pp(11,ip)+SMALLPOS)
              wz0 = pp(14,ip)
              i = iz(ip)
              zmmnts(i, 1,js) = zmmnts(i, 1,js) + wz0
              zmmnts(i, 2,js) = zmmnts(i, 2,js) + pp(1,ip)*wz0
              zmmnts(i, 3,js) = zmmnts(i, 3,js) + pp(5,ip)*wz0
              zmmnts(i, 4,js) = zmmnts(i, 4,js) + pp(9,ip)*wz0
              zmmnts(i, 7,js) = zmmnts(i, 7,js) + pp(3,ip)*pp(18,ip)*wz0
              zmmnts(i, 8,js) = zmmnts(i, 8,js) + pp(7,ip)*pp(18,ip)*wz0
              zmmnts(i, 9,js) = zmmnts(i, 9,js) + pp(11,ip)*pp(18,ip)*wz0
              zmmnts(i,10,js) = zmmnts(i,10,js) + pp(1,ip)*pp(5,ip)*wz0
              zmmnts(i,12,js) = zmmnts(i,12,js) + pp(1,ip)**2*wz0
              zmmnts(i,13,js) = zmmnts(i,13,js) + pp(5,ip)**2*wz0
              zmmnts(i,14,js) = zmmnts(i,14,js) + pp(9,ip)**2*wz0
              zmmnts(i,17,js) = zmmnts(i,17,js) + (pp(18,ip)*pp(3,ip))**2*wz0
              zmmnts(i,18,js) = zmmnts(i,18,js) + (pp(18,ip)*pp(7,ip))**2*wz0
              zmmnts(i,19,js) = zmmnts(i,19,js) + (pp(18,ip)*pp(11,ip))**2*wz0
              zmmnts(i,22,js) = zmmnts(i,22,js) + pp(9,ip)*pp(11,ip)*pp(18,ip)*wz0
              zmmnts(i,25,js) = zmmnts(i,25,js) + pp(1,ip)*pp(11,ip)*pp(18,ip)*wz0
              zmmnts(i,26,js) = zmmnts(i,26,js) + pp(5,ip)*pp(11,ip)*pp(18,ip)*wz0
              zmmnts(i,27,js) = zmmnts(i,27,js) + pp(3,ip)*pp(11,ip)*pp(18,ip)**2*wz0
              zmmnts(i,28,js) = zmmnts(i,28,js) + pp(7,ip)*pp(11,ip)*pp(18,ip)**2*wz0
              zmmnts(i, 5,js) = zmmnts(i, 5,js) + pp(3,ip)*vzi*wz0
              zmmnts(i, 6,js) = zmmnts(i, 6,js) + pp(7,ip)*vzi*wz0
              zmmnts(i,11,js) = zmmnts(i,11,js) + pp(3,ip)*pp(7,ip)*vzi**2*wz0
              zmmnts(i,15,js) = zmmnts(i,15,js) + (pp(3,ip)*vzi)**2*wz0
              zmmnts(i,16,js) = zmmnts(i,16,js) + (pp(7,ip)*vzi)**2*wz0
              zmmnts(i,20,js) = zmmnts(i,20,js) + pp(1,ip)*pp(3,ip)*vzi*wz0
              zmmnts(i,21,js) = zmmnts(i,21,js) + pp(5,ip)*pp(7,ip)*vzi*wz0
              zmmnts(i,23,js) = zmmnts(i,23,js) + pp(1,ip)*pp(7,ip)*vzi*wz0
              zmmnts(i,24,js) = zmmnts(i,24,js) + pp(5,ip)*pp(3,ip)*vzi*wz0
              zmmnts(i,29,js) = zmmnts(i,29,js) + pp(1,ip)*pp(3,ip)*pp(18,ip)*wz0
              zmmnts(i,30,js) = zmmnts(i,30,js) + pp(5,ip)*pp(7,ip)*pp(18,ip)*wz0
              zmmnts(i,31,js) = zmmnts(i,31,js) + pp(1,ip)*pp(7,ip)*pp(18,ip)*wz0
              zmmnts(i,32,js) = zmmnts(i,32,js) + pp(5,ip)*pp(3,ip)*pp(18,ip)*wz0
              zmmnts(i,33,js) = zmmnts(i,33,js) + pp(3,ip)*pp(7,ip)*pp(18,ip)**2*wz0

              --- sum particle moments into zmmnts(iz+1,,js)
              vzi = 1./(pp(12,ip)+SMALLPOS)
              wz1 = pp(15,ip)
              i = iz(ip) + 1
              zmmnts(i, 1,js) = zmmnts(i, 1,js) + wz1
              zmmnts(i, 2,js) = zmmnts(i, 2,js) + pp(2,ip)*wz1
              zmmnts(i, 3,js) = zmmnts(i, 3,js) + pp(6,ip)*wz1
              zmmnts(i, 4,js) = zmmnts(i, 4,js) + pp(10,ip)*wz1
              zmmnts(i, 7,js) = zmmnts(i, 7,js) + pp(4,ip)*pp(13,ip)*wz1
              zmmnts(i, 8,js) = zmmnts(i, 8,js) + pp(8,ip)*pp(13,ip)*wz1
              zmmnts(i, 9,js) = zmmnts(i, 9,js) + pp(12,ip)*pp(13,ip)*wz1
              zmmnts(i,10,js) = zmmnts(i,10,js) + pp(2,ip)*pp(6,ip)*wz1
              zmmnts(i,12,js) = zmmnts(i,12,js) + pp(2,ip)**2*wz1
              zmmnts(i,13,js) = zmmnts(i,13,js) + pp(6,ip)**2*wz1
              zmmnts(i,14,js) = zmmnts(i,14,js) + pp(10,ip)**2*wz1
              zmmnts(i,17,js) = zmmnts(i,17,js) + (pp(13,ip)*pp(4,ip))**2*wz1
              zmmnts(i,18,js) = zmmnts(i,18,js) + (pp(13,ip)*pp(8,ip))**2*wz1
              zmmnts(i,19,js) = zmmnts(i,19,js) + (pp(13,ip)*pp(12,ip))**2*wz1
              zmmnts(i,22,js) = zmmnts(i,22,js) + pp(10,ip)*pp(13,ip)*pp(12,ip)*wz1
              zmmnts(i,25,js) = zmmnts(i,25,js) + pp(2,ip)*pp(12,ip)*pp(13,ip)*wz1
              zmmnts(i,26,js) = zmmnts(i,26,js) + pp(6,ip)*pp(12,ip)*pp(13,ip)*wz1
              zmmnts(i,27,js) = zmmnts(i,27,js) + pp(4,ip)*pp(12,ip)*pp(13,ip)**2*wz1
              zmmnts(i,28,js) = zmmnts(i,28,js) + pp(8,ip)*pp(12,ip)*pp(13,ip)**2*wz1
              zmmnts(i, 5,js) = zmmnts(i, 5,js) + pp(4,ip)*vzi*wz1
              zmmnts(i, 6,js) = zmmnts(i, 6,js) + pp(8,ip)*vzi*wz1
              zmmnts(i,11,js) = zmmnts(i,11,js) + pp(4,ip)*pp(8,ip)*vzi**2*wz1
              zmmnts(i,15,js) = zmmnts(i,15,js) + (pp(4,ip)*vzi)**2*wz1
              zmmnts(i,16,js) = zmmnts(i,16,js) + (pp(8,ip)*vzi)**2*wz1
              zmmnts(i,20,js) = zmmnts(i,20,js) + pp(2,ip)*pp(4,ip)*vzi*wz1
              zmmnts(i,21,js) = zmmnts(i,21,js) + pp(6,ip)*pp(8,ip)*vzi*wz1
              zmmnts(i,23,js) = zmmnts(i,23,js) + pp(2,ip)*pp(8,ip)*vzi*wz1
              zmmnts(i,24,js) = zmmnts(i,24,js) + pp(6,ip)*pp(4,ip)*vzi*wz1
              zmmnts(i,29,js) = zmmnts(i,29,js) + pp(2,ip)*pp(4,ip)*pp(13,ip)*wz1
              zmmnts(i,30,js) = zmmnts(i,30,js) + pp(6,ip)*pp(8,ip)*pp(13,ip)*wz1
              zmmnts(i,31,js) = zmmnts(i,31,js) + pp(2,ip)*pp(8,ip)*pp(13,ip)*wz1
              zmmnts(i,32,js) = zmmnts(i,32,js) + pp(6,ip)*pp(4,ip)*pp(13,ip)*wz1
              zmmnts(i,33,js) = zmmnts(i,33,js) + pp(4,ip)*pp(8,ip)*pp(13,ip)**2*wz1

            enddo
          endif

 -------------------------------------------------------------------------
#endif

          --- End of Z moments accumulation
        endif

        deallocate(pp,iz)

      endif

      if (itask == 3 .and. .not. laccumulate_zmoments) then
        call getzmmnt(np,xp,yp,zp,uxp,uyp,uzp,gaminv,q,m,w,dt,dtscale,
     &                itask,nplive,
     &                uxpo,uypo,uzpo,is,isid,ismax,maxp,minp,zmmnts0,zmmnts)
      endif

!$OMP ATOMIC
      momentstime = momentstime + (wtime() - timetemp)

      return
      end

      subroutine get_zmmnts_stations(ns,jslist,pgroup,nstations,zmin,zmax,vfrm,
     &           pnum,xbar,ybar,xpbar,ypbar,x2,y2,xp2,yp2,xxp,yyp)
      use ParticleGroupmodule
      use Particles, only: xoldpid, yoldpid, zoldpid, uxoldpid, uyoldpid, uzoldpid, wpid
      use InGen, only: dt
      use Picglb, only: time
      
      --- arguments
      integer(ISZ) :: ns, nstations, jslist(ns)
      real(kind=8) :: zmin, zmax, vfrm
      real(kind=8), dimension(nstations) :: pnum,xbar,ybar,xpbar,ypbar,x2,y2,xp2,yp2,xxp,yyp
      type(ParticleGroup):: pgroup
      
      --- local variables
      integer(ISZ) :: j,ip,is,iz,izo,ist
      real(kind=8) :: dz,z,zo,ddz,sw,swo,zs,x,y,xo,yo,vx,vy,vz,xp,yp,w,
     &                vxo,vyo,vzo,xpo,ypo
      
      dz = (zmax-zmin)/nstations
      do j=1,ns
        is=jslist(j)+1
        do ip = pgroup%ins(is), pgroup%ins(is) + pgroup%nps(is) - 1
          z  = pgroup%zp(ip)-vfrm*time
          zo = pgroup%pid(ip,zoldpid)-vfrm*(time-dt*pgroup%ndts(is))
          iz = int((z-zmin)/dz)
          izo = int((zo-zmin)/dz)
          if(iz==izo) cycle
          ddz = z-zo
          if(ddzɬ.) then
            ist = iz+1
            zs = zmin+(ist-1)*dz
            sw = (zs-zo)/ddz
          else
            ist = izo+1
            zs = zmin+(ist-1)*dz
            sw = (zo-zs)/ddz
          end if
          if (istə .or. ist>nstations) cycle 
          swo = 1.-sw
          x  = pgroup%xp(ip)
          y  = pgroup%yp(ip)
          xo = pgroup%pid(ip,xoldpid)
          yo = pgroup%pid(ip,yoldpid)
           vx=(x-xo)/(dt*pgroup%ndts(is))
           vy=(y-yo)/(dt*pgroup%ndts(is))
           vz=(z-zo)/(dt*pgroup%ndts(is))
          vx  = pgroup%uxp(ip)
          vy  = pgroup%uyp(ip)
          vz  = pgroup%uzp(ip)
          vxo = pgroup%pid(ip,uxoldpid)
          vyo = pgroup%pid(ip,uyoldpid)
          vzo = pgroup%pid(ip,uzoldpid)
          xp=vx/vz
          yp=vy/vz
          xpo=vxo/vzo
          ypo=vyo/vzo
          x = sw*x + swo*xo
          y = sw*y + swo*yo
          xp = sw*xp + swo*xpo
          yp = sw*yp + swo*ypo
          w = pgroup%sw(is)
          if(wpidɬ) w = w*pgroup%pid(ip,wpid)
          pnum (ist) = pnum (ist) + w
          xbar (ist) = xbar (ist) + w*x
          ybar (ist) = ybar (ist) + w*y
          xpbar(ist) = xpbar(ist) + w*xp
          ypbar(ist) = ypbar(ist) + w*yp
          x2   (ist) = x2   (ist) + w*x*x
          y2   (ist) = y2   (ist) + w*y*y
          xp2  (ist) = xp2  (ist) + w*xp*xp
          yp2  (ist) = yp2  (ist) + w*yp*yp
          xxp  (ist) = xxp  (ist) + w*x*xp
          yyp  (ist) = yyp  (ist) + w*y*yp
        end do
      end do

      return
      end subroutine get_zmmnts_stations

[getextpart] [padvnc3d]
      subroutine getextrapolatedparticles(ipmin,np,pgroup,npid,
     &                                    dt,uxpo,uypo,uzpo,isid,time,zbeam)
      use ParticleGroupmodule
      use Z_Moments,Only: zmmntmin,zmmntmax,dzm
      use ExtPart
      integer(ISZ):: ipmin,np,npid,isid
      type(ParticleGroup):: pgroup
      real(kind=8):: dt,time,zbeam
      real(kind=8):: uxpo(np),uypo(np),uzpo(np)

  Given the information at the end of a time step, the current position and
  velocity and the previous velocity, extrapolate particles to the
  positions given (in the lab frame) by zzepwin.
  The extrapolation is approximate. A better version might use the full
  advance capabilities (from padvnc3d) but that would probably be overkill.
  --- az calculated from current and previous Vz and dt
  --- dti calculated by assuming constant az and 
  ---   computing times at which z crosses lab window
  --- V extrapolated using current V and estimated A
  ---   (this avoids having to use E and B to extrapolate V).
  --- X extrapolated using constant V and estimated A

      real(kind=8):: zzep,wzep
      real(kind=8):: oneondt,dtstar,delz,vx,vy,vz,ax,ay,az,dp,dm
      real(kind=8):: vznp1,zznp1,delznp1,dtstarnp1
      integer(ISZ):: iw,ip,nn
      integer(ISZ):: nextpid

      real(kind=8):: timetemp,wtime

      --- Check if the pgroup had been initialized.
      if (isid == 0) return

      if (dt .ne. 0.) then
        oneondt = 1./dt
      endif

      if (lepsaveonce) then
        --- Make sure that epflagpid is setup. This might not be setup if
        --- the lepsaveonce was set after the generate.
        if (epflagpid == 0) then
          epflagpid = nextpid()
          call setuppgroup(pgroup)
        endif
      endif

      if (npidepmax .ne. npid) then
        npidepmax = npid
        call gchange("ExtPart",0)
      endif

      --- Loop over windows
      do iw=1,nepwin

        --- Get the location of the extrapolation plane.
        if (izepwin(iw) >= 0) then
          zzep = zmmntmin + izepwin(iw)*dzm + zbeam
          wzep = dzm
        else
          zzep = zzepwin(iw)
          wzep = wzepwin(iw)
        endif

        --- Skip windows that are outside of the grid frame.
        if (zzep+wzep <= zmmntmin+zbeam .or.
     &      zzep-wzep >= zmmntmax+zbeam) cycle

        --- Make sure that there is enough room - assuming that all particles
        --- could be saved.
        if (nep(iw,isid)+np > nepmax) then
          nepmax = nep(iw,isid)+np
          call gchange("ExtPart",0)
        endif

        --- extrapolate to window if near it
        do ip=ipmin,ipmin+np-1
          if (pgroup%zp(ip) < zzep-wzep .or.
     &        pgroup%zp(ip) > zzep+wzep) cycle

          --- Check if particle has already been saved in this window.
          --- This only works if multiple windows don't overlap.
          if (lepsaveonce) then
            if (pgroup%pid(ip,epflagpid) == iw) cycle
          endif

          vx = pgroup%uxp(ip)*pgroup%gaminv(ip)
          vy = pgroup%uyp(ip)*pgroup%gaminv(ip)
          vz = pgroup%uzp(ip)*pgroup%gaminv(ip)

          if (dt == 0.) then
            ax = 0.
            ay = 0.
            az = 0.
          else
            ax = (pgroup%uxp(ip) - uxpo(ip-ipmin+1))*oneondt
            ay = (pgroup%uyp(ip) - uypo(ip-ipmin+1))*oneondt
            az = (pgroup%uzp(ip) - uzpo(ip-ipmin+1))*oneondt
          endif

          delz = (pgroup%zp(ip) - zzep)
          if (az == 0.) then
            if (vz == 0.) cycle
            dtstar = -delz/vz
          else
            if ((vz**2 - 2.*az*delz) > 0.) then
              dm = (-vz - sqrt(vz**2 - 2.*az*delz))/az
              dp = (-vz + sqrt(vz**2 - 2.*az*delz))/az
              if (abs(dp) < abs(dm)) then
                dtstar = dp
              else
                dtstar = dm
              endif
            else
              --- In this case, because of az, the particle will never
              --- cross the zzep plane, so effectively decrease az
              --- so that the particle would just touch the plane.
              --- This is very kludgy and ad-hoc, but the calculation
              --- of az is already equally kludgy.
              dtstar = -vz/az
            endif
          endif

          --- Make an estimate of where the particle will be on the next step.
          --- This assumes that az will be constant.
          --- If it will be much closer to the z plane (smaller dtstar),
          --- then wait until then to save the particle.
          if (lepsaveonce) then
            vznp1 = vz + 0.5*az*dt
            zznp1 = pgroup%zp(ip) + vznp1*dt
            vznp1 = vznp1 + 0.5*az*dt
            delznp1 = (zznp1 - zzep)
            dtstarnp1 = 2.*delznp1/(sqrt(vznp1**2 - 2.*az*delznp1) + vznp1)
            if (az == 0.) then
              if (vznp1 == 0.) cycle
              dtstarnp1 = -delznp1/vznp1
            else
              if ((vznp1**2 - 2.*az*delznp1) > 0.) then
                dm = (-vznp1 - sqrt(vznp1**2 - 2.*az*delznp1))/az
                dp = (-vznp1 + sqrt(vznp1**2 - 2.*az*delznp1))/az
                if (abs(dp) < abs(dm)) then
                  dtstarnp1 = dp
                else
                  dtstarnp1 = dm
                endif
              else
                dtstarnp1 = -vznp1/az
              endif
            endif
            --- The factor of 0.75 is an adhoc parameter, an attempt to
            --- prevent particles from being missed if the estimate of
            --- dtstarnp1 is way off.
            if (abs(dtstarnp1) < abs(dtstar)*epclosenessfactor) cycle
          endif

          nep(iw,isid) = nep(iw,isid) + 1
          nn = nep(iw,isid)

          if (lepsaveonce) then
            pgroup%pid(ip,epflagpid) = iw
          endif

          tep(nn,iw,isid) = time + dtstar
          xep(nn,iw,isid) = pgroup%xp(ip) + vx*dtstar + 0.5*ax*dtstar**2
          yep(nn,iw,isid) = pgroup%yp(ip) + vy*dtstar + 0.5*ay*dtstar**2
          uxep(nn,iw,isid) = vx + ax*dtstar
          uyep(nn,iw,isid) = vy + ay*dtstar
          uzep(nn,iw,isid) = vz + az*dtstar
          if (npid > 0) pidep(nn,:,iw,isid) = pgroup%pid(ip,:)

        enddo
      enddo

      return
      end

[padvnc3d]
      subroutine getcrossingparticles(ipmin,np,pgroup,dt,isid,time,zbeam)
      use Subtimerstop
      use ZCrossingParticles
      use ParticleGroupmodule
      use Z_Moments,Only: zmmntmin,zmmntmax,dzm
      integer(ISZ):: ipmin,np,isid
      type(ParticleGroup):: pgroup
      real(kind=8):: dt,time,zbeam

  Given the position at time level (i-1) and the velocity at time
  level (i-1/2), get particles that will cross the positions given
  (in the lab frame) by zzzcwin over the next time step. This does a
  fraction of the standard position advance to find the transverse
  positions. Since this method uses the same advance equation as the
  standard particle advance, the data accumulated from timestep to
  timestep match up seamlessly.
  Note that there is no need for the lepsaveonce option, since this
  routine only grabs particles that actually are crossing the z location.

      real(kind=8):: zzzc
      real(kind=8):: dtstar,vx,vy,vz,zold,znew
      integer(ISZ):: iw,ip,nn
      integer(ISZ):: nextpid
      real(kind=8):: substarttime,wtime
      if (ltoptimesubs) substarttime = wtime()

      --- If there are no positions defined, return immediately
      if (nzcwin == 0) return

      --- Check if the pgroup had been initialized.
      if (isid == 0) return

      if (npidzcmax .ne. pgroup%npid) then
        npidzcmax = pgroup%npid
        call gchange("ZCrossingParticles",0)
      endif

      --- Loop over windows
      do iw=1,nzcwin

        --- Get the location of the extrapolation plane.
        if (izzcwin(iw) >= 0) then
          zzzc = zmmntmin + izzcwin(iw)*dzm + zbeam
        else
          zzzc = zzzcwin(iw)
        endif

        --- Skip windows that are outside of the grid frame.
        if (zzzc+dzm <= zmmntmin+zbeam .or.
     &      zzzc-dzm >= zmmntmax+zbeam) cycle

        --- Make sure that there is enough room - assuming that all particles
        --- could be saved.
        if (nzc(iw,isid)+np > nzcmax) then
          nzcmax = nzc(iw,isid)+np
          call gchange("ZCrossingParticles",0)
        endif

        --- extrapolate to window if near it
        do ip=ipmin,ipmin+np-1
          vz = pgroup%uzp(ip)*pgroup%gaminv(ip)
          zold = pgroup%zp(ip)
          znew = pgroup%zp(ip) + dt*vz
          if (.not. ((zold <= zzzc .and. zzzc <  znew) .or.
     &               (znew <  zzzc .and. zzzc <= zold))
     &        .or. vz == 0.) cycle

          dtstar = (zzzc - zold)/vz

          vx = pgroup%uxp(ip)*pgroup%gaminv(ip)
          vy = pgroup%uyp(ip)*pgroup%gaminv(ip)

          nzc(iw,isid) = nzc(iw,isid) + 1
          nn = nzc(iw,isid)

          uxzc(nn,iw,isid) = vx
          uyzc(nn,iw,isid) = vy
          uzzc(nn,iw,isid) = vz
          tzc(nn,iw,isid) = time + dtstar
          xzc(nn,iw,isid) = pgroup%xp(ip) + vx*dtstar
          yzc(nn,iw,isid) = pgroup%yp(ip) + vy*dtstar
          if (pgroup%npid > 0) pidzc(nn,:,iw,isid) = pgroup%pid(ip,:)

        enddo
      enddo

!$OMP MASTER
      if (ltoptimesubs) timegetcrossingparticles = timegetcrossingparticles + wtime() - substarttime
!$OMP END MASTER
      return
      end

[dolabwn] [w3dgen] [wxygen]
      subroutine initlabwn(ntlabwnestimate)
      use InDiag
      use Hist
      use Lab_Moments
      integer(ISZ):: ntlabwnestimate

  Initialize the lab window moments

      integer(ISZ):: iwin,nl

      nl = nlabwn
      nlabwn = 0
      do iwin = 1, nl
         if (zlw(iwin) < LARGEPOS) nlabwn = iwin
      enddo

      if (nlabwn > 0) then
        --- Note that the original code used abs(nhist), but it was giving
        --- the HPUX f90 compiler fits. The negative nhist feature was
        --- made obsolete anyway.
        if (itlabwn == 0) itlabwn = max(1,nhist)
        --- ntlabwnestimate is an estimate of the number of time steps across
        --- the system, i.e. the number of times that data will be saved
        --- assuming nhist=1. That then needs to be scale by how often the
        --- data is collected.
        if (ntlabwn == 0) ntlabwn = max(1,ntlabwnestimate/itlabwn)
      endif
      call gchange("Lab_Moments", 0)

      return
      end

[padvncxy] [step3d]
      subroutine getlabwn()
      use Beam_acc
      use InDiag
      use Picglb
      use Z_Moments
      use Z_arrays
      use Lab_Moments
 
   Sets moments for species 1 in all axial windows in the lab frame
   Also saves appropriate moments in time history arrays.
   Get info from z moments
   The data is only saved every itlabwn steps, even if the moments
   are actually calculated more or less often.

      real(kind=8):: w0,w1,w0z,w1z
      integer(ISZ):: i,ilw,iz,izz,js

      if (nlabwn == 0) return
      if (mod(it,itlabwn) > 0) return

      --- Make sure that the lab window moments are set to have the same
      --- number of species as the z moments.
      if (nslabwn /= nszmmnt) then
        nslabwn = nszmmnt
        call gchange("Lab_Moments",0)
      endif

      do i = 1, nlabwn
        if (zlw(i) > (zbeam+zmmntmin) .and.
     &      zlw(i) < (zbeam+zmmntmax)) then
          if (maxval(ilabwn(i,:)) == ntlabwn) then
            ntlabwn = ntlabwn + max(10,int(ntlabwn/10))
            call gchange("Lab_Moments",0)
          endif
          iz = (zlw(i) - zbeam - zmmntmin)*dzmi
          w0 = (zlw(i) - zbeam - zmmntmin)*dzmi - iz
          w1 = 1. - w0
          izz = (zlw(i) - zbeam - zzmin)*dzzi
          w0z = (zlw(i) - zbeam - zzmin)*dzzi - izz
          w1z = 1. - w0z
          do js=0,nslabwn
            if (zmomentscalculated(js)) then
              ilabwn(i,js) = ilabwn(i,js) + 1
              ilw = ilabwn(i,js)
              timelw(ilw,i,js)    = time
              pnumlw(ilw,i,js)    = w1*pnumz(iz,js)    + w0*pnumz(iz+1,js)
              xbarlw(ilw,i,js)    = w1*xbarz(iz,js)    + w0*xbarz(iz+1,js)
              ybarlw(ilw,i,js)    = w1*ybarz(iz,js)    + w0*ybarz(iz+1,js)
              zbarlw(ilw,i,js)    = w1*zbarz(iz,js)    + w0*zbarz(iz+1,js)
              xpbarlw(ilw,i,js)   = w1*xpbarz(iz,js)   + w0*xpbarz(iz+1,js)
              ypbarlw(ilw,i,js)   = w1*ypbarz(iz,js)   + w0*ypbarz(iz+1,js)
              vxbarlw(ilw,i,js)   = w1*vxbarz(iz,js)   + w0*vxbarz(iz+1,js)
              vybarlw(ilw,i,js)   = w1*vybarz(iz,js)   + w0*vybarz(iz+1,js)
              vzbarlw(ilw,i,js)   = w1*vzbarz(iz,js)   + w0*vzbarz(iz+1,js)
              xybarlw(ilw,i,js)   = w1*xybarz(iz,js)   + w0*xybarz(iz+1,js)
              xypbarlw(ilw,i,js)  = w1*xypbarz(iz,js)  + w0*xypbarz(iz+1,js)
              yxpbarlw(ilw,i,js)  = w1*yxpbarz(iz,js)  + w0*yxpbarz(iz+1,js)
              xpypbarlw(ilw,i,js) = w1*xpypbarz(iz,js) + w0*xpypbarz(iz+1,js)
              xsqbarlw(ilw,i,js)  = w1*xsqbarz(iz,js)  + w0*xsqbarz(iz+1,js)
              ysqbarlw(ilw,i,js)  = w1*ysqbarz(iz,js)  + w0*ysqbarz(iz+1,js)
              zsqbarlw(ilw,i,js)  = w1*zsqbarz(iz,js)  + w0*zsqbarz(iz+1,js)
              xpsqbarlw(ilw,i,js) = w1*xpsqbarz(iz,js) + w0*xpsqbarz(iz+1,js)
              ypsqbarlw(ilw,i,js) = w1*ypsqbarz(iz,js) + w0*ypsqbarz(iz+1,js)
              vxsqbarlw(ilw,i,js) = w1*vxsqbarz(iz,js) + w0*vxsqbarz(iz+1,js)
              vysqbarlw(ilw,i,js) = w1*vysqbarz(iz,js) + w0*vysqbarz(iz+1,js)
              vzsqbarlw(ilw,i,js) = w1*vzsqbarz(iz,js) + w0*vzsqbarz(iz+1,js)
              xxpbarlw(ilw,i,js)  = w1*xxpbarz(iz,js)  + w0*xxpbarz(iz+1,js)
              xvxbarlw(ilw,i,js)  = w1*xvxbarz(iz,js)  + w0*xvxbarz(iz+1,js)
              yvybarlw(ilw,i,js)  = w1*yvybarz(iz,js)  + w0*yvybarz(iz+1,js)
              yypbarlw(ilw,i,js)  = w1*yypbarz(iz,js)  + w0*yypbarz(iz+1,js)
              zvzbarlw(ilw,i,js)  = w1*zvzbarz(iz,js)  + w0*zvzbarz(iz+1,js)
              xvzbarlw(ilw,i,js)  = w1*xvzbarz(iz,js)  + w0*xvzbarz(iz+1,js)
              yvzbarlw(ilw,i,js)  = w1*yvzbarz(iz,js)  + w0*yvzbarz(iz+1,js)
              vxvzbarlw(ilw,i,js) = w1*vxvzbarz(iz,js) + w0*vxvzbarz(iz+1,js)
              vyvzbarlw(ilw,i,js) = w1*vyvzbarz(iz,js) + w0*vyvzbarz(iz+1,js)
              xrmslw(ilw,i,js)    = w1*xrmsz(iz,js)    + w0*xrmsz(iz+1,js)
              yrmslw(ilw,i,js)    = w1*yrmsz(iz,js)    + w0*yrmsz(iz+1,js)
              zrmslw(ilw,i,js)    = w1*zrmsz(iz,js)    + w0*zrmsz(iz+1,js)
              rrmslw(ilw,i,js)    = w1*rrmsz(iz,js)    + w0*rrmsz(iz+1,js)
              xprmslw(ilw,i,js)   = w1*xprmsz(iz,js)   + w0*xprmsz(iz+1,js)
              yprmslw(ilw,i,js)   = w1*yprmsz(iz,js)   + w0*yprmsz(iz+1,js)
              epsxlw(ilw,i,js)    = w1*epsxz(iz,js)    + w0*epsxz(iz+1,js)
              epsylw(ilw,i,js)    = w1*epsyz(iz,js)    + w0*epsyz(iz+1,js)
              epszlw(ilw,i,js)    = w1*epszz(iz,js)    + w0*epszz(iz+1,js)
              epsnxlw(ilw,i,js)   = w1*epsnxz(iz,js)   + w0*epsnxz(iz+1,js)
              epsnylw(ilw,i,js)   = w1*epsnyz(iz,js)   + w0*epsnyz(iz+1,js)
              epsnzlw(ilw,i,js)   = w1*epsnzz(iz,js)   + w0*epsnzz(iz+1,js)
              epsrlw(ilw,i,js)    = w1*epsrz(iz,js)    + w0*epsrz(iz+1,js)
              epsglw(ilw,i,js)    = w1*epsgz(iz,js)    + w0*epsgz(iz+1,js)
              epshlw(ilw,i,js)    = w1*epshz(iz,js)    + w0*epshz(iz+1,js)
              epsnrlw(ilw,i,js)   = w1*epsnrz(iz,js)   + w0*epsnrz(iz+1,js)
              epsnglw(ilw,i,js)   = w1*epsngz(iz,js)   + w0*epsngz(iz+1,js)
              epsnhlw(ilw,i,js)   = w1*epsnhz(iz,js)   + w0*epsnhz(iz+1,js)
              vxrmslw(ilw,i,js)   = w1*vxrmsz(iz,js)   + w0*vxrmsz(iz+1,js)
              vyrmslw(ilw,i,js)   = w1*vyrmsz(iz,js)   + w0*vyrmsz(iz+1,js)
              vzrmslw(ilw,i,js)   = w1*vzrmsz(iz,js)   + w0*vzrmsz(iz+1,js)
              currlw(ilw,i,js)    = w1z*curr(izz,js)   + w0z*curr(izz+1,js)
              lostparslw(ilw,i,js)=w1z*lostpars(izz,js)+w0z*lostpars(izz+1,js)
              if (js < nslabwn) then
                --- This special coding is needed since the linecharge
                --- is not calculated for each species.
                linechglw(ilw,i,js)  =
     &                     w1z*pnumz(izz  ,js)*dzmi*zmmntsw(js)*zmmntsq(js) +
     &                     w0z*pnumz(izz+1,js)*dzmi*zmmntsw(js)*zmmntsq(js)
              else
                linechglw(ilw,i,js) = w1z*linechg(izz  ) + w0z*linechg(izz+1)
              endif
            endif
          enddo
        endif
      enddo

      return
      end

[em2d_step] [epush] [inject3d] [padvncxy] [pushrz]
      subroutine gammaadv(np,gaminv,uxp,uyp,uzp,gamadv,lrelativ)
      use Constant
      integer(ISZ):: np
      real(kind=8):: gaminv(np),uxp(np),uyp(np),uzp(np)
      character(8):: gamadv
      logical(ISZ):: lrelativ

   Advance Gamma Inverse from it-3/2 to it-1/2 using method given by gamadv


      real(kind=8):: clghtisq,usq
      integer(ISZ):: ip

   If not using relativity then don't do anything
      if (lrelativ) then

      clghtisq = 1./clight**2

   Standard gamma advance
      if (gamadv == "stndrd") then
        do ip=1,np
          usq = (uxp(ip)**2 + uyp(ip)**2 + uzp(ip)**2)*clghtisq
          gaminv(ip) = 1./sqrt(1. + usq)
        enddo
      endif

   Fast gamma advance, version 1
      if (gamadv == "fast 1") then
        do ip=1,np
          usq = (uxp(ip)**2 + uyp(ip)**2 + uzp(ip)**2)*clghtisq
          gaminv(ip) = gaminv(ip)*1.5 - (1.+usq)/gaminv(ip)*0.5
        enddo
      endif

   Fast gamma advance, version 2, fastest, but least accurate
      if (gamadv == "fast 2") then
        do ip=1,np
          gaminv(ip)  = 1. - 0.5*(uxp(ip)**2+uyp(ip)**2+uzp(ip)**2)*clghtisq
        enddo
      endif

      endif

      return
      end
!=============================================================================

      subroutine setu_in_boosted_frame3d(np,uxp,uyp,uzp,gaminv,uxf,uyf,uzf,gammaf)
! --- Computes u={uxp,uyp,uzp} in boosted frame {uxf,uyf,uzf}
      use Constant
      integer(ISZ):: np
      real(kind=8):: uxp(np),uyp(np),uzp(np),gaminv(np)
      real(kind=8):: uxf(np),uyf(np),uzf(np),gammaf(np)
      
      integer(ISZ) :: ip
      real(kind=8):: gammapr,vx,vy,vz,v,vvp_x,vvp_y,vvp_z,vvp,vvvp_x,vvvp_y,vvvp_z,vvvp
      real(kind=8):: vxp,vyp,vzp,vp_par,vp_perp,vpr_par,vpr_perp,gaminvf,const,invclightsq
      logical(ISZ):: l_vperp
      invclightsq = 1./(clight*clight)
      do ip=1,np
        if(gammaf(ip)ə.00000001) cycle
        
!  --- get velocities
        vxp = uxp(ip)*gaminv(ip)
        vyp = uyp(ip)*gaminv(ip)
        vzp = uzp(ip)*gaminv(ip)

        gaminvf = 1./gammaf(ip)
        vx=uxf(ip)*gaminvf
        vy=uyf(ip)*gaminvf
        vz=uzf(ip)*gaminvf
        const = 1./(1.-(vxp*vx+vyp*vy+vzp*vz)*invclightsq)         

!   --- get unit vector v
        v = sqrt(vx*vx+vy*vy+vz*vz)
        vx = vx/v; vy = vy/v; vz = vz/v
        
!   --- get unit vector v x vp
        vvp_x = vy*vzp-vz*vyp
        vvp_y = vz*vxp-vx*vzp
        vvp_z = vx*vyp-vy*vxp
        vvp = sqrt(vvp_x*vvp_x+vvp_y*vvp_y+vvp_z*vvp_z)
!        if(vvpə.e-10*v) then
        if(vvp==0.) then
          l_vperp=.false.
        else
          l_vperp=.true.
        end if
        if(l_vperp) then
          vvp_x = vvp_x/vvp; vvp_y = vvp_y/vvp; vvp_z = vvp_z/vvp
 
!   --- get vnit vector v x (v x vp)
          vvvp_x = vy*vvp_z-vz*vvp_y
          vvvp_y = vz*vvp_x-vx*vvp_z
          vvvp_z = vx*vvp_y-vy*vvp_x
          vvvp = sqrt(vvvp_x*vvvp_x+vvvp_y*vvvp_y+vvvp_z*vvvp_z)
          vvvp_x = vvvp_x/vvvp; vvvp_y = vvvp_y/vvvp; vvvp_z = vvvp_z/vvvp
        end if
        
 ! get particle velocity vp in base {v,v x (v x vp)}, i.e. {v_par,v_perp}
        vp_par  = vxp*vx + vyp*vy + vzp*vz 
        if(l_vperp) vp_perp = vxp*vvvp_x + vyp*vvvp_y + vzp*vvvp_z 
        vpr_par = (vp_par-v)*const
        const = const*gaminvf
        if(l_vperp) then
          vpr_perp = vp_perp*const
        else
          vpr_perp = 0.
        endif
        gammapr = 1./sqrt(1.-(vpr_par**2+vpr_perp**2)*invclightsq)
        uxp(ip) = (vpr_par*vx+vpr_perp*vvvp_x)*gammapr
        uyp(ip) = (vpr_par*vy+vpr_perp*vvvp_y)*gammapr
        uzp(ip) = (vpr_par*vz+vpr_perp*vvvp_z)*gammapr
        gaminv(ip) = 1./sqrt(1.+(uxp(ip)*uxp(ip)+uyp(ip)*uyp(ip)+uzp(ip)*uzp(ip))*invclightsq)
      enddo

      return
      end
!=============================================================================

[inject3d] [padvnc3d] [positionadvance3d]
      subroutine setu_in_uzboosted_frame3d(np,uxp,uyp,uzp,gaminv,uzf,gammaf)
! --- Computes u={uxp,uyp,uzp} in boosted frame {0,0,uzf}
      use Constant
      integer(ISZ):: np
      real(kind=8):: uxp(np),uyp(np),uzp(np),gaminv(np)
      real(kind=8):: uzf,gammaf
      
      integer(ISZ) :: ip
      real(kind=8):: gammapr,vz,v,vvp_x,vvp_y,vvp,vvvp_x,vvvp_y,vvvp
      real(kind=8):: vxp,vyp,vzp,vp_par,vp_perp,vpr_par,vpr_perp,gaminvf,const,invclightsq
      logical(ISZ):: l_vperp
      invclightsq = 1./(clight*clight)
      if(gammafə.00000001) return
      do ip=1,np
        
!  --- get velocities
        vxp = uxp(ip)*gaminv(ip)
        vyp = uyp(ip)*gaminv(ip)
        vzp = uzp(ip)*gaminv(ip)

        gaminvf = 1./gammaf
        vz=uzf*gaminvf
        const = 1./(1.-(vzp*vz)*invclightsq)         

!   --- get unit vector v
        v = sqrt(vz*vz)
        vz = vz/v
        
!   --- get unit vector v x vp
        vvp_x = -vz*vyp
        vvp_y = vz*vxp
        vvp = sqrt(vvp_x*vvp_x+vvp_y*vvp_y)
!        if(vvpə.e-10*v) then
        if(vvp==0.) then
          l_vperp=.false.
        else
          l_vperp=.true.
        end if
        if(l_vperp) then
          vvp_x = vvp_x/vvp; vvp_y = vvp_y/vvp
 
!   --- get vnit vector v x (v x vp)
          vvvp_x = -vz*vvp_y
          vvvp_y = vz*vvp_x
          vvvp = sqrt(vvvp_x*vvvp_x+vvvp_y*vvvp_y)
          vvvp_x = vvvp_x/vvvp; vvvp_y = vvvp_y/vvvp
        else
          vvvp_x = 0.
          vvvp_y = 0.
        end if
        
 ! get particle velocity vp in base {v,v x (v x vp)}, i.e. {v_par,v_perp}
        vp_par  = vzp*vz 
        if(l_vperp) vp_perp = vxp*vvvp_x + vyp*vvvp_y 
        vpr_par = (vp_par-v)*const
        const = const*gaminvf
        if(l_vperp) then
          vpr_perp = vp_perp*const
        else
          vpr_perp = 0.
        endif
        gammapr = 1./sqrt(1.-(vpr_par**2+vpr_perp**2)*invclightsq)
        uxp(ip) = (vpr_perp*vvvp_x)*gammapr
        uyp(ip) = (vpr_perp*vvvp_y)*gammapr
        uzp(ip) = (vpr_par*vz)*gammapr
        gaminv(ip) = 1./sqrt(1.+(uxp(ip)*uxp(ip)+uyp(ip)*uyp(ip)+uzp(ip)*uzp(ip))*invclightsq)
      enddo

      return
      end
!=============================================================================

[inject3d] [padvnc3d]
      subroutine seteb_in_boosted_frame(np,ex,ey,ez,bx,by,bz,uxf,uyf,uzf,gammaf)
! --- Get E and B in boosted frame
      use Constant
      integer(ISZ):: np
      real(kind=8):: ex(np),ey(np),ez(np),bx(np),by(np),bz(np)
      real(kind=8):: uxf,uyf,uzf,gammaf

      integer(ISZ):: ip
      real(kind=8):: const,invclightsq,ex0,ey0,ez0,bx0,by0,bz0

      invclightsq = 1./(clight*clight)
      do ip=1,np
        ex0=ex(ip);ey0=ey(ip);ez0=ez(ip)
        bx0=bx(ip);by0=by(ip);bz0=bz(ip)
        const = -invclightsq*(uxf*ex0+uyf*ey0+uzf*ez0)/(1.+gammaf)
        ex(ip) = (gammaf*ex0 + uyf*bz0 - uzf*by0 + const*uxf)
        ey(ip) = (gammaf*ey0 + uzf*bx0 - uxf*bz0 + const*uyf)
        ez(ip) = (gammaf*ez0 + uxf*by0 - uyf*bx0 + const*uzf)
        const = -invclightsq*(uxf*bx0+uyf*by0+uzf*bz0)/(1.+gammaf)
        bx(ip) = (gammaf*bx0 - invclightsq*( uyf*ez0 - uzf*ey0) + const*uxf)
        by(ip) = (gammaf*by0 - invclightsq*( uzf*ex0 - uxf*ez0) + const*uyf)
        bz(ip) = (gammaf*bz0 - invclightsq*( uxf*ey0 - uyf*ex0) + const*uzf)
      enddo

      return
      end

[processlostpart] [setcurr]
      subroutine checkz_arrays(ns)
      use Z_arrays
      use InDiag,Only: lspeciesmoments
      integer(ISZ):: ns

      --- Set nszarr and update array allocation if needed
      if (lspeciesmoments) then
        --- Check if the moments are to be calculated separately for
        --- each species. If so, check if nszarr already has been set
        --- appropriately. If not, set it and allocate the arrays.
        --- If only one species, then don't have separate species data.
        if (nszarr < ns .and. ns > 1) then
          nszarr = ns
          call gchange("Z_arrays",0)
        endif
      else
        if (nszarr /= 0) then
          nszarr = 0
          call gchange("Z_arrays",0)
        endif
      endif

      return
      end

[padvnc3d] [padvncrz] [setcurrxy] [wrzgen]
      subroutine setcurr(pgroup,zbeam,ns,wpid,lspeciesmoments,lzero,bound0)
      use Subtimerstop
      use ParticleGroupmodule
      use Z_arrays
      type(ParticleGroup):: pgroup
      integer(ISZ):: ns,wpid,bound0
      real(kind=8):: zbeam
      logical(ISZ):: lspeciesmoments,lzero

   Sets 1d beam current as a function of z, directly from particle data

      integer(ISZ):: kk
      real(kind=8):: g,gw,w1,w0
      integer(ISZ):: is,js,ip,isid
      real(kind=8):: substarttime,wtime
      if (ltoptimesubs) substarttime = wtime()

      call checkz_arrays(ns)

      if (lzero) call zeroarry(curr,(nzzarr+1)*(1+nszarr))

      dzzi = 1. / dzz

!$OMP PARALLEL PRIVATE(ip,js,g,gw,kk,w1,w0)
      do is=1,pgroup%ns
        if (pgroup%sid(is-1) == -1) cycle

        if (lspeciesmoments) then
          js = pgroup%sid(is-1)
        else
          js = 0
        endif

        g = pgroup%sw(is)*pgroup%dtscale(is)*pgroup%sq(is)*dzzi
        gw = g
!$OMP DO
        do ip = pgroup%ins(is),pgroup%ins(is)+pgroup%nps(is)-1
          kk = (pgroup%zp(ip) - zbeam - zzmin)*dzzi
          if (kk < 0 .or. kk+1 > nzzarr) cycle
          w1 = (pgroup%zp(ip) - zbeam - zzmin)*dzzi - kk
          w0 = 1. - w1
          if (wpid > 0) gw = g*pgroup%pid(ip,wpid)
          curr(kk  ,js) = curr(kk  ,js)+w0*gw*pgroup%gaminv(ip)*pgroup%uzp(ip)
          curr(kk+1,js) = curr(kk+1,js)+w1*gw*pgroup%gaminv(ip)*pgroup%uzp(ip)
        enddo

!$OMP END DO
      enddo
!$OMP END PARALLEL

      if (lzero) call percurr(curr,nzzarr,nszarr,bound0)

!$OMP MASTER
      if (ltoptimesubs) timesetcurr = timesetcurr + wtime() - substarttime
!$OMP END MASTER
      return
      end

[setcurr]
      subroutine percurr(curr,nzzarr,nszarr,bound0)
      use GlobalVars
      integer(ISZ):: nzzarr,nszarr,bound0
      real(kind=8):: curr(0:nzzarr,0:nszarr)

  Make current periodic.

#ifdef MPIPARALLEL
  Parallel version sums curr over all processors.
      call parallelsumrealarray(curr,nzzarr+1)
#endif

      if (nszarr > 0) then
        --- Sum up the current from all of the speices.
        curr(:,nszarr) = sum(curr(:,0:nszarr-1),2)
      endif

      if (bound0==periodic) then
        curr(0,:) = curr(0,:) + curr(nzzarr,:)
        curr(nzzarr,:) = curr(0,:)
      else
        curr(0,:) = 2.*curr(0,:)
        curr(nzzarr,:) = 2.*curr(nzzarr,:)
      endif

      return
      end

[step3d] [steprz] [stepxy]
      subroutine getvzofz
      use Z_arrays

   Computes the mean beam z velocity from the current and line charge density

      where (linechg /= 0.)
        vzofz = curr(:,nszarr)/linechg
      elsewhere
        vzofz = 0.
      end where

      return
      end

[step3d] [steprz] [stepxy]
      subroutine setegap
      use Beam_acc
      use InGaps
      use Z_arrays

   Computes the "gap" electric field as a function of z, using the "smeared gap"
   approximation.  We sweep back from head of beam, since that's the direction
   in which information moves.

      integer(ISZ):: i
      real(kind=8):: d

      if (ifgap) then

        do i = nzzarr-1, 1, -1
           d = dvnz(2.*rgap*dzz)
           d = dvnz(cgap*vbeam**2/dzz**2 + vbeam/d)
           egap(i-1) = (- cgap * vbeam**2 * (-2.*egap(i)+egap(i+1)) / dzz**2
     &                  + vbeam * egap(i+1) / dvnz(2.*rgap*dzz)
     &                  - egap(i) / dvnz(lgap)
     &                  + vbeam*(curr(i+1,nszarr) - curr(i-1,nszarr))/(2.*dzz)
     &                 ) / d
        enddo

      endif

      return
      end

[padvnc3d] [padvncxy]
      subroutine gapfield (np,zp,ez,zbeam,zzmin,egap,dzz)
      use InGaps
      integer(ISZ):: np
      real(kind=8):: zbeam,zzmin,dzz
      real(kind=8):: zp(np),ez(np),egap(0:*)

   Interpolates the spatially averaged gap electric field onto the particle
   positions, then uses it to augment the individual particle Ez's.

      real(kind=8):: dzzi,w1,w0
      integer(ISZ):: ip,kk

      if (ifgap) then

        dzzi = 1./dzz
        do ip=1,np
           kk = (zp(ip) - zbeam - zzmin) * dzzi
           w1 = (zp(ip) - zbeam - zzmin) * dzzi - kk
           w0 = 1. - w1
           if (kk < 0 .or. kk+1 > nzzarr) cycle
           ez(ip) = ez(ip) + w0*egap(kk) + w1*egap(kk+1)
        enddo

      endif

      return
      end

[wrzgen]
      subroutine setgamma(pgroup,lrelativ)
      use ParticleGroupmodule
      use Constant
      use InPart
      type(ParticleGroup):: pgroup
      logical(ISZ):: lrelativ
   Converts v to u, sets inverse gamma factor for all particles


      real(kind=8):: clghtisq,gamma
      integer(ISZ):: is,ip

      if (lrelativ) then
        clghtisq = 1./clight**2
        do is=1,pgroup%ns
          do ip = pgroup%ins(is),pgroup%ins(is)+pgroup%nps(is)-1
             gamma = 1. / sqrt ( 1. -
     &    (pgroup%uxp(ip)**2 + pgroup%uyp(ip)**2 + pgroup%uzp(ip)**2)*clghtisq)
             pgroup%uxp(ip) = gamma*pgroup%uxp(ip)
             pgroup%uyp(ip) = gamma*pgroup%uyp(ip)
             pgroup%uzp(ip) = gamma*pgroup%uzp(ip)
             pgroup%gaminv(ip) = 1./gamma
          enddo
        enddo
      else
        do is=1,pgroup%ns
          do ip=pgroup%ins(is),pgroup%ins(is)+pgroup%nps(is)-1
             pgroup%gaminv(ip) = 1.0
          enddo
        enddo
      endif

      return
      end

[particleboundaries3d] [particleboundariesxy] [particlegridboundaries3d]
      subroutine xparticleboundaries(pgroup,js1,js2,xmmax,xmmin,lcountaslost,
     &                               labs,lrz)
      use ParticleGroupmodule
      use GlobalVars
      use InGen
      use InPart
      use Picglb,Only: xpminlocal,xpmaxlocal
      use Parallel,Only: nxprocs
      type(ParticleGroup):: pgroup
      integer(ISZ):: js1,js2
      real(kind=8):: xmmax,xmmin
      logical(ISZ):: lcountaslost,labs,lrz

   Impose boundary conditions on xp

      real(kind=8):: syslen
      real(kind=8):: rrsq,rr,rrin
      integer(ISZ):: is,ip,indts

  Parallel version makes call to special routine particleboundaries_parallel,
  but only if the x direction is domain decomposed.
#ifdef MPIPARALLEL
      if (nxprocs > 1) then
        call particleboundaries_parallel(0,pgroup,js1,js2,
     &                                   xmmax,xmmin,xpmaxlocal,xpminlocal,
     &                                   pboundxy,pboundxy,
     &                                   lcountaslost,labs,lrz)
      else
#endif

      if (pboundxy==periodic) then

        --- periodic boundary condition
        syslen = xmmax - xmmin
        do is=js1+1,js2+1
          if (.not. pgroup%ldts(is-1)) cycle
          do ip=pgroup%ins(is),pgroup%ins(is)+pgroup%nps(is)-1
            if (pgroup%xp(ip) < xmmin) then
              pgroup%xp(ip) = pgroup%xp(ip) +
     &                      syslen*int((xmmax - pgroup%xp(ip))/syslen)
            endif
            if (pgroup%xp(ip) >= xmmax) then
              pgroup%xp(ip) = pgroup%xp(ip) -
     &                      syslen*int((pgroup%xp(ip) - xmmin)/syslen)
            endif
          enddo
        enddo

      elseif(pboundxy==absorb) then

        --- particles absorbed (Dirichlet)
        if (lrz) then

          do is=js1+1,js2+1
            if (.not. pgroup%ldts(is-1)) cycle
            do ip=pgroup%ins(is),pgroup%ins(is)+pgroup%nps(is)-1
              rrsq = pgroup%xp(ip)**2 + pgroup%yp(ip)**2
              if (rrsq > xmmax**2) then
                pgroup%gaminv(ip) = 0.
                if (.not. lcountaslost) pgroup%gaminv(ip) = -1.
              endif
            enddo
          enddo

        else

          do is=js1+1,js2+1
            if (.not. pgroup%ldts(is-1)) cycle
            do ip=pgroup%ins(is),pgroup%ins(is)+pgroup%nps(is)-1
              if (pgroup%xp(ip) <= xmmin .or. pgroup%xp(ip) >= xmmax) then
                pgroup%gaminv(ip) = 0.
                if (.not. lcountaslost) pgroup%gaminv(ip) = -1.
              endif
            enddo
          enddo

        endif

      elseif(pboundxy==reflect) then

        if (lrz) then

          do is=js1+1,js2+1
            if (.not. pgroup%ldts(is-1)) cycle
            do ip=pgroup%ins(is),pgroup%ins(is)+pgroup%nps(is)-1
              rrsq = pgroup%xp(ip)**2 + pgroup%yp(ip)**2
              if (rrsq >= xmmax**2) then
                rr = sqrt(rrsq)
                if (rrsq > xmmax**2) then
                  pgroup%xp(ip) = (2.*xmmax - rr)*(pgroup%xp(ip)/rr)
                  pgroup%yp(ip) = (2.*xmmax - rr)*(pgroup%yp(ip)/rr)
                else ! rrsq == xmmax**2
                  --- Shift the particle in bounds by a number that is small
                  --- compared to the dimensions of the grid.
                  rrin = xmmax - (xmmax - xmmin)*1.e-12
                  pgroup%xp(ip) = rrin*(pgroup%xp(ip)/rr)
                  pgroup%yp(ip) = rrin*(pgroup%yp(ip)/rr)
                endif
                pgroup%uxp(ip) = -pgroup%uxp(ip)
                pgroup%uyp(ip) = -pgroup%uyp(ip)
              endif
            enddo
          enddo

        else

          do is=js1+1,js2+1
            if (.not. pgroup%ldts(is-1)) cycle
            do ip=pgroup%ins(is),pgroup%ins(is)+pgroup%nps(is)-1
              if (pgroup%xp(ip) >= xmmax) then
                if (pgroup%xp(ip) > xmmax) then
                  pgroup%xp(ip) = 2.*xmmax - pgroup%xp(ip)
                else ! xp == xmmax
                  --- Shift the particle in bounds by a number that is small
                  --- compared to the dimensions of the grid.
                  pgroup%xp(ip) = xmmax - (xmmax - xmmin)*1.e-12
                endif
                pgroup%uxp(ip) = -pgroup%uxp(ip)
              elseif (pgroup%xp(ip) < xmmin) then
                pgroup%xp(ip) = 2.*xmmin - pgroup%xp(ip)
                pgroup%uxp(ip) = -pgroup%uxp(ip)
              endif
            enddo
          enddo

        endif

      endif

#ifdef MPIPARALLEL
        --- End of if block for nxprocs == 1.
      endif
#endif

  --- end of ifelse above for parallel version.

      return
      end

[particleboundaries3d] [particleboundariesxy] [particlegridboundaries3d]
      subroutine yparticleboundaries(pgroup,js1,js2,ymmax,ymmin,lcountaslost,
     &                               labs,lskipy)
      use ParticleGroupmodule
      use GlobalVars
      use InGen
      use InPart
      use Picglb,Only: ypminlocal,ypmaxlocal
      use Parallel,Only: nyprocs
      type(ParticleGroup):: pgroup
      integer(ISZ):: js1,js2
      real(kind=8):: ymmax,ymmin
      logical(ISZ):: lcountaslost,labs,lskipy

   Impose boundary conditions on yp

      real(kind=8):: syslen
      integer(ISZ):: is,ip,indts

      --- With radial geometry, the boundary conditions are handled by
      --- xparticleboundaries, so nothing is needed here.
      --- lskipy would also be true for XZ geometry.
      if (lskipy) return

  Parallel version makes call to special routine particleboundaries_parallel,
  but only if the y direction is domain decomposed.
#ifdef MPIPARALLEL
      if (nyprocs > 1) then
        call particleboundaries_parallel(1,pgroup,js1,js2,
     &                                   ymmax,ymmin,ypmaxlocal,ypminlocal,
     &                                   pboundxy,pboundxy,
     &                                   lcountaslost,labs,lskipy)
      else
#endif

      if (pboundxy==periodic) then
      --- periodic boundary condition
        syslen = ymmax - ymmin
        do is=js1+1,js2+1
          if (.not. pgroup%ldts(is-1)) cycle
          do ip=pgroup%ins(is),pgroup%ins(is)+pgroup%nps(is)-1
            if (pgroup%yp(ip) < ymmin) then
              pgroup%yp(ip) = pgroup%yp(ip) +
     &                      syslen*int((ymmax - pgroup%yp(ip))/syslen)
            endif
            if (pgroup%yp(ip) >= ymmax) then
              pgroup%yp(ip) = pgroup%yp(ip) -
     &                      syslen*int((pgroup%yp(ip) - ymmin)/syslen)
            endif
          enddo
        enddo

      elseif(pboundxy==absorb) then

        --- particles absorbed (Dirichlet)
      do is=js1+1,js2+1
          if (.not. pgroup%ldts(is-1)) cycle
          do ip=pgroup%ins(is),pgroup%ins(is)+pgroup%nps(is)-1
            if (pgroup%yp(ip) <= ymmin .or. pgroup%yp(ip) >= ymmax) then
              pgroup%gaminv(ip) = 0.
              if (.not. lcountaslost) pgroup%gaminv(ip) = -1.
            endif
          enddo
        enddo

      elseif(pboundxy==reflect) then

        do is=js1+1,js2+1
          if (.not. pgroup%ldts(is-1)) cycle
          do ip=pgroup%ins(is),pgroup%ins(is)+pgroup%nps(is)-1
            if (pgroup%yp(ip) >= ymmax) then
              if (pgroup%yp(ip) > ymmax) then
                pgroup%yp(ip) = 2.*ymmax - pgroup%yp(ip)
              else ! yp == ymmax
                --- Shift the particle in bounds by a number that is small
                --- compared to the dimensions of the grid.
                pgroup%yp(ip) = ymmax - (ymmax - ymmin)*1.e-12
              endif
              pgroup%uyp(ip) = -pgroup%uyp(ip)
            elseif (pgroup%yp(ip) < ymmin) then
              pgroup%yp(ip) = 2.*ymmin - pgroup%yp(ip)
              pgroup%uyp(ip) = -pgroup%uyp(ip)
            endif
          enddo
        enddo

      endif

#ifdef MPIPARALLEL
        --- End of if block for nyprocs == 1.
      endif
#endif

  --- end of ifelse above for parallel version.

      return
      end

[particleboundaries3d] [particlegridboundaries3d]
      subroutine zparticleboundaries(pgroup,js1,js2,zmmax,zmmin,lcountaslost)
      use ParticleGroupmodule
      use GlobalVars
      use InGen
      use InPart
      use Picglb,Only: zgrid,zpminlocal,zpmaxlocal
      use Subcycling,Only: ndtstorho,zgridndts
      use Parallel,Only: nzprocs
      type(ParticleGroup):: pgroup
      integer(ISZ):: js1,js2
      real(kind=8):: zmmax,zmmin
      logical(ISZ):: lcountaslost

   Impose boundary conditions on zp

   --- Note that if ndtstorho (and therefore zgridndts) has not been setup
   --- yet, then the code uses zgrid instead of zgridndts. This would likely
   --- only ever happen if this is called before a generate had been done.

      real(kind=8):: syslen,zg
      integer(ISZ):: is,ip,indts

  Parallel version makes call to special routine particleboundaries_parallel,
  but only if the z direction is domain decomposed.
#ifdef MPIPARALLEL
      if (nzprocs > 1) then
        call particleboundaries_parallel(2,pgroup,js1,js2,
     &                                   zmmax,zmmin,zpmaxlocal,zpminlocal,
     &                                   pbound0,pboundnz,
     &                                   lcountaslost,.false.,.false.)
      else
#endif

      if (pbound0==periodic) then
      --- periodic boundary condition
        syslen = zmmax - zmmin
        do is=js1+1,js2+1
          if (.not. pgroup%ldts(is-1)) cycle
          indts = ndtstorho(pgroup%ndts(is-1))
          if (indts > -1) then
            zg = zgridndts(indts)
          else
            zg = zgrid
          endif

          do ip=pgroup%ins(is),pgroup%ins(is)+pgroup%nps(is)-1
            if (pgroup%zp(ip) < zg + zmmin) then
              pgroup%zp(ip) = pgroup%zp(ip) +
     &                      syslen*int((zg + zmmax - pgroup%zp(ip))/syslen)
            endif
            if (pgroup%zp(ip) >= zg + zmmax) then
              pgroup%zp(ip) = pgroup%zp(ip) -
     &                      syslen*int((pgroup%zp(ip) - zg - zmmin)/syslen)
            endif
          enddo
        enddo

      else
      --- other boundary condition (particles absorbed or reflected)
        do is=js1+1,js2+1
          if (.not. pgroup%ldts(is-1)) cycle
          indts = ndtstorho(pgroup%ndts(is-1))
          if (indts > -1) then
            zg = zgridndts(indts)
          else
            zg = zgrid
          endif
          do ip=pgroup%ins(is),pgroup%ins(is)+pgroup%nps(is)-1
            if ((pgroup%zp(ip)-zg) >= zmmax) then
              if(pboundnz==absorb) then
                pgroup%gaminv(ip) = 0.
                if (.not. lcountaslost) pgroup%gaminv(ip) = -1.
              elseif(pboundnz==reflect) then
                if ((pgroup%zp(ip)-zg) > zmmax) then
                  pgroup%zp(ip) = 2.*zg + 2.*zmmax - pgroup%zp(ip)
                else ! zp-zg == zmmax
                --- Shift the particle in bounds by a number that is small
                --- compared to the dimensions of the grid.
                  pgroup%zp(ip) = zg + zmmax - (zmmax - zmmin)*1.e-12
                endif
                pgroup%uzp(ip) = -pgroup%uzp(ip)
              end if
            elseif ((pgroup%zp(ip)-zg) <= zmmin) then
              if(pbound0==absorb) then
                pgroup%gaminv(ip) = 0.
                if (.not. lcountaslost) pgroup%gaminv(ip) = -1.
              elseif(pbound0==reflect) then
                pgroup%zp(ip) = 2.*zg + 2.*zmmin - pgroup%zp(ip)
                pgroup%uzp(ip) = -pgroup%uzp(ip)
              end if
            endif
          enddo
        enddo
      endif

#ifdef MPIPARALLEL
        --- End of if block for nzprocs == 1.
      endif
#endif

  --- end of ifelse above for parallel version.

      return
      end

[xpush3dintrp]
      subroutine particleboundarieswithdata(n,x,y,z,ux,uy,uz,gaminv,
     &                                      xmmin,xmmax,ymmin,ymmax,zmmin,zmmax,
     &                                      zgrid,
     &                                      pboundxy,pbound0,pboundnz,lrz)
      use GlobalVars
      integer(ISZ):: n
      real(kind=8):: x(n),y(n),z(n),ux(n),uy(n),uz(n),gaminv(n)
      real(kind=8):: xmmin,xmmax,ymmin,ymmax,zmmax,zmmin,zgrid
      integer(ISZ):: pboundxy,pbound0,pboundnz
      logical(ISZ):: lrz

  Impose particles boundary conditions

      real(kind=8),allocatable:: r(:),ri(:),ur(:),ut(:)

      if (lrz) then
        allocate(r(n),ri(n),ur(n),ut(n))
        r = sqrt(x**2 + y**2)
        ri = 1./r
        ur = ux*x*ri + uy*y*ri
        ut = ux*y*ri - uy*x*ri
        call partbndwithdata(n,r,ur,gaminv,xmmax,xmmin,0.,pboundxy,pboundxy)
        ux = ur*x*ri - ut*y*ri
        uy = ur*y*ri + ut*x*ri
        x = r*x*ri
        y = r*y*ri
        deallocate(r,ri,ur,ut)
      else
        call partbndwithdata(n,x,ux,gaminv,xmmax,xmmin,0.,pboundxy,pboundxy)
        call partbndwithdata(n,y,uy,gaminv,ymmax,ymmin,0.,pboundxy,pboundxy)
      endif
      
      call partbndwithdata(n,z,uz,gaminv,zmmax,zmmin,zgrid,pbound0,pboundnz)

      return
      end

[particleboundarieswithdata]
      subroutine partbndwithdata(n,z,uz,gaminv,zmmax,zmmin,zgrid,
     &                           pbound0,pboundnz)
      use GlobalVars
      integer(ISZ):: n
      real(kind=8):: z(n),uz(n),gaminv(n)
      real(kind=8):: zmmax,zmmin,zgrid
      integer(ISZ):: pbound0,pboundnz

  Impose particles boundary conditions along one axis

      real(kind=8):: syslen
      integer(ISZ):: is,ip

      if (pbound0==periodic) then

      --- periodic boundary condition
        syslen = zmmax - zmmin
        do ip=1,n
          if (z(ip) < zgrid + zmmin)
     &      z(ip) = z(ip) + syslen*int((zgrid + zmmax - z(ip))/syslen)
          if (z(ip) >= zgrid + zmmax)
     &      z(ip) = z(ip) - syslen*int((z(ip) - zgrid - zmmin)/syslen)
        enddo

      else

      --- other boundary condition (particles absorbed or reflected)
        do ip=1,n

          if ((z(ip)-zgrid) >= zmmax) then

            if(pboundnz==absorb) then
              gaminv(ip) = 0.
            elseif(pboundnz==reflect) then
              if ((z(ip)-zgrid) > zmmax) then
                z(ip) = 2.*zgrid + 2.*zmmax - z(ip)
              else ! if (z(ip)-zgrid) == zmmax
                z(ip) = zgrid + zmmax - (zmmax - zmmin)*1.e-12
              endif
              uz(ip) = -uz(ip)
            end if

          elseif ((z(ip)-zgrid) <= zmmin) then

            if(pbound0==absorb) then
              gaminv(ip) = 0.
            elseif(pbound0==reflect) then
              z(ip) = 2.*zgrid + 2.*zmmin - z(ip)
              uz(ip) = -uz(ip)
            end if

          endif

        enddo
      endif

      return
      end

      subroutine reorgparticles(pgroup,l4symtry,l2symtry,lrz)
      use ParticleGroupmodule
      type(ParticleGroup):: pgroup
      logical(ISZ):: l4symtry,l2symtry,lrz
#ifdef MPIPARALLEL
      call reorgparticles_parallel(pgroup,l4symtry,l2symtry,lrz)
#endif
      return
      end

[particleboundaries3d] [particleboundariesxy]
      subroutine semitransparent_disc(pgroup,dz)
      use ParticleGroupmodule
      use InGen
      use Moments
      use InPart
      use SemiTransparentDisc
      type(ParticleGroup):: pgroup
      real(kind=8), intent(in) :: dz 

  Random absorption of particles passing through a semitranparent disc.
  This assumes that no particle travels more than dz in one time step.
  dz is used only for efficiency purpose: only particles within dz 
  of semi-transparent disc location are checked for disc crossing.

      integer(ISZ) :: is, ip, izd, idisc
      real(kind=8) :: zpold, dtcross, xpatdisc, ypatdisc, r, wranf
     &              , zmind(n_STdiscs),zmaxd(n_STdiscs)

      if(n_STdiscs==0) return

      do idisc = 1, n_STdiscs
        zmind(idisc) = z_STdiscs(idisc)-dz
        zmaxd(idisc) = z_STdiscs(idisc)+dz
      end do

      --- loop on species
      do is=1,pgroup%ns
        --- do it only if species was advanced on this time step
        if (.not. pgroup%ldts(is-1)) cycle
        --- loop on particles
        do ip=pgroup%ins(is),pgroup%ins(is)+pgroup%nps(is)-1
          --- loop on number of discs
          do idisc = 1, n_STdiscs
            --- Skip species not being absorbed by this disc
            if (.not. s_STdiscs(idisc,is)) cycle
            --- do only the work for particles close to disc location
            if(pgroup%zp(ip)>=zmind(idisc) .and.
     &         pgroup%zp(ip)<=zmaxd(idisc)) then
              zpold = pgroup%zp(ip)-dt*pgroup%uzp(ip)*pgroup%gaminv(ip)
              --- test if particle crossed disc location
              if(zpold<z_STdiscs(idisc) .and.
     &           pgroup%zp(ip)>=z_STdiscs(idisc)) then
                dtcross = dt*(pgroup%zp(ip) - z_STdiscs(idisc))/
     &                       (pgroup%zp(ip) - zpold)
                xpatdisc = pgroup%xp(ip) -
     &                     dtcross*pgroup%uxp(ip)*pgroup%gaminv(ip)
                ypatdisc = pgroup%yp(ip) -
     &                     dtcross*pgroup%uyp(ip)*pgroup%gaminv(ip)
                r = sqrt(xpatdisc**2+ypatdisc**2)
                --- test if particle passed through disc
                if(r<=r_STdiscs(idisc)) then
                   --- randomly absorb particles according to transparency
                   --- coefficient
                   if(wranf()>t_STdiscs(idisc)) then
                     pgroup%gaminv(ip) = 0.
                   end if
                end if
              end if
            end if
          end do
        end do
      end do
      
      end
  The following two subroutines are made obsolete by zpartbnd above, but
  are maintained for use by the wrz and wxy packages.

[padvncrz]
      subroutine periz(np,zp,zgrid,zmmax,zmmin)
      integer(ISZ):: np
      real(kind=8):: zgrid,zmmax,zmmin
      real(kind=8):: zp(np)

   Imposes periodic boundary conditions on zp

      real(kind=8):: syslen,sysleni
      integer(ISZ):: ip

      syslen = zmmax - zmmin
      sysleni = 1./syslen
      do ip=1,np
        if (zp(ip) < zgrid + zmmin)
     &    zp(ip) = zp(ip) + syslen*int((zgrid + zmmax - zp(ip))*sysleni)
        if (zp(ip) > zgrid + zmmax)
     &    zp(ip) = zp(ip) - syslen*int((zp(ip) - zgrid - zmmin)*sysleni)
      enddo

      return
      end

[padvncrz]
      subroutine stckyz(np,zp,zmmax,zmmin,dz,uxp,uyp,uzp,gaminv,zgrid)
      integer(ISZ):: np
      real(kind=8):: zmmax,zmmin,dz,zgrid
      real(kind=8):: zp(np)
      real(kind=8):: uxp(np),uyp(np),uzp(np),gaminv(np)

   Enforces sticky b.c.'s on the z walls.
   Particles stick at the extreme edges of the grid.

      integer(ISZ):: ip

      do ip=1,np
        if ((zp(ip)-zgrid) >= zmmax) then
          gaminv(ip) = 0.
        elseif ((zp(ip)-zgrid) <= zmmin) then
          gaminv(ip) = 0.
        endif
      enddo

      return
      end

      real(kind=8) function getbeamcom(pgroup)
      use ParticleGroupmodule
      use InPart
      type(ParticleGroup):: pgroup

  Calculate the center of mass of the beam along the z axis from the particles

      integer(ISZ):: is,ip
      real(kind=8):: totz,totmass
#ifdef MPIPARALLEL
      real(kind=8):: data(2)
#endif

      totz = 0.
      totmass = 0.
      do is=1,1!pgroup%ns ! Temporary fix for multi-species runs
        do ip=pgroup%ins(is),pgroup%ins(is)+pgroup%nps(is)-1
          totz = totz + pgroup%zp(ip)*pgroup%sm(is)
          totmass = totmass + pgroup%sm(is)
        enddo
      enddo

#ifdef MPIPARALLEL
      data(1) = totz
      data(2) = totmass
      call parallelsumrealarray(data,2)
      totz = data(1)
      totmass = data(2)
#endif

      getbeamcom = totz/dvnz(totmass)

      return
      end

[addpart] [alotpart] [chckpart] [copygrouptogroup] [getextrapolatedparticles] [inject3d] [padvnc3d] [stptcl3d] [w3dgen]
      subroutine setuppgroup(pgroup)
      use ParticleGroupmodule
      use Particles,Only: npid
      type(ParticleGroup):: pgroup

      integer(ISZ):: is

      pgroup%npid = npid
      call gchange("Particles",0)
      call ParticleGroupchange(pgroup)

      --- Make sure that ins has a sensible value for all species
      if (pgroup%ins(1) == 0) pgroup%ins(1) = 1
      where (pgroup%ins(2:) == 0)
        pgroup%ins(2:) = pgroup%ins(:pgroup%ns-1) + pgroup%nps(:pgroup%ns-1)
      end where

      --- Make sure that ipmax has reasonable values
      pgroup%ipmax(0) = 0
      do is=1,pgroup%ns-1
        if (pgroup%ipmax(is) < pgroup%ins(is)+pgroup%nps(is)-1 .or.
     &      pgroup%ipmax(is) > pgroup%ins(is+1)-1) then
           pgroup%ipmax(is) = (pgroup%ins(is)+pgroup%nps(is)-1 +
     &                         pgroup%ins(is+1)-1)/2
        endif
      enddo
      pgroup%ipmax(pgroup%ns) = pgroup%npmax

      return
      end

[injctint] [reorgparticles_parallel] [wrzgen]
      subroutine alotpart(pgroup)
      use Subtimerstop
      use ParticleGroupmodule
      use InPart
      use Particles,Only: npmax
      type(ParticleGroup):: pgroup

  Allocate space for particles and set up associated arrays.

      integer(ISZ):: is,sid(0:ns)
      real(kind=8):: substarttime,wtime
      if (ltoptimesubs) substarttime = wtime()

      --- Either npmax or np_s can be used as input quantities. If one of
      --- them is zero, set it from the other. If both are set, the code
      --- uses np_s, ignoring npmax.
      do is=1,ns
        if (np_s(is) == 0) np_s(is) = npmax*sp_fract(is)
      enddo
      if (pgroup%npmax == 0) pgroup%npmax = sum(np_s)

      --- Force pgroup%npmax to be large enough to hold all of the particles
      --- to be loaded.
      pgroup%npmax = sum(np_s)

      --- Allocate space.
      pgroup%ns = ns
      call setuppgroup(pgroup)

      --- setup indexing arrays, dividing up the space based on np_s
      pgroup%ipmax(0) = 0
      do is=1,pgroup%ns
        pgroup%ins(is) = pgroup%ipmax(is-1) + 1
        pgroup%nps(is) = 0
        pgroup%ipmax(is) = pgroup%ipmax(is-1) + np_s(is)
        if (pgroup%sid(is-1) == -1) pgroup%sid(is-1) = is-1
      enddo
      pgroup%ipmax(ns) = pgroup%npmax

!$OMP MASTER
      if (ltoptimesubs) timealotpart = timealotpart + wtime() - substarttime
!$OMP END MASTER

      return
      end

[addpart] [impact_ion] [injctint] [inject3d] [particleboundaries_parallel] [stptcl3d]
      subroutine chckpart(pgroup,is,nlower,nhigher)
      use ParticleGroupmodule
      use Subtimerstop
      use InPart
      type(ParticleGroup):: pgroup
      integer(ISZ):: is,nlower,nhigher

  Make sure that there is enough space in the particle arrays for nlower
  new particles below and nhigher above the live particles.  Returns if
  there is already enough space above and below.  If there is enough total
  space but not enough room above or below, the particles are shifted
  appropriately. If there is not enough space, add more to the arrays.
  Particle data is shifted appropriately.

      integer(ISZ):: nadd,naddlower,naddhigher,i,ishift
      integer(ISZ):: ilower,ihigher,spacebelow,spaceabove
      real(kind=8):: substarttime,wtime
      if (ltoptimesubs) substarttime = wtime()

      --- Call this here in case chckpart has been called before alotpart.
      call setuppgroup(pgroup)

      --- Make sure that 'is' is within the correct range
      if (is < 1 .or. is > pgroup%ns) then
        print*,"chckpart: warning: the input species is ",is,", but must be between 1 and ",pgroup%ns
        call kaboom("chckpart: species number is invalid")
        return
      endif

      --- ilower is the lowest usable array location.
      ilower = pgroup%ipmax(is-1) + 1

      --- ihigher is the highest usable array location.
      ihigher = pgroup%ipmax(is)

      --- Calculate the available space.
      spacebelow = pgroup%ins(is) - ilower
      spaceabove = ihigher - (pgroup%ins(is) + pgroup%nps(is) - 1)

      --- If there is already enough space, then return.
      if (spacebelow >= nlower .and.
     &    spaceabove >= nhigher) return

      --- If there is not enough space, then allocate new space.
      if (nlower+nhigher > spacebelow+spaceabove) then

        --- Amount of new space to add: this is calculated to be a sizable
        --- chunk, but not too big.  By default, add 10 times the space needed.
        --- Limit that to 10000, except, if the space needed is greater than
        --- 10000, add the amount of space needed. Then subtract off the
        --- space that is already there.
        naddlower = min(10*nlower, max(10000, nlower))
        naddlower = max(0,naddlower - spacebelow)
        naddhigher = min(10*nhigher, max(10000, nhigher))
        naddhigher = max(0,naddhigher - spaceabove)
        nadd = naddlower + naddhigher

        --- change the particle array dimension variables and allot space
        pgroup%npmax = pgroup%npmax + nadd
        call ParticleGroupchange(pgroup)

        --- Loop over species species above 'is', shifting
        --- them up to make space below for species is.
        do i=pgroup%ns,is+1,-1
          pgroup%ipmax(i) = pgroup%ipmax(i) + nadd
          call shftpart(pgroup,i,nadd)
        enddo
        pgroup%ipmax(is) = pgroup%ipmax(is) + nadd

        --- The new space will be above the existing particles, so adjust
        --- the two quantities appropriately.
        spaceabove = spaceabove + nadd
        naddhigher = max(0,naddhigher - nadd)

      else
        naddlower = max(0,nlower - spacebelow)
        naddhigher = max(0,nhigher - spaceabove)
      endif

      --- Now, there is enough room to shift the particles appropriately.
      --- Note that is some cases no shift will be needed.  In those cases,
      --- ishift comes out to be zero and shftpart returns immediately when
      --- ishift is zero. Note that in all cases, only one of naddlower
      --- and naddhigher will be nonzero.
      ishift = naddlower - naddhigher
      call shftpart(pgroup,is,ishift)

!$OMP MASTER
      if (ltoptimesubs) timechckpart = timechckpart + wtime() - substarttime
!$OMP END MASTER

      return
      end

[chckpart] [shrinkpart]
      subroutine shftpart(pgroup,is,ishift)
      use ParticleGroupmodule
      use InPart
      type(ParticleGroup):: pgroup
      integer(ISZ):: is,ishift

      --- Make sure that 'is' is within the correct range
      if (is < 1 .or. is > pgroup%ns) then
        print*,"shftpart: warning: the input species is ",is,", but must be between 1 and ",pgroup%ns
        call kaboom("shftpart: species number is invalid")
        return
      endif

      if (pgroup%npid > 0) then
        call shftpartwork(is,ishift,pgroup%npmax,pgroup%ns,
     &                    pgroup%ins,pgroup%nps,pgroup%npid,
     &                    pgroup%xp,pgroup%yp,pgroup%zp,
     &                    pgroup%uxp,pgroup%uyp,pgroup%uzp,pgroup%gaminv,
     &                    .true.,
     &                    pgroup%ex,pgroup%ey,pgroup%ez,
     &                    pgroup%bx,pgroup%by,pgroup%bz,
     &                    pgroup%pid)
      else
        call shftpartwork(is,ishift,pgroup%npmax,pgroup%ns,
     &                    pgroup%ins,pgroup%nps,pgroup%npid,
     &                    pgroup%xp,pgroup%yp,pgroup%zp,
     &                    pgroup%uxp,pgroup%uyp,pgroup%uzp,pgroup%gaminv,
     &                    .true.,
     &                    pgroup%ex,pgroup%ey,pgroup%ez,
     &                    pgroup%bx,pgroup%by,pgroup%bz,
     &                    0.)
      endif
      return
      end

[shftlostpart] [shftpart]
      subroutine shftpartwork(is,ishift,npmax,ns,ins,nps,
     &                        npid,xp,yp,zp,uxp,uyp,uzp,gaminv,
     &                        lshiftfields,ex,ey,ez,bx,by,bz,pid)
      use Subtimerstop
      integer(ISZ):: is,ishift
      integer(ISZ):: npmax,ns,ins(ns),nps(ns),npid
      real(kind=8):: xp(npmax),yp(npmax),zp(npmax)
      real(kind=8):: uxp(npmax),uyp(npmax),uzp(npmax)
      real(kind=8):: gaminv(npmax),pid(npmax,npid)
      logical(ISZ):: lshiftfields
      real(kind=8):: ex(npmax),ey(npmax),ez(npmax)
      real(kind=8):: bx(npmax),by(npmax),bz(npmax)

  Shift particles by 'ishift'.
  Assumes that there is enough space in the particle arrays to make the
  shift without clobbering other particles or going past the ends of
  the arrays.

      integer(ISZ):: ip
      real(kind=8):: substarttime,wtime
      if (ltoptimesubs) substarttime = wtime()

      --- Make sure that 'is' is within the correct range
      if (is < 1 .or. is > ns) then
        print*,"shftpartwork: warning: the input species is ",is,", but must be between 1 and ",ns
        call kaboom("shftpartwork: species number is invalid")
        return
      endif

      --- return if shift is zero
      if (ishift == 0) return

      --- Positive and negative shifts are treated differently since
      --- a negative shift may result in lost data.
      if (ishift > 0) then

        --- Loop from top of live particles to the bottom.
        do ip=ins(is)+nps(is)-1, ins(is), -1
          xp(ip+ishift) = xp(ip)
          yp(ip+ishift) = yp(ip)
          zp(ip+ishift) = zp(ip)
          uxp(ip+ishift) = uxp(ip)
          uyp(ip+ishift) = uyp(ip)
          uzp(ip+ishift) = uzp(ip)
          gaminv(ip+ishift) = gaminv(ip)
          if (lshiftfields) then
            ex(ip+ishift) = ex(ip)
            ey(ip+ishift) = ey(ip)
            ez(ip+ishift) = ez(ip)
            bx(ip+ishift) = bx(ip)
            by(ip+ishift) = by(ip)
            bz(ip+ishift) = bz(ip)
          endif
          if (npid > 0) then
            pid(ip+ishift,:) = pid(ip,:)
          endif
        enddo
        --- Move this into a seperate loop since some compilers were screwing
        --- up. This will also be slightly faster too, maybe.
        do ip=ins(is),ins(is)+ishift-1
          gaminv(ip) = 0.
          if (npid > 0) pid(ip,:) = 0
        enddo

        --- change lower bound of live particles
        ins(is) = ins(is) + ishift

      else

        --- Loop over live particles.
        do ip=ins(is), ins(is)+nps(is)-1
          xp(ip+ishift) = xp(ip)
          yp(ip+ishift) = yp(ip)
          zp(ip+ishift) = zp(ip)
          uxp(ip+ishift) = uxp(ip)
          uyp(ip+ishift) = uyp(ip)
          uzp(ip+ishift) = uzp(ip)
          gaminv(ip+ishift) = gaminv(ip)
          if (lshiftfields) then
            ex(ip+ishift) = ex(ip)
            ey(ip+ishift) = ey(ip)
            ez(ip+ishift) = ez(ip)
            bx(ip+ishift) = bx(ip)
            by(ip+ishift) = by(ip)
            bz(ip+ishift) = bz(ip)
          endif
          if (npid > 0) then
            pid(ip+ishift,:) = pid(ip,:)
          endif
        enddo
        --- Move this into a seperate loop since some compilers were screwing
        --- up. This will also be slightly faster too, maybe.
        do ip=ins(is)+nps(is)-1,ins(is)+nps(is)-1+ishift+1,-1
          gaminv(ip) = 0.
          if (npid > 0) pid(ip,:) = 0
        enddo

        --- change lower bound of live particles
        ins(is) = ins(is) + ishift

      endif

!$OMP MASTER
      if (ltoptimesubs) timeshftpartwork = timeshftpartwork + wtime() - substarttime
!$OMP END MASTER

      return
      end

      subroutine copypart(pgroup,it,nn,ii,istart)
      use ParticleGroupmodule
      use Subtimerstop
      use InPart
      type(ParticleGroup):: pgroup
      integer(ISZ):: it,nn,istart
      integer(ISZ):: ii(0:nn-1)

  Copies particle data from locations given by ii to the locations starting
  at 'it', or if istart > 0, copy sequential particles from locations
  starting at 'istart' to locations starting at 'it'.

      integer(ISZ):: i,in,ij
      real(kind=8):: substarttime,wtime
      if (ltoptimesubs) substarttime = wtime()

      if (istart > 0) then
        do i=0,nn-1
          ij = istart + i
          in = it + i
          pgroup%xp(in) = pgroup%xp(ij)
          pgroup%yp(in) = pgroup%yp(ij)
          pgroup%zp(in) = pgroup%zp(ij)
          pgroup%uxp(in) = pgroup%uxp(ij)
          pgroup%uyp(in) = pgroup%uyp(ij)
          pgroup%uzp(in) = pgroup%uzp(ij)
          pgroup%gaminv(in) = pgroup%gaminv(ij)
          pgroup%ex(in) = pgroup%ex(ij)
          pgroup%ey(in) = pgroup%ey(ij)
          pgroup%ez(in) = pgroup%ez(ij)
          pgroup%bx(in) = pgroup%bx(ij)
          pgroup%by(in) = pgroup%by(ij)
          pgroup%bz(in) = pgroup%bz(ij)
          if (pgroup%npid > 0) then
            pgroup%pid(in,:) = pgroup%pid(ij,:)
          endif
        enddo
      else
        do i=0,nn-1
          ij = ii(i)
          in = it + i
          pgroup%xp(in) = pgroup%xp(ij)
          pgroup%yp(in) = pgroup%yp(ij)
          pgroup%zp(in) = pgroup%zp(ij)
          pgroup%uxp(in) = pgroup%uxp(ij)
          pgroup%uyp(in) = pgroup%uyp(ij)
          pgroup%uzp(in) = pgroup%uzp(ij)
          pgroup%gaminv(in) = pgroup%gaminv(ij)
          pgroup%ex(in) = pgroup%ex(ij)
          pgroup%ey(in) = pgroup%ey(ij)
          pgroup%ez(in) = pgroup%ez(ij)
          pgroup%bx(in) = pgroup%bx(ij)
          pgroup%by(in) = pgroup%by(ij)
          pgroup%bz(in) = pgroup%bz(ij)
          if (pgroup%npid > 0) then
            pgroup%pid(in,:) = pgroup%pid(ij,:)
          endif
        enddo
      endif

!$OMP MASTER
      if (ltoptimesubs) timecopypart = timecopypart + wtime() - substarttime
!$OMP END MASTER

      return
      end

[particleboundaries_parallel]
      subroutine addpart(pgroup,nn,nnpid,x,y,z,vx,vy,vz,gi,
     &                   ex,ey,ez,bx,by,bz,ppid,is,
     &                   lallindomain,xmmin,xmmax,ymmin,ymmax,zmmin,zmmax,
     &                   l2symtry,l4symtry,lrz,
     &                   lmomentum,lfields,lnewparticles,lusespaceabove)
      use ParticleGroupmodule
      use Subtimerstop
      use Constant
      use Particles,Only: spid,xbirthpid,ybirthpid,zbirthpid,uxbirthpid,uybirthpid,uzbirthpid,ssn
      use Beam_acc
      type(ParticleGroup):: pgroup
      integer(ISZ):: is,nn,nnpid
      real(kind=8):: x(nn),y(nn),z(nn),vx(nn),vy(nn),vz(nn),gi(nn)
      real(kind=8):: ex(nn),ey(nn),ez(nn),bx(nn),by(nn),bz(nn)
      real(kind=8):: ppid(nn,nnpid)
      real(kind=8):: xmmin,xmmax,ymmin,ymmax,zmmin,zmmax
      logical(ISZ):: l2symtry,l4symtry,lrz
      logical(ISZ):: lallindomain,lmomentum,lfields,lnewparticles
      logical(ISZ):: lusespaceabove

  Adds new particles to the simulation
    nn: number of particles to add
    x,y,z,vx,vy,vz,gi: coordinates, velocities and gamma inverse
    ex,ey,ez,bx,by,bz: E and B fields associated with the particles
    is: species number to add particles to
    lallindomain: When true, all particles are assumed to be within the
                  extent of the domain and the scraping check is skipped
    xmmin,xmmax,ymmin,ymmax,zmmin,zmmax: extent of the domain
    l2symtry,l4symtry,lrz: symmetry settings, used when lallindomain is
                           false and the new particles are scraped on
                           the edge of the domain
    lmomentum: Set to false when velocites are input as velocities, and true
               when input as massless momentum. Only used when lrelativ is
               true.
    lfields: when true, copy the fields data
    lnewparticles: when true, the particles are treated as newly created
                   particles. The ssn will be set if needed, and the
                   position saved as the birth location.
    lusespaceabove: when true, the new particles are preferentially
                    placed in the space above the existing particles,
                    otherwise below.

      integer(ISZ):: ip,n1,i1,i2,j1,j2,ii
      integer(ISZ):: ilower,ihigher,spacebelow,spaceabove,nbelow,nabove
      integer(ISZ):: ipgrp1(2),ipgrp2(2)
      integer(ISZ):: inew1(2),inew2(2)
      real(kind=8):: substarttime,wtime,xt,yt
      if (ltoptimesubs) substarttime = wtime()

      if (lnewparticles) then
        --- pgroup should only ever need setting up if the particles
        --- being added are new. However, not calling setuppgroup could
        --- lead to crashes in odd cases, for example if non-new
        --- particles are being added to a new pgroup or if pgroup%ipmax
        --- has somehow been corrupted. Not calling it would save only a
        --- small amount of time, and is probably not with the risk.
        call setuppgroup(pgroup)
      endif

      --- Make sure that 'is' is within the correct range
      if (is < 1 .or. is > pgroup%ns) then
        print*,"addpart: warning: the input species is ",is,", but must be between 1 and ",pgroup%ns
        call kaboom("addpart: species number is invalid")
        return
      endif

      --- Make sure that sid is initialized. If not, assume a one-to-one
      --- correspondance between the global list of species and the list
      --- of species in this pgroup.
      if (pgroup%sid(is-1) == -1) pgroup%sid(is-1) = is-1

      --- Make sure that the particles are within the extent of the domain,
      --- unless the input declares this to be so, i.e. lallindomain is true.
      if (lallindomain) then
        n1 = nn
      else
        n1 = 0
        do ip=1,nn
          if (z(ip) < zmmin  .or. z(ip) >= zmmax) cycle
          if (lrz) then
            xt = sqrt(x(ip)**2 + y(ip)**2)
            if (xt < xmmin .or. xt >= xmmax) cycle
          else
            xt = x(ip)
            if (l4symtry) xt = abs(xt)
            yt = y(ip)
            if (l2symtry .or. l4symtry) yt = abs(yt)
            if (xt < xmmin .or. xt >= xmmax .or.
     &          yt < ymmin .or. yt >= ymmax) cycle
          endif
          n1 = n1 + 1
          if (n1 < ip) then
            x(n1) = x(ip)
            y(n1) = y(ip)
            z(n1) = z(ip)
            vx(n1) = vx(ip)
            vy(n1) = vy(ip)
            vz(n1) = vz(ip)
            gi(n1) = gi(ip)
            if (lfields) then
              ex(n1) = ex(ip)
              ey(n1) = ey(ip)
              ez(n1) = ez(ip)
              bx(n1) = bx(ip)
              by(n1) = by(ip)
              bz(n1) = bz(ip)
            end if
            if (nnpid > 0) ppid(n1,:) = ppid(ip,:)
          endif
        enddo
      endif

      --- If velocities input are not momentum, and lrelativ is true,
      --- calculate gamma inverse and convert velocities to massless
      --- momentum.
      if (.not. lmomentum .and. lrelativ) then
        gi(1:n1) = sqrt(1.- (vx(1:n1)**2+vy(1:n1)**2+vz(1:n1)**2)/clight**2)
        vx(1:n1) = vx(1:n1)/gi(1:n1)
        vy(1:n1) = vy(1:n1)/gi(1:n1)
        vz(1:n1) = vz(1:n1)/gi(1:n1)
      endif

      --- Make room for the particles
      --- Depending on the value of lusespaceabove, particles are first added
      --- to either the memory above or below the existing particles. If
      --- there is not enough space there, split the new particles, adding
      --- some to the other side as well. If there still isn't enough space,
      --- call chckpart and increase the size of the arrays.  
      --- This should hopefully minimize the movement of particle data,
      --- only shifting the data when absolutely necessary.

      --- ilower is the lowest usable array location.
      --- Note that the call to setuppgroup above is needed to make sure
      --- that ipmax is consistent with ins and nps.
      ilower = pgroup%ipmax(is-1) + 1

      --- ihigher is the highest usable array location.
      ihigher = pgroup%ipmax(is)

      --- Calculate the available space.
      spacebelow = pgroup%ins(is) - ilower
      spaceabove = ihigher - (pgroup%ins(is) + pgroup%nps(is) - 1)

      --- If there is not enough room on both ends for the new
      --- particles, increase the size of the arrays.
      if (n1 > spacebelow+spaceabove) then
        if (lusespaceabove) then
          call chckpart(pgroup,is,0,n1)
        else
          call chckpart(pgroup,is,n1,0)
        endif

        --- The indices need to be recalculated.
        --- ilower is the lowest usable array location.
        ilower = pgroup%ipmax(is-1) + 1

        --- ihigher is the highest usable array location.
        ihigher = pgroup%ipmax(is)

        --- Calculate the available space.
        spacebelow = pgroup%ins(is) - ilower
        spaceabove = ihigher - (pgroup%ins(is) + pgroup%nps(is) - 1)

      endif

      if (lusespaceabove) then
        --- Use up the space above the particles first, putting leftovers below
        nabove = min(n1,spaceabove)
        nbelow = n1 - nabove
      else
        --- Use up the space below the particles first, putting leftovers above
        nbelow = min(n1,spacebelow)
        nabove = n1 - nbelow
      endif

      --- Setup the indices for the pgroup arrays
      ipgrp1(1) = pgroup%ins(is) - nbelow
      ipgrp2(1) = pgroup%ins(is) - 1
      ipgrp1(2) = pgroup%ins(is) + pgroup%nps(is)
      ipgrp2(2) = ipgrp1(2) + nabove - 1

      --- Setup the indices for the new particle arrays
      inew1(1) = 1
      inew2(1) = nbelow
      inew1(2) = inew2(1) + 1
      inew2(2) = n1

      --- Copy data from the new particles arrays into the pgroup arrays.
      --- The loop splits the copy, with the first iteration copying
      --- particles below, and the second above the existing particles.
      do ii=1,2

        --- Do nothing if there is nothing to copy
        if (inew2(ii) < inew1(ii)) cycle

        i1 = ipgrp1(ii)
        i2 = ipgrp2(ii)
        j1 = inew1(ii)
        j2 = inew2(ii)

        pgroup%xp(i1:i2) = x(j1:j2)
        pgroup%yp(i1:i2) = y(j1:j2)
        pgroup%zp(i1:i2) = z(j1:j2)
        pgroup%uxp(i1:i2) = vx(j1:j2)
        pgroup%uyp(i1:i2) = vy(j1:j2)
        pgroup%uzp(i1:i2) = vz(j1:j2)
        pgroup%gaminv(i1:i2) = gi(j1:j2)
        if (lfields) then
          pgroup%ex(i1:i2) = ex(j1:j2)
          pgroup%ey(i1:i2) = ey(j1:j2)
          pgroup%ez(i1:i2) = ez(j1:j2)
          pgroup%bx(i1:i2) = bx(j1:j2)
          pgroup%by(i1:i2) = by(j1:j2)
          pgroup%bz(i1:i2) = bz(j1:j2)
        end if
        if (nnpid > 0) pgroup%pid(i1:i2,1:nnpid) = ppid(j1:j2,:)

        if (lnewparticles) then
          if (spid > 0) then
            --- Only set the ssn if it is not already set.
            do ip=i1,i2
              if (pgroup%pid(ip,spid) == 0) then
                pgroup%pid(ip,spid) = ssn
                ssn = ssn + 1
              endif
            enddo
          end if

          --- Save birth position or velocity at birth.
          if (xbirthpid > 0) pgroup%pid(i1:i2,xbirthpid) = x(j1:j2)
          if (ybirthpid > 0) pgroup%pid(i1:i2,ybirthpid) = y(j1:j2)
          if (zbirthpid > 0) pgroup%pid(i1:i2,zbirthpid) = z(j1:j2)
          if (uxbirthpid > 0) pgroup%pid(i1:i2,uxbirthpid) = vx(j1:j2)
          if (uybirthpid > 0) pgroup%pid(i1:i2,uybirthpid) = vy(j1:j2)
          if (uzbirthpid > 0) pgroup%pid(i1:i2,uzbirthpid) = vz(j1:j2)
        endif

      enddo

      --- Update ins and nps appropriately
      pgroup%ins(is) = pgroup%ins(is) - nbelow
      pgroup%nps(is) = pgroup%nps(is) + n1

!$OMP MASTER
      if (ltoptimesubs) timeaddpart = timeaddpart + wtime() - substarttime
!$OMP END MASTER

      return
      end

[inject3d] [loadrho3d] [processlostpart]
      subroutine clearpart(pgroup,is,fillmethod)
      use ParticleGroupmodule
      use Subtimerstop
      use InPart
      use Particles,Only: wpid
      type(ParticleGroup):: pgroup
      integer(ISZ):: is,fillmethod

  Clears out all of the lost particles. If is < 0, the clears out all
  species, otherwise only species requested.
  The empty spaces can be fill in multiple ways, based on value of fillmethod
    0 : don't do anything
    1 : fill spaces with particles from the end of the list
    2 : shift particles above down (they keep the same order)

      integer(ISZ):: is1,is2,i,ip
      integer(ISZ):: ips,ds,ipslimit
      real(kind=8):: substarttime,wtime
      if (ltoptimesubs) substarttime = wtime()

      if (is < 1) then
        is1 = 1
        is2 = pgroup%ns
      else
        is1 = is
        is2 = is

        --- Make sure that 'is' is within the correct range
        if (is < 1 .or. is > pgroup%ns) then
          print*,"clearpart: warning: the input species is ",is,", but must be between 1 and ",pgroup%ns
          call kaboom("clearpart: species number is invalid")
          return
        endif

      endif

      if (fillmethod == 1) then

        --- Live particles from the end of the species block are moved
        --- to fill in the empty spaces.
        do i=is1,is2
          ips = pgroup%ins(i) + pgroup%nps(i) - 1
          ip = pgroup%ins(i)
          pgroup%nps(i) = 0
          do while (ip <= ips)
            if (pgroup%gaminv(ip) <= 0.) then
              do while (pgroup%gaminv(ips) <= 0. .and. ips > ip)
                ips = ips - 1
              enddo
              if (ips > ip) then
                pgroup%xp(ip) = pgroup%xp(ips)
                pgroup%yp(ip) = pgroup%yp(ips)
                pgroup%zp(ip) = pgroup%zp(ips)
                pgroup%uxp(ip) = pgroup%uxp(ips)
                pgroup%uyp(ip) = pgroup%uyp(ips)
                pgroup%uzp(ip) = pgroup%uzp(ips)
                pgroup%gaminv(ip) = pgroup%gaminv(ips)
                pgroup%ex(ip) = pgroup%ex(ips)
                pgroup%ey(ip) = pgroup%ey(ips)
                pgroup%ez(ip) = pgroup%ez(ips)
                pgroup%bx(ip) = pgroup%bx(ips)
                pgroup%by(ip) = pgroup%by(ips)
                pgroup%bz(ip) = pgroup%bz(ips)
                if (pgroup%npid > 0) then
                  pgroup%pid(ip,:) = pgroup%pid(ips,:)
                  pgroup%pid(ips,:) = 0
                endif
                pgroup%uzp(ips) = 0.
                pgroup%gaminv(ips) = 0.
                if(wpidɬ) pgroup%pid(ips,wpid) = 0.
                ips = ips - 1
                pgroup%nps(i) = pgroup%nps(i) + 1
              endif
            else
              pgroup%nps(i) = pgroup%nps(i) + 1
            endif
            ip = ip + 1
          enddo
        enddo

      else if (fillmethod == 2) then

        --- Live particles are shifted downward to fill in the empty spaces
        do i=is1,is2
          ipslimit = pgroup%ins(i)+pgroup%nps(i)-1
          pgroup%nps(i) = 0
          ip = pgroup%ins(i)
          do ips=ip,ipslimit
            if (pgroup%gaminv(ips) > 0.) then
              if (ips > ip) then
                pgroup%xp(ip) = pgroup%xp(ips)
                pgroup%yp(ip) = pgroup%yp(ips)
                pgroup%zp(ip) = pgroup%zp(ips)
                pgroup%uxp(ip) = pgroup%uxp(ips)
                pgroup%uyp(ip) = pgroup%uyp(ips)
                pgroup%uzp(ip) = pgroup%uzp(ips)
                pgroup%gaminv(ip) = pgroup%gaminv(ips)
                pgroup%ex(ip) = pgroup%ex(ips)
                pgroup%ey(ip) = pgroup%ey(ips)
                pgroup%ez(ip) = pgroup%ez(ips)
                pgroup%bx(ip) = pgroup%bx(ips)
                pgroup%by(ip) = pgroup%by(ips)
                pgroup%bz(ip) = pgroup%bz(ips)
                if (pgroup%npid > 0) then
                  pgroup%pid(ip,:) = pgroup%pid(ips,:)
                  pgroup%pid(ips,:) = 0
                endif
                pgroup%uzp(ips) = 0.
                pgroup%gaminv(ips) = 0.
                if(wpidɬ) pgroup%pid(ips,wpid) = 0.
              endif
              ip = ip + 1
              pgroup%nps(i) = pgroup%nps(i) + 1
            endif
          enddo
        enddo

      else if (fillmethod == 3) then

        --- Live particles are shifted upward to fill in the empty spaces
        do i=is1,is2
          ip = pgroup%ins(i)+pgroup%nps(i)-1
          ipslimit = pgroup%ins(i)
          pgroup%nps(i) = 0
          pgroup%ins(i) = ip + 1
          do ips=ip,ipslimit,-1
            if (pgroup%gaminv(ips) > 0.) then
              if (ips < ip) then
                pgroup%xp(ip) = pgroup%xp(ips)
                pgroup%yp(ip) = pgroup%yp(ips)
                pgroup%zp(ip) = pgroup%zp(ips)
                pgroup%uxp(ip) = pgroup%uxp(ips)
                pgroup%uyp(ip) = pgroup%uyp(ips)
                pgroup%uzp(ip) = pgroup%uzp(ips)
                pgroup%gaminv(ip) = pgroup%gaminv(ips)
                pgroup%ex(ip) = pgroup%ex(ips)
                pgroup%ey(ip) = pgroup%ey(ips)
                pgroup%ez(ip) = pgroup%ez(ips)
                pgroup%bx(ip) = pgroup%bx(ips)
                pgroup%by(ip) = pgroup%by(ips)
                pgroup%bz(ip) = pgroup%bz(ips)
                if (pgroup%npid > 0) then
                  pgroup%pid(ip,:) = pgroup%pid(ips,:)
                  pgroup%pid(ips,:) = 0
                endif
                pgroup%uzp(ips) = 0.
                pgroup%gaminv(ips) = 0.
                if(wpidɬ) pgroup%pid(ips,wpid) = 0.
              endif
              ip = ip - 1
              pgroup%nps(i) = pgroup%nps(i) + 1
              pgroup%ins(i) = pgroup%ins(i) - 1
            endif
          enddo
        enddo

      endif

!$OMP MASTER
      if (ltoptimesubs) timeclearpart = timeclearpart + wtime() - substarttime
!$OMP END MASTER

      return
      end

      subroutine shrinkpart(pgroup)
      use ParticleGroupmodule
      use Subtimerstop
      use InPart
      type(ParticleGroup):: pgroup

  Removes all unused space from the particle arrays. Particles are shifted
  down and then gchange is called to free the left over space.

      integer(ISZ):: is,oldnpmax
      real(kind=8):: substarttime,wtime
      if (ltoptimesubs) substarttime = wtime()

      oldnpmax = pgroup%npmax

      pgroup%ipmax(0) = 0
      call shftpart(pgroup,1,1-pgroup%ins(1))
      pgroup%ins(1) = 1
      pgroup%ipmax(1) = pgroup%ins(1) + pgroup%nps(1) - 1

      do is=2,pgroup%ns
        call shftpart(pgroup,is,pgroup%ins(is-1)+pgroup%nps(is-1)-pgroup%ins(is))
        pgroup%ins(is) = pgroup%ins(is-1)+pgroup%nps(is-1)
        pgroup%ipmax(is) = pgroup%ins(is) + pgroup%nps(is) - 1
      enddo

      pgroup%npmax = pgroup%ins(pgroup%ns) + pgroup%nps(pgroup%ns) - 1

      --- Force npmax to change to ensure that the particles arrays are
      --- reallocated.
      if (pgroup%npmax == oldnpmax) pgroup%npmax = pgroup%npmax + 1

      call ParticleGroupchange(pgroup)

!$OMP MASTER
      if (ltoptimesubs) timeshrinkpart = timeshrinkpart + wtime() - substarttime
!$OMP END MASTER

      return
      end

[padvncrz] [padvncxy] [particleboundaries3d] [particleboundariesxy]
      subroutine processlostpart(pgroup,is,clearlostpart,time,zbeam)
      use ParticleGroupmodule
      use Subtimerstop
      use Constant
      use Beam_acc
      use InPart
      use InDiag
      use Particles,Only: wpid
      use LostParticles
      use Z_arrays
      use Parallel,Only: my_index
      type(ParticleGroup):: pgroup
      integer(ISZ):: is,clearlostpart
      real(kind=8):: time,zbeam

  Processes lost particles, which are flagged by having gaminv set to zero.
  The particle data is optionally saved. The velocities are set to zero and
  the routine which clears out the lost particles is called.

      integer(ISZ):: nlost,i1,i2,js,isid
      integer(ISZ):: ip,iz
      real(kind=8):: usq
      real(kind=8):: substarttime,wtime
      if (ltoptimesubs) substarttime = wtime()
      
      --- Make sure that 'is' is within the correct range
      if (is < 1 .or. is > pgroup%ns) then
        print*,"processlostpart: warning: the input species is ",is,", but must be between 1 and ",pgroup%ns
        call kaboom("processlostpart: species number is invalid")
        return
      endif

      if (lsavelostpart) then

        --- Make sure that if npid has changed, then npidlost is also
        --- changed.
        if (pgroup%npid > npidlost) then
          npidlost = pgroup%npid
          call gchange("LostParticles",0)
        endif

        --- Get the number of lost particles and make sure there is
        --- enough space to save them in the LostParticles arrays.
        i1 = pgroup%ins(is)
        i2 = pgroup%ins(is)+pgroup%nps(is)-1
        nlost = count(pgroup%gaminv(i1:i2)==0.)
        call chcklostpart(is,0,nlost)

      endif

      --- Get global species number
      isid = pgroup%sid(is-1) + 1
      if (isid == 0) return

      --- Check lspeciesmoments, since it affects lostpars
      call checkz_arrays(ns)
      if (lspeciesmoments) then
        js = isid - 1
      else
        js = 0
      endif

      --- Loop over the particles arrays and process the ones with gaminv
      --- set to zero, the flag for scraped particles.
      do ip=pgroup%ins(is),pgroup%ins(is)+pgroup%nps(is)-1

        --- Accumulate the array holding the location where particles
        --- are lost.
        if (pgroup%gaminv(ip) == 0.) then
          iz = max(0,min(nzzarr,int((pgroup%zp(ip) - zzmin - zbeam)*dzzi)))
          lostpars(iz,js) = lostpars(iz,js) + 1
          if (nszarr > 0) lostpars(iz,nszarr) = lostpars(iz,nszarr) + 1
        endif

        --- If requested to save lost particles, do it. Particle data is
        --- only save if gaminv == 0. Particles with gaminv < 0 have been
        --- lost but are flagged not to be saved.
        if (lsavelostpart .and. pgroup%gaminv(ip) == 0.) then
          i1 = inslost(isid) + npslost(isid)
          xplost(i1) = pgroup%xp(ip)
          yplost(i1) = pgroup%yp(ip)
          zplost(i1) = pgroup%zp(ip)
          uxplost(i1) = pgroup%uxp(ip)
          uyplost(i1) = pgroup%uyp(ip)
          uzplost(i1) = pgroup%uzp(ip)
          exlost(i1) = pgroup%ex(ip)
          eylost(i1) = pgroup%ey(ip)
          ezlost(i1) = pgroup%ez(ip)
          bxlost(i1) = pgroup%bx(ip)
          bylost(i1) = pgroup%by(ip)
          bzlost(i1) = pgroup%bz(ip)
          if (lrelativ) then
            usq = pgroup%uxp(ip)**2 + pgroup%uyp(ip)**2 + pgroup%uzp(ip)**2
            gaminvlost(i1) = 1./sqrt(1. + usq/clight**2)
          else
            gaminvlost(i1) = 1.
          endif
          tplost(i1) = time
          if (pgroup%npid > 0) then
            pidlost(i1,1:pgroup%npid) = pgroup%pid(ip,:)
          endif
          if (npidlost > pgroup%npid) pidlost(i1,pgroup%npid+1:)=0.
          npslost(isid) = npslost(isid) + 1
        endif

      enddo

      call clearpart(pgroup,is,clearlostpart)

!$OMP MASTER
      if (ltoptimesubs) timeprocesslostpart = timeprocesslostpart + wtime() - substarttime
!$OMP END MASTER

      return
      end

      subroutine particlesortyzwithcopy(pgroup,dy,dz,ymmin,zmmin,ny,nz)
      use ParticleGroupmodule
      type(ParticleGroup):: pgroup
      real(kind=8):: dy,dz,ymmin,zmmin
      integer(ISZ):: ny,nz

  Sorts particles, using a full size extra particle array for temporary
  space.

      integer(ISZ):: is,ip,iy,iz,pindexmin
      integer(ISZ),pointer:: pindex(:)
      integer(ISZ),pointer:: npblock(:)

      allocate(npblock((1+ny)*(1+nz)))

      --- Treat the particles in each species separately
      do is=1,pgroup%ns
        allocate(pindex(pgroup%nps(is)))

        --- First, find which block each particle goes into and count how
        --- many are in each block
        pindex = 0
        do ip=pgroup%ins(is),pgroup%ins(is)+pgroup%nps(is)-1
          iy = abs(pgroup%yp(ip) - ymmin)/dy
          iz = abs(pgroup%zp(ip) - zmmin)/dz
          pindex(ip-pgroup%ins(is)+1) = iy + (ny+1)*iz
        enddo

        --- Do the sorting
        pindexmin = minval(pindex)
        call particlesortbyindex(pgroup,pindex,pindexmin,
     &                           pgroup%ins(is),pgroup%nps(is),
     &                           (1+ny)*(1+nz),npblock)

        deallocate(pindex)

      --- End of loop over species
      enddo

      deallocate(npblock)

      return
      end

      subroutine particlesortxyzwithcopy(pgroup,dx,dy,dz,xmmin,ymmin,zmmin,nx,ny,nz)
      use ParticleGroupmodule
      type(ParticleGroup):: pgroup
      real(kind=8):: dx,dy,dz,xmmin,ymmin,zmmin
      integer(ISZ):: nx,ny,nz

  Sorts particles, using a full size extra particle array for temporary
  space.

      integer(ISZ):: is,ip,ix,iy,iz,pindexmin
      integer(ISZ),pointer:: pindex(:)
      integer(ISZ),pointer:: npblock(:)

      allocate(npblock((1+nx)*(1+ny)*(1+nz)))

      --- Treat the particles in each species separately
      do is=1,pgroup%ns
        allocate(pindex(pgroup%nps(is)))

        --- First, find which block each particle goes into and count how
        --- many are in each block
        pindex = 0
        do ip=pgroup%ins(is),pgroup%ins(is)+pgroup%nps(is)-1
          ix = abs(pgroup%xp(ip) - xmmin)/dx
          iy = abs(pgroup%yp(ip) - ymmin)/dy
          iz = abs(pgroup%zp(ip) - zmmin)/dz
          pindex(ip-pgroup%ins(is)+1) = ix + (nx+1)*(iy + (ny+1)*iz)
        enddo

        --- Do the sorting
        pindexmin = minval(pindex)
        call particlesortbyindex(pgroup,pindex,pindexmin,
     &                           pgroup%ins(is),pgroup%nps(is),
     &                           (1+nx)*(1+ny)*(1+nz),npblock)

        deallocate(pindex)

      --- End of loop over species
      enddo

      deallocate(npblock)

      return
      end

[particlesortxyzwithcopy] [particlesortyzwithcopy]
      subroutine particlesortbyindex(pgroup,pindex,pindexmin,ipmin,np,
     &                               nn,npblock)
      use ParticleGroupmodule
      use Beam_acc, Only: lrelativ
      type(ParticleGroup):: pgroup
      integer(ISZ):: pindexmin,ipmin,np,nn
      integer(ISZ):: npblock(nn)
      integer(ISZ):: pindex(np)

      integer(ISZ):: ip,ipb,ipid
      integer(ISZ),pointer:: ipblock(:)
      real(kind=8),pointer:: pdata(:)
      
      allocate(ipblock(nn))

      npblock = 0

      --- First, count how many are in each block
      do ip=1,np
        ipb = pindex(ip) - pindexmin + 1
        npblock(ipb) = npblock(ipb) + 1
      enddo

      --- Calculate the starting location of each block
      ipblock(1) = 1
      do ipb=2,nn
        ipblock(ipb) = ipblock(ipb-1) + npblock(ipb-1)
      enddo

      --- Replace pindex with the new array index.
      do ip=1,np
        ipb = pindex(ip) - pindexmin + 1
        pindex(ip) = ipblock(ipb)
        ipblock(ipb) = ipblock(ipb) + 1
      enddo

      --- Now, the sort can be done.
      allocate(pdata(np))
      call particlecopysorteddata(np,pgroup%xp(ipmin:ipmin+np-1),pdata,pindex)
      call particlecopysorteddata(np,pgroup%yp(ipmin:ipmin+np-1),pdata,pindex)
      call particlecopysorteddata(np,pgroup%zp(ipmin:ipmin+np-1),pdata,pindex)
      call particlecopysorteddata(np,pgroup%uxp(ipmin:ipmin+np-1),pdata,pindex)
      call particlecopysorteddata(np,pgroup%uyp(ipmin:ipmin+np-1),pdata,pindex)
      call particlecopysorteddata(np,pgroup%uzp(ipmin:ipmin+np-1),pdata,pindex)
      if (lrelativ) then
        call particlecopysorteddata(np,pgroup%gaminv(ipmin:ipmin+np-1),pdata,pindex)
      endif
      call particlecopysorteddata(np,pgroup%ex(ipmin:ipmin+np-1),pdata,pindex)
      call particlecopysorteddata(np,pgroup%ey(ipmin:ipmin+np-1),pdata,pindex)
      call particlecopysorteddata(np,pgroup%ez(ipmin:ipmin+np-1),pdata,pindex)
      call particlecopysorteddata(np,pgroup%bx(ipmin:ipmin+np-1),pdata,pindex)
      call particlecopysorteddata(np,pgroup%by(ipmin:ipmin+np-1),pdata,pindex)
      call particlecopysorteddata(np,pgroup%bz(ipmin:ipmin+np-1),pdata,pindex)

      do ipid=1,pgroup%npid
        call particlecopysorteddata(np,pgroup%pid(ipmin:ipmin+np-1,ipid),pdata,
     &                              pindex)
      enddo

      deallocate(pdata)
      deallocate(ipblock)

      return
      end

[particlesortbyindex]
      subroutine particlecopysorteddata(npmax,source,target,pindex)
      integer(ISZ):: npmax
      real(kind=8):: source(npmax),target(npmax)
      integer(ISZ):: pindex(npmax)

      integer(ISZ):: ip,ipi

      do ip=1,npmax
        ipi = pindex(ip)
        if (ipi == 0) cycle
        target(ipi) = source(ip)
      enddo
      do ip=1,npmax
        source(ip) = target(ip)
      enddo

      return
      end

      subroutine particlesortxyzwithcopynew(pgroup,dx,dy,dz,xmmin,ymmin,zmmin,
     &                                      nx,ny,nz)
      use Beam_acc, Only: lrelativ
      use ParticleGroupmodule
      type(ParticleGroup):: pgroup
      real(kind=8):: dx,dy,dz,xmmin,ymmin,zmmin
      integer(ISZ):: nx,ny,nz

  Sorts particles, moving only those particles that need to be moved.

      integer(ISZ):: nn
      integer(ISZ):: is,ip,ib,ix,iy,iz,ii,pindexmin,nmove,i1,i2,ipid,np
      integer(ISZ),pointer:: pindex(:)
      integer(ISZ),pointer:: npblock(:),ipblock(:)
      integer(ISZ),pointer:: ipfree(:)
      integer(ISZ),pointer:: ipmove(:)
      integer(ISZ),pointer:: pimove(:)
      real(kind=8),pointer:: pdata(:)

      nn = (1+nx)*(1+ny)*(1+nz)
      allocate(npblock(nn),ipblock(nn+1))

      --- Treat the particles in each species separately
      do is=1,pgroup%ns
        np = pgroup%nps(is)
        i1 = pgroup%ins(is)
        i2 = i1 + np - 1
        allocate(pindex(np))
        allocate(ipmove(np))
        allocate(ipfree(np))

        --- First, find which block each particle goes into and count how
        --- many are in each block.
        npblock = 0
        do ip=i1,i2
          ix = abs(pgroup%xp(ip) - xmmin)/dx
          iy = abs(pgroup%yp(ip) - ymmin)/dy
          iz = abs(pgroup%zp(ip) - zmmin)/dz
          ii = ix + (nx+1)*(iy + (ny+1)*iz)
          pindex(ip-i1+1) = ii
          npblock(ii) = npblock(ii) + 1
        enddo

        --- Calculate the starting location of each block in the
        --- particle arrays. Note that ipblock is one element longer.
        --- The last element will contain np+1, which makes indexing
        --- easier in a loop below for the last particles.
        --- Reassign npblock to the starting position of each block
        --- (npblock does double duty).
        ipblock(1) = 1
        do ib=2,nn+1
          ipblock(ib) = ipblock(ib-1) + npblock(ib-1)
          npblock(ib-1) = ipblock(ib-1)
        enddo

        --- Create a list of all of the particles that need to be moved,
        --- saving it in ipmove. Note that these are the same places where
        --- moved particles will be put. Create a list of these places,
        --- organized by where the places are relative to the sorted list.
        --- The ipfree is this list, and the npblock stores the location
        --- in ipfree where the list of free places are for each grid block.
        nmove = 0
        ib = 1
        do ip=1,np
          --- This find the grid block that the particle is currently in.
          do while (ip > ipblock(ib+1))
            ib = ib + 1
          enddo
          --- pindex is the grid block where the particles needs to go.
          ii = pindex(ip)
          --- If the particle is outside of the range where it is supposed to
          --- be, the save its ip, and add that ip to the list of places
          --- where particles can be moved to.
          if (ip < ipblock(ii) .or. ipblock(ii+1) <= ip) then
            nmove = nmove + 1
            ipmove(nmove) = ip
            ipfree(npblock(ib)) = ip
            npblock(ib) = npblock(ib) + 1
          endif
        enddo

        allocate(pimove(nmove))
        do ip=1,nmove
          ii = pindex(ipmove(ip))
          npblock(ii) = npblock(ii) - 1
          pimove(ip) = ipfree(npblock(ii))
        enddo

        --- Now, the sort can be done.
        allocate(pdata(nmove))
        call particlemovesorteddata(np,pgroup%xp(i1:i2),nmove,ipmove,pimove,pdata)
        call particlemovesorteddata(np,pgroup%yp(i1:i2),nmove,ipmove,pimove,pdata)
        call particlemovesorteddata(np,pgroup%zp(i1:i2),nmove,ipmove,pimove,pdata)
        call particlemovesorteddata(np,pgroup%uxp(i1:i2),nmove,ipmove,pimove,pdata)
        call particlemovesorteddata(np,pgroup%uyp(i1:i2),nmove,ipmove,pimove,pdata)
        call particlemovesorteddata(np,pgroup%uzp(i1:i2),nmove,ipmove,pimove,pdata)
        if (lrelativ) then
          call particlemovesorteddata(np,pgroup%gaminv(i1:i2),nmove,ipmove,pimove,pdata)
        endif
        call particlemovesorteddata(np,pgroup%ex(i1:i2),nmove,ipmove,pimove,pdata)
        call particlemovesorteddata(np,pgroup%ey(i1:i2),nmove,ipmove,pimove,pdata)
        call particlemovesorteddata(np,pgroup%ez(i1:i2),nmove,ipmove,pimove,pdata)
        call particlemovesorteddata(np,pgroup%bx(i1:i2),nmove,ipmove,pimove,pdata)
        call particlemovesorteddata(np,pgroup%by(i1:i2),nmove,ipmove,pimove,pdata)
        call particlemovesorteddata(np,pgroup%bz(i1:i2),nmove,ipmove,pimove,pdata)

        do ipid=1,pgroup%npid
          call particlemovesorteddata(np,pgroup%pid(:,i1:i2),nmove,ipmove,pimove,pdata)
        enddo

        deallocate(pindex)
        deallocate(ipmove)
        deallocate(ipfree)
        deallocate(pimove)
        deallocate(pdata)

      --- End of loop over species
      enddo

      deallocate(npblock)
      deallocate(ipblock)

      return
      end

[particlesortxyzwithcopynew]
      subroutine particlemovesorteddata(npmax,source,nmove,ipmove,pimove,pdata)
      integer(ISZ):: npmax,nmove
      real(kind=8):: source(npmax)
      integer(ISZ):: ipmove(nmove),pimove(nmove)
      real(kind=8):: pdata(nmove)

      integer(ISZ):: ip

      do ip=1,nmove
        pdata(ip) = source(ipmove(ip))
      enddo
      do ip=1,nmove
        source(pimove(ip)) = pdata(ip)
      enddo

      return
      end

[w3dgen] [wxygen]
      subroutine alotlostpart
      use Subtimerstop
      use InPart
      use Particles,Only: npid
      use LostParticles

  Allocate space for lost particles and set up associated arrays.

      integer(ISZ):: is,ipmax
      real(kind=8):: substarttime,wtime
      if (ltoptimesubs) substarttime = wtime()

      npidlost = max(npid,npidlost)
      call gchange("LostParticles",0)

      ipmax = 0
      do is=1,ns
        inslost(is) = ipmax + 1
        npslost(is) = 0
        ipmax = ipmax + npmaxlost*sp_fract(is)
      enddo

!$OMP MASTER
      if (ltoptimesubs) timealotlostpart = timealotlostpart + wtime() - substarttime
!$OMP END MASTER

      return
      end

[processlostpart]
      subroutine chcklostpart(is,nlower,nhigher)
      use Subtimerstop
      use InPart
      use LostParticles
      use Particles,Only: npid
      integer(ISZ):: is,nlower,nhigher

  Make sure that there is enough space in the lost particle arrays for nlower
  new particles below and nhigher above the lost particles.  Returns if
  there is already enough space above and below.  If there is enough total
  space but not enough room above or below, the lost particles are shifted
  appropriately. If there is not enough space, add more to the arrays.
  Particle data is shifted appropriately.

      integer(ISZ):: nadd,i,ishift
      integer(ISZ):: ilower,ihigher,spacebelow,spaceabove
      real(kind=8):: substarttime,wtime
      if (ltoptimesubs) substarttime = wtime()

      --- Make sure that npidlost is up to date.
      if (npid > npidlost) then
        npidlost = npid
        call gchange("LostParticles",0)
      endif

      --- Make sure that ins has a sensible value for all species
      if (inslost(1) == 0) inslost(1) = 1
      do i=2,ns
        if (inslost(i) == 0) inslost(i) = inslost(i-1) + npslost(i-1)
      end do

      --- ilower is the lowest usable array location.
      if (is == 1) then
        ilower = 1
      else
        ilower = inslost(is-1) + npslost(is-1)
      endif

      --- ihigher is the highest usable array location.
      if (is == ns) then
        ihigher = npmaxlost
      else
        ihigher = inslost(is+1) - 1
      endif

      --- Calculate the available space.
      spacebelow = inslost(is) - ilower
      spaceabove = ihigher - (inslost(is) + npslost(is) - 1)

      --- If there is already enough space, then return.
      if (spacebelow >= nlower .and.
     &    spaceabove >= nhigher) return

      --- If there is not enough space, then allocate new space.
      if (nlower+nhigher > spacebelow+spaceabove) then

        --- Amount of new space to add: this is calculated to be a sizable
        --- chunk, but not too big.  By default, add 10 times the space needed.
        --- Limit that to 10000, except, if the space needed is greater than
        --- 10000, add the amount of space needed.
        nadd = nlower + nhigher - spacebelow - spaceabove
        nadd = max(lostpartchunksize, nadd)

        --- change the particle array dimension variables and allot space
        npmaxlost = npmaxlost + nadd
        call gchange("LostParticles",0)

        --- Loop over species species above 'is', shifting
        --- them up to make space below for species is.
        do i=ns,is+1,-1
          call shftlostpart(i,nadd)
        enddo

        spaceabove = spaceabove + nadd

      endif

      --- Now, there is enough room to shift the particles appropriately.
      ishift = max(0,nlower - spacebelow) +
     &         min(0,spaceabove - nhigher)
      call shftlostpart(is,ishift)

!$OMP MASTER
      if (ltoptimesubs) timechcklostpart = timechcklostpart + wtime() - substarttime
!$OMP END MASTER

      return
      end

[chcklostpart]
      subroutine shftlostpart(is,ishift)
      use Subtimerstop
      use InPart
      use LostParticles
      integer(ISZ):: is,ishift
      real(kind=8):: substarttime,wtime
      if (ltoptimesubs) substarttime = wtime()

      if (npidlost > 0) then
        call shftpartwork(is,ishift,npmaxlost,ns,
     &                    inslost,npslost,npidlost,
     &                    xplost,yplost,zplost,uxplost,uyplost,uzplost,
     &                    gaminvlost,
     &                    .false.,0.,0.,0.,0.,0.,0.,pidlost)
      else
        call shftpartwork(is,ishift,npmaxlost,ns,
     &                    inslost,npslost,npidlost,
     &                    xplost,yplost,zplost,uxplost,uyplost,uzplost,
     &                    gaminvlost,
     &                    .false.,0.,0.,0.,0.,0.,0.,0.)
      endif

!$OMP MASTER
      if (ltoptimesubs) timeshftlostpart = timeshftlostpart + wtime() - substarttime
!$OMP END MASTER

      return
      end

[loadrho3d] [loadrhoxy] [w3dgen] [wxygen]
      subroutine setupSubcycling(pgroup)
      use ParticleGroupmodule
      use Subcycling
      type(ParticleGroup):: pgroup

  Setups in the bookkeepping variables and arrays for particle subcycling.
  These are generic and can be used by any field solver.

      integer(ISZ):: js,i,nsndtstemp,newndts

      --- If there are no particle species, then do nothing
      if (pgroup%ns == 0) return

      --- Find maximum value of ndts and use it to size the ndtstorho array
      ndtsmax = max(ndtsmax,maxval(pgroup%ndts))
      nsndts = max(1,nsndts)
      call gchange("Subcycling",0)

      --- Force the first subcycling group to be the one which advances
      --- every time step (even if no such group exists).
      ndts(0) = 1
      ndtstorho(1) = 0

      --- Find any new time step sizes which havn't been included and add
      --- space to the rhopndts array.
      newndts = 0
      do js = 0, pgroup%ns-1
        if (ndtstorho(pgroup%ndts(js)) == -1) then
          ndtstorho(pgroup%ndts(js)) = -2
          newndts = newndts + 1
        endif
      enddo
      if (newndts > 0) then
        nsndtstemp = nsndts
        nsndts = nsndts + newndts
        if (ndtsmax > 1) nrhopndtscopies = 2
        call gchange("Subcycling",0)
        do js = 0, pgroup%ns-1
          if (ndtstorho(pgroup%ndts(js)) == -2) then
            ndtstorho(pgroup%ndts(js)) = nsndtstemp
            ndts(nsndtstemp) = pgroup%ndts(js)
            nsndtstemp = nsndtstemp + 1
          endif
        enddo
      endif

      --- Make sure that nsndtsphi is set properly, and call the gchange
      --- to allocate phipndts.
      if (ndtsaveraging == 0 .or. ndtsaveraging == 1) then
        nsndtsphi = 1
      else
        nsndtsphi = nsndts
      endif
      call gchange("Subcycling",0)

      return
      end

      integer(ISZ) function getnsndtsforsubcycling()
      use Subcycling,Only: ndtsaveraging,nsndts

      integer(ISZ):: tmpnsndts

      if (ndtsaveraging == 0 .or. ndtsaveraging == 1) then
        tmpnsndts = 1
      elseif (ndtsaveraging == 2) then
        tmpnsndts = nsndts
      elseif (ndtsaveraging == 3) then
        tmpnsndts = nsndts
      endif

      getnsndtsforsubcycling = tmpnsndts
      return
      end

[padvnc3d]
      subroutine chgparticlesdts(pgroup)
      use ParticleGroupmodule
      use Particles, Only: chdtspid
      type(ParticleGroup):: pgroup
      
      integer(ISZ):: ins_init(pgroup%ns),nps_init(pgroup%ns),is,ip,idest,ip0,idest0
      integer(ISZ):: ins_save(pgroup%ns),nps_save(pgroup%ns)
      real(kind=8):: pidtemp(pgroup%npid)
      real(kind=8):: temp
      integer(ISZ):: nup,ndown

      --- first, move particles that have been flagged to the beggining or end
      --- of the particle arrays.
      do is=1,pgroup%ns

        --- initialize counter and save particle's start and number
        ip0 = pgroup%ins(is)
        ins_init(is) = pgroup%ins(is)
        nps_init(is) = pgroup%nps(is)

        --- Loop over particles and pick out the ones which have been flagged
        --- to be moved to a different ndts group.
        do while (ip0 < pgroup%ins(is) + pgroup%nps(is))

          --- If particle is going to a faster group, switch with lowest particle
          --- in the arrays.
          if (pgroup%pid(ip0,chdtspid) < 0.) then

            ip = ip0
            idest = pgroup%ins(is)
            pgroup%ins(is) = pgroup%ins(is) + 1
            pgroup%nps(is) = pgroup%nps(is) - 1

          elseif (pgroup%pid(ip0,chdtspid) > 0.) then

            ip = ip0
            idest = pgroup%ins(is) + pgroup%nps(is) - 1
            pgroup%nps(is) = pgroup%nps(is) - 1
            --- Note that the particle swapped in stills need to be checked.
            ip0 = ip0 - 1

          else
            ip = -1
          endif

          if (ip > 0) then

            temp = pgroup%xp(ip); pgroup%xp(ip) = pgroup%xp(idest); pgroup%xp(idest) = temp
            temp = pgroup%yp(ip); pgroup%yp(ip) = pgroup%yp(idest); pgroup%yp(idest) = temp
            temp = pgroup%zp(ip); pgroup%zp(ip) = pgroup%zp(idest); pgroup%zp(idest) = temp

            temp = pgroup%uxp(ip); pgroup%uxp(ip) = pgroup%uxp(idest); pgroup%uxp(idest) = temp
            temp = pgroup%uyp(ip); pgroup%uyp(ip) = pgroup%uyp(idest); pgroup%uyp(idest) = temp
            temp = pgroup%uzp(ip); pgroup%uzp(ip) = pgroup%uzp(idest); pgroup%uzp(idest) = temp

            temp = pgroup%gaminv(ip); pgroup%gaminv(ip) = pgroup%gaminv(idest); pgroup%gaminv(idest) = temp

            temp = pgroup%ex(ip); pgroup%ex(ip) = pgroup%ex(idest); pgroup%ex(idest) = temp
            temp = pgroup%ey(ip); pgroup%ey(ip) = pgroup%ey(idest); pgroup%ey(idest) = temp
            temp = pgroup%ez(ip); pgroup%ez(ip) = pgroup%ez(idest); pgroup%ez(idest) = temp

            temp = pgroup%bx(ip); pgroup%bx(ip) = pgroup%bx(idest); pgroup%bx(idest) = temp
            temp = pgroup%by(ip); pgroup%by(ip) = pgroup%by(idest); pgroup%by(idest) = temp
            temp = pgroup%bz(ip); pgroup%bz(ip) = pgroup%bz(idest); pgroup%bz(idest) = temp

            pgroup%pid(ip,chdtspid) = 0.
            pidtemp = pgroup%pid(ip,:); pgroup%pid(ip,:) = pgroup%pid(idest,:); pgroup%pid(idest,:) = pidtemp

          endif

          --- advance ip0
          ip0 = ip0 + 1

        enddo

        --- end of loop over species
      enddo

      --- Now, swap the particle between species.
      do is=1,pgroup%ns - 1
        ndown = pgroup%ins(is+1) - ins_init(is+1)
        nup = (ins_init(is) + nps_init(is)) - (pgroup%ins(is) + pgroup%nps(is))
        do ip0 = 0,min(ndown,nup)-1
          ip = pgroup%ins(is) + pgroup%nps(is) + ip0
          idest = pgroup%ins(is+1) - 1 - ip0

          temp = pgroup%xp(ip); pgroup%xp(ip) = pgroup%xp(idest); pgroup%xp(idest) = temp
          temp = pgroup%yp(ip); pgroup%yp(ip) = pgroup%yp(idest); pgroup%yp(idest) = temp
          temp = pgroup%zp(ip); pgroup%zp(ip) = pgroup%zp(idest); pgroup%zp(idest) = temp

          temp = pgroup%uxp(ip); pgroup%uxp(ip) = pgroup%uxp(idest); pgroup%uxp(idest) = temp
          temp = pgroup%uyp(ip); pgroup%uyp(ip) = pgroup%uyp(idest); pgroup%uyp(idest) = temp
          temp = pgroup%uzp(ip); pgroup%uzp(ip) = pgroup%uzp(idest); pgroup%uzp(idest) = temp

          temp = pgroup%gaminv(ip); pgroup%gaminv(ip) = pgroup%gaminv(idest); pgroup%gaminv(idest) = temp

          temp = pgroup%ex(ip); pgroup%ex(ip) = pgroup%ex(idest); pgroup%ex(idest) = temp
          temp = pgroup%ey(ip); pgroup%ey(ip) = pgroup%ey(idest); pgroup%ey(idest) = temp
          temp = pgroup%ez(ip); pgroup%ez(ip) = pgroup%ez(idest); pgroup%ez(idest) = temp

          temp = pgroup%bx(ip); pgroup%bx(ip) = pgroup%bx(idest); pgroup%bx(idest) = temp
          temp = pgroup%by(ip); pgroup%by(ip) = pgroup%by(idest); pgroup%by(idest) = temp
          temp = pgroup%bz(ip); pgroup%bz(ip) = pgroup%bz(idest); pgroup%bz(idest) = temp

          pidtemp = pgroup%pid(ip,:); pgroup%pid(ip,:) = pgroup%pid(idest,:); pgroup%pid(idest,:) = pidtemp

        enddo
        pgroup%nps(is) = pgroup%nps(is) + min(ndown,nup)
        pgroup%ins(is+1) = pgroup%ins(is+1) - min(ndown,nup)
        pgroup%nps(is+1) = pgroup%nps(is+1) + min(ndown,nup)

        --- Move any particles that are left over
        if (ndown > nup) then
          idest0 = pgroup%ins(is) + pgroup%nps(is)
          idest = idest0 + (ndown - nup) - 1
          ip0 = pgroup%ins(is+1) - (ndown - nup)
          ip = pgroup%ins(is+1) - 1
          pgroup%nps(is) = pgroup%nps(is) + (ndown - nup)
        else if (nup > ndown) then
          idest0 = pgroup%ins(is+1) - (nup - ndown)
          idest = pgroup%ins(is+1) - 1
          ip0 = pgroup%ins(is) + pgroup%nps(is)
          ip = ip0 + (nup - ndown) - 1
          pgroup%ins(is+1) = pgroup%ins(is+1) - (nup - ndown)
          pgroup%nps(is+1) = pgroup%nps(is+1) + (nup - ndown)
        endif
        if (nup .ne. ndown) then
          pgroup%xp(idest0:idest) = pgroup%xp(ip0:ip)
          pgroup%yp(idest0:idest) = pgroup%yp(ip0:ip)
          pgroup%zp(idest0:idest) = pgroup%zp(ip0:ip)
  
          pgroup%uxp(idest0:idest) = pgroup%uxp(ip0:ip)
          pgroup%uyp(idest0:idest) = pgroup%uyp(ip0:ip)
          pgroup%uzp(idest0:idest) = pgroup%uzp(ip0:ip)
  
          pgroup%gaminv(idest0:idest) = pgroup%gaminv(ip0:ip)
  
          pgroup%ex(idest0:idest) = pgroup%ex(ip0:ip)
          pgroup%ey(idest0:idest) = pgroup%ey(ip0:ip)
          pgroup%ez(idest0:idest) = pgroup%ez(ip0:ip)
  
          pgroup%bx(idest0:idest) = pgroup%bx(ip0:ip)
          pgroup%by(idest0:idest) = pgroup%by(ip0:ip)
          pgroup%bz(idest0:idest) = pgroup%bz(ip0:ip)
  
          pgroup%pid(idest0:idest,:) = pgroup%pid(ip0:ip,:)
          pgroup%pid(ip0:ip,:) = 0.
        endif

      enddo

      return
      end

[loadrho3d] [loadrhoxy] [w3dgen] [wxygen]
      subroutine setupSelfB(pgroup)
      use ParticleGroupmodule
      use SelfB
      use InPart,Only: efetch
      type(ParticleGroup):: pgroup

  Setups in the bookkeepping variables and arrays for particle which require
  the self B correction.
  These are generic and can be used by any field solver.

      integer(ISZ):: js1,js2,jsid

      logical(ISZ):: lnew

      --- Find any new species with different fselfb.
      do js1=0,pgroup%ns-1
        jsid = pgroup%sid(js1)
        if (jsid == -1) cycle
        lnew = .true.
        do js2=0,nsselfb-1
          if (pgroup%fselfb(js1) == fselfb(js2)) then
            lnew = .false.
            pgroup%iselfb(js1) = js2
            iselfb(jsid) = js2
          endif
        enddo
        if (lnew) then
          nsselfb = nsselfb + 1
          call gchange("SelfB",0)
          pgroup%iselfb(js1) = nsselfb-1
          iselfb(jsid) = nsselfb-1
          fselfb(nsselfb-1) = pgroup%fselfb(js1)
        endif
      enddo

      --- When the B correction is being done for any species, then
      --- force all species to use efetch version 3, which makes use of the
      --- precalculated E field (which will include the inductive effects).
      if (ANY(pgroup%fselfb .ne. 0.)) efetch = 3

      return
      end

      subroutine setupImplicit(pgroup)
      use ParticleGroupmodule
      use ImplicitModule
      type(ParticleGroup):: pgroup

  Setups the bookkeepping for implicit species.
  These are generic and can be used by any field solver.

      integer(ISZ):: js1,js2

      logical(ISZ):: lnew
      real(kind=8):: qom

      --- Find any new implicit species with different q/m
      do js1=0,pgroup%ns-1
        if (.not. pgroup%limplicit(js1)) cycle
        --- First check if the q/m of this species has already been registered
        if (pgroup%sm(js1+1) == 0) then
          print*,"setupImplicit: the mass of an implicit species must be",
     &           "nonzero, since chi depends on q/m"
          call kaboom("setupImplicit: the mass must be nonzero")
          return
        endif
        qom = pgroup%sq(js1+1)/pgroup%sm(js1+1)
        lnew = .true.
        do js2=0,nsimplicit-1
          if (qom == implicitfactor(js2)) then
            lnew = .false.
            pgroup%iimplicit(js1) = js2
          endif
        enddo
        --- If not registered, then add it to the list.
        if (lnew) then
          nsimplicit = nsimplicit + 1
          call gchange("ImplicitModule",0)
          implicitfactor(nsimplicit-1) = qom
          pgroup%iimplicit(js1) = nsimplicit-1
        endif
      enddo

      return
      end
  Routines for handling particle groups

[createparticlesfromfgrid]
      subroutine checkparticlegroup(pgroup,is,nlower,nhigher)
      use Subtimerstop
      use ParticleGroupmodule
      type(ParticleGroup):: pgroup
      integer(ISZ):: is,nlower,nhigher

  Make sure that there is enough space in the particle arrays for nlower
  new particles below and nhigher above the live particles.  Returns if
  there is already enough space above and below.  If there is enough total
  space but not enough room above or below, the particles are shifted
  appropriately. If there is not enough space, add more to the arrays.
  Particle data is shifted appropriately.

      integer(ISZ):: ilower,ihigher,spacebelow,spaceabove
      integer(ISZ):: naddbelow,naddabove,i,ishift
      real(kind=8):: substarttime,wtime
      if (ltoptimesubs) substarttime = wtime()

      --- If there is already enough space, then return.
      if (is == 1) then
        ilower = 1
      else
        ilower = pgroup%ins(is-1) + pgroup%nps(is-1)
      endif
      if (is == pgroup%ns) then
        ihigher = pgroup%npmax
      else
        ihigher = pgroup%ins(is+1) - 1
      endif
      spacebelow = pgroup%ins(is) - ilower
      spaceabove = ihigher - (pgroup%ins(is) + pgroup%nps(is) - 1)
      if (spacebelow >= nlower .and.
     &    spaceabove >= nhigher) return

      --- If there is not enough space, then allocate new space.
      if (spacebelow + spaceabove < nlower + nhigher) then

        --- Amount of new space to add: this is calculated to be a sizable
        --- chunk, but not too big.  By default, add 10 times the space needed.
        --- Limit that to 10000, except, if the space needed is greater than
        --- 10000, add the amount of space needed.
        naddbelow = max(0,nlower - spacebelow)
        naddbelow = min(10*naddbelow, max(10000, naddbelow))
        naddabove = max(0,nhigher - spaceabove)
        naddabove = min(10*naddabove, max(10000, naddabove))

        --- change the particle array dimension variables and allot space
        pgroup%npmax = pgroup%npmax + naddbelow + naddabove
        call ParticleGroupchange(pgroup)

        --- Loop over species species above 'is', shifting
        --- them up to make space below for species is.
        do i=pgroup%ns,is+1,-1
          call shiftparticlegroup(pgroup,i,naddbelow + naddabove)
        enddo

        spaceabove = spaceabove + naddbelow + naddabove

      endif

      --- Now, there is enough room to shift the particles appropriately.
      --- Note that in some cases no shift will be needed.  In those cases,
      --- ishift comes out to be zero and shftpart returns immediately when
      --- ishift is zero.
      ishift = max(0,nlower - spacebelow) +
     &         min(0,spaceabove - nhigher)
      call shiftparticlegroup(pgroup,is,ishift)

!$OMP MASTER
      if (ltoptimesubs) timecheckparticlegroup = timecheckparticlegroup + wtime() - substarttime
!$OMP END MASTER

      return
      end

[checkparticlegroup]
      subroutine shiftparticlegroup(pgroup,is,ishift)
      use Subtimerstop
      use ParticleGroupmodule
      type(ParticleGroup):: pgroup
      integer(ISZ):: is,ishift

  Shift particles by 'ishift'.
  Assumes that the is enough space in the particle arrays to make the
  shift without clobbering other particles or going past the ends of
  the arrays.

      integer(ISZ):: ip
      real(kind=8):: substarttime,wtime
      if (ltoptimesubs) substarttime = wtime()

      --- return if shift is zero
      if (ishift == 0) return

      --- Positive and negative shifts are treated differently since
      --- a negative shift may result in lost data.
      if (ishift > 0) then

        --- Loop only over live particles
        do ip=pgroup%ins(is)+pgroup%nps(is)-1, pgroup%ins(is), -1
          pgroup%xp(ip+ishift) = pgroup%xp(ip)
          pgroup%yp(ip+ishift) = pgroup%yp(ip)
          pgroup%zp(ip+ishift) = pgroup%zp(ip)
          pgroup%uxp(ip+ishift) = pgroup%uxp(ip)
          pgroup%uyp(ip+ishift) = pgroup%uyp(ip)
          pgroup%uzp(ip+ishift) = pgroup%uzp(ip)
          pgroup%gaminv(ip+ishift) = pgroup%gaminv(ip)
          pgroup%ex(ip+ishift) = pgroup%ex(ip)
          pgroup%ey(ip+ishift) = pgroup%ey(ip)
          pgroup%ez(ip+ishift) = pgroup%ez(ip)
          pgroup%bx(ip+ishift) = pgroup%bx(ip)
          pgroup%by(ip+ishift) = pgroup%by(ip)
          pgroup%bz(ip+ishift) = pgroup%bz(ip)
          if (pgroup%npid > 0) then
            pgroup%pid(ip+ishift,:) = pgroup%pid(ip,:)
            pgroup%pid(ip,:) = 0
          endif
          pgroup%gaminv(ip) = 0.
        enddo

        --- change lower bound of live particles
        pgroup%ins(is) = pgroup%ins(is) + ishift

      else
        --- Loop only over live particles.
        --- Note that particles will be clobbered if shifted onto species below.
        do ip=pgroup%ins(is), pgroup%ins(is)+pgroup%nps(is)-1
          pgroup%xp(ip+ishift) = pgroup%xp(ip)
          pgroup%yp(ip+ishift) = pgroup%yp(ip)
          pgroup%zp(ip+ishift) = pgroup%zp(ip)
          pgroup%uxp(ip+ishift) = pgroup%uxp(ip)
          pgroup%uyp(ip+ishift) = pgroup%uyp(ip)
          pgroup%uzp(ip+ishift) = pgroup%uzp(ip)
          pgroup%gaminv(ip+ishift) = pgroup%gaminv(ip)
          pgroup%ex(ip+ishift) = pgroup%ex(ip)
          pgroup%ey(ip+ishift) = pgroup%ey(ip)
          pgroup%ez(ip+ishift) = pgroup%ez(ip)
          pgroup%bx(ip+ishift) = pgroup%bx(ip)
          pgroup%by(ip+ishift) = pgroup%by(ip)
          pgroup%bz(ip+ishift) = pgroup%bz(ip)
          if (pgroup%npid > 0) then
            pgroup%pid(ip+ishift,:) = pgroup%pid(ip,:)
            pgroup%pid(ip,:) = 0
          endif
          pgroup%gaminv(ip) = 0.
        enddo

        --- change lower bound of live particles
        pgroup%ins(is) = pgroup%ins(is) + ishift

      endif

!$OMP MASTER
      if (ltoptimesubs) timeshiftparticlegroup = timeshiftparticlegroup + wtime() - substarttime
!$OMP END MASTER

      return
      end

      subroutine copygrouptogroup(pgroupin,nn,ii,istart,pgroupout,it)
      use ParticleGroupmodule
      use Subtimerstop
      use InPart
      integer(ISZ):: it,nn,istart
      integer(ISZ):: ii(0:nn-1)
      type(ParticleGroup):: pgroupin,pgroupout

  Copies particle data from locations given by ii to the locations starting
  at 'it', or if istart > 0, copy sequential particles from locations
  starting at 'istart' to locations starting at 'it'.

      integer(ISZ):: i,in,ij
      real(kind=8):: substarttime,wtime
      if (ltoptimesubs) substarttime = wtime()

      call setuppgroup(pgroupin)
      call setuppgroup(pgroupout)

      if (istart > 0) then
        do i=0,nn-1
          ij = istart + i
          in = it + i
          pgroupout%xp(in) = pgroupin%xp(ij)
          pgroupout%yp(in) = pgroupin%yp(ij)
          pgroupout%zp(in) = pgroupin%zp(ij)
          pgroupout%uxp(in) = pgroupin%uxp(ij)
          pgroupout%uyp(in) = pgroupin%uyp(ij)
          pgroupout%uzp(in) = pgroupin%uzp(ij)
          pgroupout%gaminv(in) = pgroupin%gaminv(ij)
          pgroupout%ex(in) = pgroupin%ex(ij)
          pgroupout%ey(in) = pgroupin%ey(ij)
          pgroupout%ez(in) = pgroupin%ez(ij)
          pgroupout%bx(in) = pgroupin%bx(ij)
          pgroupout%by(in) = pgroupin%by(ij)
          pgroupout%bz(in) = pgroupin%bz(ij)
          if (pgroupin%npid > 0) then
            pgroupout%pid(in,:) = pgroupin%pid(ij,:)
          endif
        enddo
      else
        do i=0,nn-1
          ij = ii(i)
          in = it + i
          pgroupout%xp(in) = pgroupin%xp(ij)
          pgroupout%yp(in) = pgroupin%yp(ij)
          pgroupout%zp(in) = pgroupin%zp(ij)
          pgroupout%uxp(in) = pgroupin%uxp(ij)
          pgroupout%uyp(in) = pgroupin%uyp(ij)
          pgroupout%uzp(in) = pgroupin%uzp(ij)
          pgroupout%gaminv(in) = pgroupin%gaminv(ij)
          pgroupout%ex(in) = pgroupin%ex(ij)
          pgroupout%ey(in) = pgroupin%ey(ij)
          pgroupout%ez(in) = pgroupin%ez(ij)
          pgroupout%bx(in) = pgroupin%bx(ij)
          pgroupout%by(in) = pgroupin%by(ij)
          pgroupout%bz(in) = pgroupin%bz(ij)
          if (pgroupin%npid > 0) then
            pgroupout%pid(in,:) = pgroupin%pid(ij,:)
          endif
        enddo
      endif

!$OMP MASTER
      if (ltoptimesubs) timecopyparttogroup = timecopyparttogroup + wtime() - substarttime
!$OMP END MASTER

      return
      end
   VARIOUS UTILITIES

      integer(ISZ) function nextpid()
      use Particles,Only: npid

      --- Returns the next value of pid and increments npid.
      --- Note that this should be used and that npid should not be directly
      --- changed.

      npid = npid + 1
      nextpid = npid

      return
      end

[fieldsolrz] [steprz] [wrzexe] [wrzgen]
      subroutine copyarry (source, target, nwords)
      integer(ISZ):: nwords
      real(kind=8):: source(nwords), target(nwords)
      integer(ISZ):: i

!$OMP PARALLEL DO IF (nwords > 1000)
      do 100 i = 1, nwords
         target(i) = source(i)
  100 continue
!$OMP END PARALLEL DO

      return
      end

      subroutine sumarry (source, target, nwords)
      integer(ISZ):: nwords
      real(kind=8):: source(nwords), target(nwords)
      integer(ISZ):: i

!$OMP PARALLEL DO IF (nwords > 1000)
      do 100 i = 1, nwords
         target(i) = target(i) + source(i)
  100 continue
!$OMP END PARALLEL DO

      return
      end

[hergen] [inj_setrho3d_z] [setcurr] [setrhohr]
      subroutine zeroarry (arry,n)
      integer(ISZ):: n
      real(kind=8):: arry(*)

   Sets ARRY to zero
      integer(ISZ):: i
!$OMP PARALLEL DO IF (n > 1000)
      do i=1,n
        arry(i) = 0.0
      enddo
!$OMP END PARALLEL DO

      return
      end

      subroutine writarry(nn,arry,filename)
      integer(ISZ):: nn
      real(kind=8):: arry(nn)
      character(*):: filename
      integer(ISZ):: ii

      open(unit=27,file=filename,status='new')

      do ii=1,nn
        write(27,20) arry(ii)
      enddo
 20   format(1x,e10.3)

      close(27)

      return
      end

      real(kind=8) function fnice(i,e10)
      integer(ISZ):: i
      real(kind=8):: e10
  this is used to pick nice numbers; also gives next larger power of 10.
  for: i =  -4 -3 -2 -1 0  1  2  3   4   5
   fnice = .05 .1 .2 .5 1  2  5 10  20  50
     e10 =  .1 .1  1  1 1 10 10 10 100 100
      i3 =   2  3  1  2 3  1  2  3   1   2
     i10 =  -1 -1  0  0 0  1  1  1   2   2

      integer(ISZ):: i10,i3
      real(kind=8):: a(3) = (/.2,.5,1./)
      i10=i/3
      if(i > 0) i10=(i+2)/3
      i3=i+3-3*i10
      e10=10.**i10
      fnice=a(i3)*e10
      return
      end

      subroutine psumx(a,b,n)
      integer(ISZ):: n
      real(kind=8):: a(n), b(n)
      b := partial sum of a
      integer(ISZ):: i
      b(1) = a(1)
      do i = 2, n
        b(i) = a(i) + b(i-1)
      enddo
      return
      end

      logical function inlist (list, listlen, text)
      integer(ISZ):: listlen
      character(8):: text, list(listlen)
      integer(ISZ):: i

      inlist = .false.
      do i = 1,listlen
         if (text == list(i)) inlist = .true.
      enddo

      return
      end

      real(kind=8) function wtime()
      real(kind=8):: cpu,io,sys,mem
      integer:: c,cr,cm
      real(kind=8):: walltime

      --- Get current time
      call system_clock(c,cr,cm)
      if (cm > 0.) then
        wtime = 1.*c/cr
      else
        --- If system_clock didn't work, setting cm==0, then try the
        --- system clock through C.
        call ostime(cpu, io, sys, mem)
        wtime = cpu
      endif
      wtime = walltime()


      return
      end

[cirexe] [envexe] [f3dgen] [frzgen] [wrzgen]
      subroutine wtimeon()
      real(kind=8):: wtime
      real(kind=8):: cpu,io,sys,mem
      common /wtimer/cpu,io,sys,mem

      --- turns timer on
      call ostime(cpu, io, sys, mem)
      cpu = wtime()

      return
      end

      real(kind=8) function wtimeoff()
      real(kind=8):: wtime
      real(kind=8):: cpu,io,sys,mem
      real(kind=8):: fcpu,fio,fsys,fmem
      common /wtimer/cpu,io,sys,mem

      --- returns elapsed time in milliseconds since last wtimeon call
      call ostime(fcpu, fio, fsys, fmem)
      fcpu = wtime()
      wtimeoff = (fcpu - cpu)*1.e3

      return
      end

      real(kind=8) function wtremain()
  Returns the time remaining in the job. Only works on the T3E. Otherwise
  returns a large number.
      real(kind=8):: py_tremain
      wtremain = py_tremain()
      return
      end

      logical function isdefmpiparallel()

#ifdef MPIPARALLEL
      isdefmpiparallel = .true.
#else
      isdefmpiparallel = .false.
#endif

      return
      end
   RANDOM NUMBER , QUIET-START, and RELATED ROUTINES

      real(kind=8) function wrandom(method,ii,idig,ifib1,ifib2)
      character(*):: method
      integer(ISZ):: ii,idig,ifib1,ifib2
      real(kind=8):: rnrev,wranf
      character(72):: errline

      if (method == "digitrev") then
        wrandom = rnrev(ii,idig)
      elseif (method == "pseudo") then
        wrandom = wranf()
      elseif (method == "fibonacc") then
        wrandom = mod((ifib1*(ii - 1.) + 0.5)/ifib2,1.0)
      else
        wrandom = 0.
        write(errline,'("wrandom: ERROR: random number generator method ",a8,
     &                  "not supported")') method
        call kaboom(errline)
      endif

      return
      end

      real(kind=8) function wrandomgauss(method,ii,idig1,idig2,ifib1,ifib2,
     &                                   lefficient)
      use Constant
      character(*):: method
      integer(ISZ):: ii,idig1,idig2,ifib1,ifib2
      logical(ISZ):: lefficient

   Gaussian random number generator: returns a value drawn from a normal
     distribution with mean 0 and variance 1.
   Uses Box-Muller-Marsaglia method:
   See Knuth, vol. 2, p.104 (first ed.), p.117 (second ed.)
   Alex Friedman, July 1989 (after an earlier version).

      real(kind=8):: wrandom
      real(kind=8):: y,v1,v2,sq,x,wranf
      integer(ISZ):: parity,i1
      integer(ISZ):: iflag = -1
      --- The save is necessary to preserve state memory
      save y, iflag

      --- The algorithm used calculated two random numbers at a time. If it is
      --- not needed, there is no reason to repeat the calculation. In this
      --- case, the extra number calculated is saved and returned on the next
      --- call. This happens when lefficient is true - which requires that
      --- this routine be called in the correct order. Otherwise, the work
      --- is repeated each time and the appropriate of the two numbers is
      --- returned.

      parity = mod(ii,2)
      if (lefficient) then

        --- In order to have a consistent stream of random numbers, the x random
        --- number is always taken from ii is odd, and the y when ii is even.
        --- This is really only needed for the parallel version.
        --- If iflag is inconsistent with the parity of ii, then adjust iflag.
        --- If odd ii is expected but ii is even, then set calculation to use
        --- ii-1 so that number used for v1 below is odd.

        iflag = -iflag
        i1 = ii
        if (iflag < 0 .and. parity == 0) then
          wrandomgauss = y
          return
        elseif (iflag < 0 .and. parity == 1) then
          iflag = -iflag
        elseif (iflag > 0 .and. parity == 0) then
          iflag = -iflag
          i1 = ii - 1
        endif

      else
        if (parity == 0) then
          i1 = ii - 1
        else
          i1 = ii
        endif
      endif

      v1 = wrandom(method,i1,idig1,ifib1,ifib2)
      v2 = 2.*pi*wrandom(method,i1+1,idig2,ifib1,ifib2)
      sq = sqrt(-2.*log(v1))
      x = sq*cos(v2)
      y = sq*sin(v2)
      if (parity == 1) then
        wrandomgauss = x
      else
        wrandomgauss = y
      endif

      return
      end

      real(kind=8) function r2rev(xs)
      real(kind=8):: xs
   xs must be initialized before use (usually to 0)
   and preserved between calls.
   value is given by bit-reversed counter -compare sorter in cpft.
   xs=.000,.100,.010,.110,.001,.101,.011,.111,.0001.. (binary fractions)
   is the value of r2rev on the NEXT call. 
      real(kind=8):: xsi
      r2rev = xs
      xsi = 1.
   42  xsi = .5 * xsi
       xs = xs - xsi
      if (xs >= 0.) go to 42
      xs = xs + 2. * xsi
      return
      end

      real(kind=8) function rnrev(i,nbase)
      integer(ISZ):: i,nbase
   Returns a fraction (0 <= rnrev <= 1) representing the
   nbase-reversed number corresponding to i; i.e.
   for i written in base nbase as n4n3n2n1.0, this returns
   rnrev = 0.n1n2n3n4, where n1, n2.. are the digits in base n.
   Disregards roundoff errors for nbase not a power of 2.
   From N. Otani, 1985
      integer(ISZ):: j,n,jnext
      real(kind=8):: powern
      rnrev = 0.
      j = i
      n = nbase
      powern = 1.
   10 jnext = j / n
      powern = powern / n
      rnrev = rnrev + (j-jnext*n) * powern
      if (jnext == 0) return
      j = jnext
      go to 10
      end

      subroutine rnrevarray(n,x,i,nbase)
      integer(ISZ):: n,i,nbase
      real(kind=8):: x(n)
  Fills an array with uniformly distributed digit reversed numbers.
  The numbers range from 0 to 1.
      real(kind=8):: rnrev
      integer(ISZ):: j
      do j=1,n
        x(j) = rnrev(i+j-1,nbase)
      enddo
      return
      end

      subroutine sphere4(a,b,c,d,n)
      use Constant
      integer(ISZ):: n
      real(kind=8):: a(n), b(n), c(n), d(n)
    Distribute points on surface of a 4-d unit sphere.
    (a*a + b*b) increases, and (c*c + d*d) decreases, monotonically.
      real(kind=8):: xsrev,rr,phi,r2rev,rnrev
      integer(ISZ):: i

      xsrev = 0.                ! get same sequence if call again
      do i = 1,n
        rr = sqrt((i-.5)/n)
        phi = 2.*pi*r2rev(xsrev)
        a(i) = rr*cos(phi)
        b(i) = rr*sin(phi)
        phi = 2.*pi*rnrev(i-1,3)
        rr = sqrt(1.-rr*rr)
        c(i) = rr*cos(phi)
        d(i) = rr*sin(phi)
      enddo
      return
      end

      subroutine sphere4f(a,b,c,d,g1,g2,g3)
      use Constant
      integer(ISZ):: g1,g2,g3
      real(kind=8):: a(g1), b(g1), c(g1), d(g1)
    Distribute points on surface of a 4-d unit sphere using fibonacci numbers.
    (a*a + b*b) increases, and (c*c + d*d) decreases, monotonically.
      integer(ISZ):: i
      real(kind=8):: rr,phi

      do i = 1,g1
         rr = sqrt((i-.5)/g1)
         phi = 2.*pi*mod((g2*(i-1)+0.5)/g1, 1.0)
         a(i) = rr*cos(phi)
         b(i) = rr*sin(phi)
         phi = 2.*pi*mod((g3*(i-1)+0.5)/g1, 1.0)
         rr = sqrt(1.-rr*rr)
         c(i) = rr*cos(phi)
         d(i) = rr*sin(phi)
      enddo
      return
      end

      real(kind=8) function rnorm ()
      use Constant

   Gaussian random number generator: returns a value drawn from a normal
     distribution with mean 0 and variance 1.
   Uses Box-Muller-Marsaglia method:
   See Knuth, vol. 2, p.104 (first ed.), p.117 (second ed.)
   Alex Friedman, July 1989 (after an earlier version).

      --- This common block necessary to preserve state memory:
      real(kind=8):: y,v1,v2,sq,x,wranf
      real(kind=8):: s
      integer(ISZ):: iflag = -1
      save y, iflag

      iflag = -iflag
      if (iflag < 0) then
         rnorm = y
         return
      endif

  100 v1 = 2. * wranf() - 1.
      v2 = 2. * wranf() - 1.
      s = v1**2 + v2**2 + SMALLPOS
      if (s > 1.) go to 100
      sq = sqrt(-2. * log(s) / s)
      x = v1 * sq
      y = v2 * sq
      rnorm = x

      v1 = wranf()
      v2 = 2.*pi*wranf()
      sq = sqrt(-2.*log(v1))
      x = sq*cos(v2)
      y = sq*sin(v2)
      rnorm = x

      return
      end

[stptclrz]
      subroutine rnormdig (i1,n,nbase1,nbase2,dx,x)
      use Constant
      integer(ISZ):: i1,n,nbase1,nbase2
      real(kind=8):: dx
      real(kind=8):: x(n)

   Gaussian random number generator: returns a value drawn from a normal
     distribution with mean 0 and variance 1.
     chosen using digit reversed type random numbers
   Uses Box-Muller-Marsaglia method:
   See Knuth, vol. 2, p.104 (first ed.), p.117 (second ed.)
   Alex Friedman, July 1989 (after an earlier version).
   David P. Grote, Febuary 1990 (added digit reversed stuff)
      integer(ISZ):: i,iparity
      real(kind=8):: s,phi,sq,rnrev

      --- Get first number separately if parity of i1 is even. Since i1
      --- always starts with 1, the first number is always odd to keep
      --- consistency. This primarily effects the parallel version.
      --- The parity of i1+1 is used for convenience.
      iparity = mod(i1+1,2)
      if (iparity == 1) then
        s = rnrev(i1-1,nbase1) + dx
        phi = 2.*pi*rnrev(i1,nbase2)
        sq = sqrt(-2.*log(s))
        x(1) = sq*sin(phi)
      endif

      do i=i1+iparity,i1+n-2,2
        s = rnrev(i,nbase1) + dx
        phi = 2.*pi*rnrev(i+1,nbase2)
        sq = sqrt(-2.*log(s))
        x(i-i1+1) = sq*cos(phi)
        x(i-i1+2) = sq*sin(phi)
      enddo

      --- Get last number if (n-iparity) is odd.
      if (mod(n-iparity,2) == 1) then
        s = rnrev(i1+n-1,nbase1) + dx
        phi = 2.*pi*rnrev(i1+n,nbase2)
        sq = sqrt(-2.*log(s))
        x(n) = sq*cos(phi)
      endif

      return
      end

      real(kind=8) function rm()

   Crude approximation to a Gaussian with mean 0, standard deviation 1
   Advantage is, it cuts off smoothly at 3 root 2. 
   A similar one (used in some older AF codes) adds 12 variates, subtracts 6.

      real(kind=8):: wranf
      real(kind=8):: root2 = 1.41421356237
      rm = root2*(wranf()+wranf()+wranf()+wranf()+wranf()+wranf()-3.)
      return
      end

      subroutine rma(a,n)
      integer(ISZ):: n
      real(kind=8):: a(n)
    From interpreter, rma(&a,n) returns n Gaussian random numbers.
      integer(ISZ):: i
      real(kind=8):: rm

      do i = 1,n
        a(i) = rm()
      enddo
      return
      end

      subroutine load1d(np,x,nx,n,dx)
      integer(ISZ):: np,nx
      real(kind=8):: x(np),n(0:nx)
      real(kind=8):: dx

  Load particles onto a one dimensional distribution.  This load is only
  approximate in that when the resulting particles are loaded back onto
  the same 1-D grid from which the distribution is taken, the distribution
  is not exactly reproduced. (The exact version is still under development.)

      real(kind=8):: rnrev
      integer(ISZ):: i,ip,ix,di
      real(kind=8):: nnorm(0:nx),n1x(0:nx),sumn
      real(kind=8):: nintx(0:nx)

      if (minval(n) < 0.) then
        call kaboom("load1d: all values of n must be >= 0")
        return
      endif

      sumn = sum(n) - 0.5*n(0) - 0.5*n(nx)
      nnorm = n/sumn

      --- nintx is the partial sum of n
      --- nintx(0:nx) = psum(n) - 0.5*n(0) - 0.5*n
      nintx(0) = 0.
      do ix=1,nx
        nintx(ix) = nintx(ix-1) + 0.5*(nnorm(ix-1) + nnorm(ix))
      enddo

      --- Now distribute the particles
      --- The algorithm assumes that n is piece-wise linear between grid
      --- points and uses the analytic integration of that form between
      --- the grid points.
      --- When the distribution has a nearly uniform section, a modified
      --- expression is required.
      do ip=1,np

        --- Initialize xp with a uniform load
        x(ip) = .5/np + wranf()*(np-1.)/np
        x(ip) = rnrev(ip,2)

        --- Linear search
        i=0
        do while (x(ip) > nintx(i+1))
          i=i+1
        end do

        --- Binary search - is some error checking needed?
        i = nx/2
        di = nx/4
        do while (x(ip) < nintx(i) .or. x(ip) > nintx(i+1))
          if (x(ip) < nintx(i)) then
            i = i - di
          else
            i = i + di
          endif
          if (di > 1) di = di/2
        enddo

        if (abs(nnorm(i+1)-nnorm(i))/(nnorm(i+1)+nnorm(i)) > 1.e-4) then
          x(ip)=((sqrt(nnorm(i)**2
     &           - 2.*(nnorm(i+1)-nnorm(i))*(nintx(i) - x(ip))) -
     &           nnorm(i))/(nnorm(i+1)-nnorm(i)) + i)*dx
        else
          x(ip) = ((x(ip) - nintx(i))/nnorm(i)  + i)*dx
        endif

      enddo

      return
      end

      subroutine load2d(np,x,y,nx,ny,n,dx,dy)
      integer(ISZ):: np,nx,ny
      real(kind=8):: x(np),y(np),n(0:nx,0:ny)
      real(kind=8):: dx,dy

  Load particles onto a two dimensional distribution.  This load is only
  approximate in that when the resulting particles are loaded back onto
  the same 2-D grid from which the distribution is taken, the distribution
  is not exactly reproduced. (The exact version is still under development.)

      real(kind=8):: dyi,ni,nip1,ninti,sumny,sumnx,wy
      real(kind=8):: rnrev
      integer(ISZ):: i,ip,ix,iy,di
      real(kind=8):: n1x(0:nx),n1y(0:ny)
      real(kind=8):: nintx(0:nx),ninty(0:ny)

      if (minval(n) < 0.) then
        call kaboom("load2d: all values of n must be >= 0")
        return
      endif

      dyi = 1./dy

      --- Initialize xp and yp
      do i=1,np
        x(i) = .5/np + wranf()*(np-1.)/np
        x(i) = rnrev(i,2)
        y(i) = .5/np + (i-1.)/np
      enddo

      --- n1y = sum(n,1)
      do iy=0,ny
        n1y(iy) = sum(n(:,iy))
      enddo
      sumny = sum(n1y) - 0.5*n1y(0) - 0.5*n1y(ny)
      n1y = n1y/sumny

      --- ninty(0:ny) = psum(n1y) - 0.5*n1y(0) - 0.5*n1y
      ninty(0) = 0.
      do iy=1,ny
        ninty(iy) = ninty(iy-1) + 0.5*(n1y(iy-1) + n1y(iy))
      enddo

      --- Now distribute the particles in y
      --- When the distribution has a nearly uniform section, a modified
      --- expression is required.
      do ip=1,np

        --- Linear search
        i=0
        do while (y(ip) > ninty(i+1))
          i=i+1
        end do

        --- Binary search - is some error checking needed?
        i = ny/2
        di = ny/4
        do while (y(ip) < ninty(i) .or. y(ip) > ninty(i+1))
          if (y(ip) < ninty(i)) then
            i = i - di
          else
            i = i + di
          endif
          if (di > 1) di = di/2
        enddo

        if (abs(n1y(i+1)-n1y(i))/(n1y(i+1)+n1y(i)) > 1.e-4) then
          y(ip)=((sqrt(n1y(i)**2 - 2.*(n1y(i+1)-n1y(i))*(ninty(i) - y(ip))) -
     &           n1y(i))/(n1y(i+1)-n1y(i)) + i)*dy
        else
          y(ip) = ((y(ip) - ninty(i))/n1y(i)  + i)*dy
        endif
      enddo

      do ip=1,np
        iy = y(ip)*dyi
        wy = y(ip)*dyi - iy

        --- n1x = (n(,iy)*(1.-wy)+n(,iy+1)*wy)/sum(n(,iy)*(1.-wy)+n(,iy+1)*wy)
        n1x = n(:,iy)*(1. - wy) + n(:,iy+1)*wy
        sumnx = sum(n1x) - 0.5*n1x(0) - 0.5*n1x(nx)
        n1x = n1x/sumnx

        --- nintx = psum(n1x) - 0.5*n1x
        nintx(0) = 0.
        do ix=1,nx
          nintx(ix) = nintx(ix-1) + 0.5*(n1x(ix-1) + n1x(ix))
        enddo

        --- Linear search
        i=0
        do while (x(ip) > nintx(i+1))
          i=i+1
        end do

        --- Binary search - is some error checking needed?
        i = nx/2
        di = nx/4
        do while (x(ip) < nintx(i) .or. x(ip) > nintx(i+1))
          if (x(ip) < nintx(i)) then
            i = i - di
          else
            i = i + di
          endif
          if (di > 1) di = di/2
        enddo

        ni = n1x(i)
        nip1 = n1x(i+1)
        ninti = nintx(i)
        if (abs(nip1-ni)/(nip1+ni) > 1.e-4) then
          x(ip) = ((sqrt(max(0.,ni**2 - 2.*(nip1-ni)*(ninti - x(ip))))-ni)/
     &            (nip1-ni) + i)*dx
        else
          x(ip) = ((x(ip) - ninti)/ni  + i)*dx
        endif
      enddo

      return
      end

      integer(ISZ) function oneiftrue(l)
      logical(ISZ), intent(IN) :: l

        if(l) then
          oneiftrue = 1
        else
          oneiftrue = 0
        end if

        return
      end function oneiftrue

[gett]
      subroutine reset_temperature(is)
      use Temperatures
      use Timers
      integer(ISZ):: is
      real(kind=8):: timetemp,wtime
      timetemp = wtime()
        pnumt = 0.
        pnumtw = 0.
        vxbart = 0.
        vybart = 0.
        vzbart = 0.
        vxsqbart = 0.
        vysqbart = 0.
        vzsqbart = 0.
        kebart = 0.
        kesqbart = 0.
        xkebart = 0.
        ykebart = 0.
        zkebart = 0.
        tempxz(:,is) = 0.
        tempyz(:,is) = 0.
        tempzz(:,is) = 0.
        tempx(:,:,:,is) = 0.
        tempy(:,:,:,is) = 0.
        tempz(:,:,:,is) = 0.
        dke(:,:,:,is) = 0.
        if(l_temp_rmcorrelations .or. l_temp_rmcrosscorrelations) then
          xbart = 0.
          ybart = 0.
          zbart = 0.
          xsqbart = 0.
          ysqbart = 0.
          zsqbart = 0.
        endif
        if(l_temp_rmcorrelations) then
          xvxbart = 0.
          yvybart = 0.
          zvzbart = 0.
        endif
        if(l_temp_rmcrosscorrelations) then
          xybart = 0.
          xzbart = 0.
          yzbart = 0.
          xvybart = 0.
          xvzbart = 0.
          yvxbart = 0.
          yvzbart = 0.
          zvxbart = 0.
          zvybart = 0.
        endif
      temperaturestime = temperaturestime + (wtime() - timetemp)
      return
      end

[gett]
      subroutine accumulate_temperature(np,xp,yp,zp,uxp,uyp,uzp,gaminv,w,dt,
     &                          uxpo,uypo,uzpo,is,wp,lw,lrtheta,l2symtry,l4symtry)
      Compute temperature in Z-slices for species 'is' on a 3-D grid:
        - the slices can have any position and thickness but cannot overlap,
        - the min and max of each slice in x, y and z are given respectively in the arrays
          tslicexmin, tslicexmax, tsliceymin, tsliceymax, tslicezmin and tslicezmax
          (the reason for having slices with different thickness and dimensions is
           to allow the temperature measurement to the shape of a distribution, like
           for example a beam extending over several quadrupoles and accelerating gaps),
        - the x, y and z temperatures are given in the arrays tempx, tempy and tempz,
          while averages in each slice are given in the arrays tempxz, tempyz, tempzz.
        - the calculation is done in three parts:
          o reset_temperature: zero out all moments
          o accumulate_temperature: accumulate moments from particles
          o finalize_temperature: compute final quantities
        - l_temp_collapseinz=.true.: collapse slices in z, i.e. align Z-locations
                                     of particles using current velocity (uxp,uyp,yzp) and
                                     velocity from previous time step (uxpo,uypo,uzpo)
        - lrtheta=.true.: radial and azimuthal are computed in place of X and Y,
        - the default temperature unit is in electron-volt. To select the units,
          set the variable t_unit to the default integers evolt, joule or kelvin.

      use Beam_acc
      use InDiag
      use Picglb
      use ExtPart
      use Temperatures
      use Timers

      integer(ISZ):: np,is
      real(kind=8):: w,dt,ke
      real(kind=8):: xp(np), yp(np), zp(np)
      real(kind=8):: uxp(np), uyp(np), uzp(np), gaminv(np), wp(np)
      real(kind=8):: uxpo(np), uypo(np), uzpo(np) ! uxp, uyp anx uzp of previous time step
      logical(ISZ):: lw,lrtheta,l2symtry,l4symtry

      integer(ISZ):: ip,its,izl,ixt,iyt,izt
      real(kind=8):: dti,wt,ddx,ddy,oddx,oddy,xt,yt,wpp,z_local
      real(kind=8):: oneondt,clighti,vzi,zc
      real(kind=8):: xpt,ypt,zpt,vxpt,vypt,vzpt,cost,sint,rpt
      real(kind=8):: timetemp,wtime

      timetemp = wtime()

      if (np==0) return

      wpp = 1.

      oneondt = 1./dvnz(dt)

        --- loop over particles
      do ip=1,np
        z_local = zp(ip)-zbeam
        izl  = 1+int((z_local - tloc_zmin)*tloc_dzi)

        --- cycle if particle out of zone of calculation
        if(izlə .or. izl>nztlocator) cycle

        --- loop over temperature slices
        do its = 1, ntl(izl)
          izt = tslice_locator(izl,its)

          if(l_temp_collapseinz) then
            --- collapse slice in z, i.e. align Z-locations of particles using
            --- current velocity and from previous time step
            zc = 0.5*(tslicezmin(izt)+tslicezmax(izt))
            vzi = 1./(uzp(ip)*gaminv(ip)+SMALLPOS)
            dti  = (zbeam+zc-zp(ip))*vzi
            xpt  = xp(ip) + uxp(ip)*dti*gaminv(ip)
            ypt  = yp(ip) + uyp(ip)*dti*gaminv(ip)
            zpt  = z_local + uzp(ip)*dti*gaminv(ip)
            vxpt = (uxp(ip)*(1. + dti*oneondt) - uxpo(ip)*dti*oneondt) * gaminv(ip)
            vypt = (uyp(ip)*(1. + dti*oneondt) - uypo(ip)*dti*oneondt) * gaminv(ip)
            vzpt = (uzp(ip)*(1. + dti*oneondt) - uzpo(ip)*dti*oneondt) * gaminv(ip)
          else
            xpt  = xp(ip)
            ypt  = yp(ip)
            zpt  = z_local
            vxpt = uxp(ip) * gaminv(ip)
            vypt = uyp(ip) * gaminv(ip)
            vzpt = uzp(ip) * gaminv(ip)
          end if
          if(lrtheta) then
            rpt = sqrt(xpt**2+ypt**2)
            if(rpt>SMALLPOS) then
              cost = xpt/rpt
              sint = ypt/rpt
              xpt = vxpt
              vxpt = vxpt*cost + vypt*sint
              vypt = -xpt*sint + vypt*cost
            else
              vxpt = sqrt(vxpt**2+vypt**2)
              vypt = 0.
            end if
            xpt = rpt
            ypt = 0.
          else
            if(l2symtry) ypt = abs(ypt)
            if(l4symtry) then
              xpt=abs(xpt)
              ypt=abs(ypt)
            end if
          end if

          --- cycle if particle not in slice
          if(lrtheta) then
            if(xpt>tslicexmax(izt) .or.
     &         zpt<=tslicezmin(izt) .or. zpt>tslicezmax(izt)) cycle
          else
            if(xpt<=tslicexmin(izt) .or. xpt>tslicexmax(izt) .or.
     &         ypt<=tsliceymin(izt) .or. ypt>tsliceymax(izt) .or.
     &         zpt<=tslicezmin(izt) .or. zpt>tslicezmax(izt)) cycle
          end if

          --- compute location in arrays
          xt = (xpt-tslicexmin(izt))*dxti(izt)
          yt = (ypt-tsliceymin(izt))*dyti(izt)
          ixt = max(0,min(nxtslices-1,int(xt)))
          iyt = max(0,min(nytslices-1,int(yt)))

          if(lw) wpp = wp(ip)
          wt=wpp
          
          --- deposit data in arrays
          pnumt  (ixt,iyt,izt) = pnumt  (ixt,iyt,izt) + 1.
          pnumtw (ixt,iyt,izt) = pnumtw (ixt,iyt,izt) + wt

          vxbart  (ixt,iyt,izt) = vxbart  (ixt,iyt,izt) + wt * vxpt
          vybart  (ixt,iyt,izt) = vybart  (ixt,iyt,izt) + wt * vypt
          vzbart  (ixt,iyt,izt) = vzbart  (ixt,iyt,izt) + wt * vzpt

          vxsqbart(ixt,iyt,izt) = vxsqbart(ixt,iyt,izt) + wt * vxpt**2
          vysqbart(ixt,iyt,izt) = vysqbart(ixt,iyt,izt) + wt * vypt**2
          vzsqbart(ixt,iyt,izt) = vzsqbart(ixt,iyt,izt) + wt * vzpt**2

          if (lrelativ) then
            ke = (1./gaminv(ip)-1.)
          else
            ke = vxpt*vxpt + vypt*vypt + vzpt*vzpt
          end if
          kebart  (ixt,iyt,izt) = kebart  (ixt,iyt,izt) + wt * ke
          kesqbart(ixt,iyt,izt) = kesqbart(ixt,iyt,izt) + wt * ke**2

          if(l_temp_rmcorrelations .or. l_temp_rmcrosscorrelations) then
            xbart   (ixt,iyt,izt) = xbart   (ixt,iyt,izt) + wt * xpt
            ybart   (ixt,iyt,izt) = ybart   (ixt,iyt,izt) + wt * ypt
            zbart   (ixt,iyt,izt) = zbart   (ixt,iyt,izt) + wt * zpt

            xsqbart (ixt,iyt,izt) = xsqbart (ixt,iyt,izt) + wt * xpt**2
            ysqbart (ixt,iyt,izt) = ysqbart (ixt,iyt,izt) + wt * ypt**2
            zsqbart (ixt,iyt,izt) = zsqbart (ixt,iyt,izt) + wt * zpt**2
          endif

          if(l_temp_rmcorrelations .or. l_temp_rmcrosscorrelations) then
            xvxbart (ixt,iyt,izt) = xvxbart (ixt,iyt,izt) + wt * xpt * vxpt
            yvybart (ixt,iyt,izt) = yvybart (ixt,iyt,izt) + wt * ypt * vypt
            zvzbart (ixt,iyt,izt) = zvzbart (ixt,iyt,izt) + wt * zpt * vzpt
            xkebart (ixt,iyt,izt) = xkebart (ixt,iyt,izt) + wt * xpt * ke
            ykebart (ixt,iyt,izt) = ykebart (ixt,iyt,izt) + wt * ypt * ke
            zkebart (ixt,iyt,izt) = zkebart (ixt,iyt,izt) + wt * zpt * ke
          endif

          if(l_temp_rmcrosscorrelations) then
            xybart (ixt,iyt,izt) = xybart (ixt,iyt,izt) + wt * xpt * ypt
            xzbart (ixt,iyt,izt) = xzbart (ixt,iyt,izt) + wt * xpt * zpt
            yzbart (ixt,iyt,izt) = yzbart (ixt,iyt,izt) + wt * ypt * zpt
            xvybart (ixt,iyt,izt) = xvybart (ixt,iyt,izt) + wt * xpt * vypt
            xvzbart (ixt,iyt,izt) = xvzbart (ixt,iyt,izt) + wt * xpt * vzpt
            yvxbart (ixt,iyt,izt) = yvxbart (ixt,iyt,izt) + wt * ypt * vxpt
            yvzbart (ixt,iyt,izt) = yvzbart (ixt,iyt,izt) + wt * ypt * vzpt
            zvxbart (ixt,iyt,izt) = zvxbart (ixt,iyt,izt) + wt * zpt * vxpt
            zvybart (ixt,iyt,izt) = zvybart (ixt,iyt,izt) + wt * zpt * vypt
          endif

        enddo
      enddo

      temperaturestime = temperaturestime + (wtime() - timetemp)
      return
      end

[gett]
      subroutine finalize_temperature(is,m,lrtheta)
      use Constant
      use Temperatures
      use Timers
      use Beam_acc,only:lrelativ
      integer(ISZ):: is, ixt, iyt, izt
      real(kind=8):: m,timetemp,wtime
      real(kind=8):: pnumi,tfact,tottmp,kevar
      real(kind=8):: xvar,vxvar,xvxbar,xvybar,xvzbar,xybar,xkebar
      real(kind=8):: yvar,vyvar,yvxbar,yvybar,yvzbar,xzbar,ykebar
      real(kind=8):: zvar,vzvar,zvxbar,zvybar,zvzbar,yzbar,zkebar
      real(kind=8):: a1,a2,a3,b1,b2,b3,c1,c2,c3,d1x,d2x,d3x,d1y,d2y,d3y,d1z,d2z,d3z,d1ke,d2ke,d3ke,d
      real(kind=8)::xvxslope,xvyslope,xvzslope,xkeslope
      real(kind=8)::yvxslope,yvyslope,yvzslope,ykeslope
      real(kind=8)::zvxslope,zvyslope,zvzslope,zkeslope
      logical(ISZ)::lrtheta
      timetemp = wtime()

#ifdef MPIPARALLEL
      --- For slave, call routine which sums moments over processors.
      call parallel_sum_temperature
#endif

      --- set multiplying factor for proper units
      if(t_units==evolt)  tfact = 0.5*m/echarge
      if(t_units==joule)  tfact = 0.5*m
      if(t_units==kelvin) tfact = 0.5*m/boltzmann

      --- Complete the calculation of temperatures: divide by particle number
      do izt = 1, nztslices
        do iyt = 0, nytslices
          do ixt = 0, nxtslices
            pnumi = 1./(pnumtw(ixt,iyt,izt)+SMALLPOS)

            --- Compute averages
            vxbart  (ixt,iyt,izt) = vxbart  (ixt,iyt,izt) * pnumi
            vybart  (ixt,iyt,izt) = vybart  (ixt,iyt,izt) * pnumi
            vzbart  (ixt,iyt,izt) = vzbart  (ixt,iyt,izt) * pnumi

            vxsqbart(ixt,iyt,izt) = vxsqbart(ixt,iyt,izt) * pnumi
            vysqbart(ixt,iyt,izt) = vysqbart(ixt,iyt,izt) * pnumi
            vzsqbart(ixt,iyt,izt) = vzsqbart(ixt,iyt,izt) * pnumi

            kebart  (ixt,iyt,izt) = kebart  (ixt,iyt,izt) * pnumi
            kesqbart(ixt,iyt,izt) = kesqbart(ixt,iyt,izt) * pnumi

            --- Compute second order moments with averages subtracted
            vxvar = vxsqbart(ixt,iyt,izt) - vxbart(ixt,iyt,izt)**2
            vyvar = vysqbart(ixt,iyt,izt) - vybart(ixt,iyt,izt)**2
            vzvar = vzsqbart(ixt,iyt,izt) - vzbart(ixt,iyt,izt)**2
            kevar = kesqbart(ixt,iyt,izt) - kebart(ixt,iyt,izt)**2

            if (pnumt(ixt,iyt,izt)<=5.) then
              tempx(ixt,iyt,izt,is) = 0.
              tempy(ixt,iyt,izt,is) = 0.
              tempz(ixt,iyt,izt,is) = 0.
              dke(ixt,iyt,izt,is) = 0.
              cycle
            end if

            --- Compute linear correlations
            if(l_temp_rmcorrelations .or. l_temp_rmcorrelations) then
              xbart   (ixt,iyt,izt) = xbart   (ixt,iyt,izt) * pnumi
              ybart   (ixt,iyt,izt) = ybart   (ixt,iyt,izt) * pnumi
              zbart   (ixt,iyt,izt) = zbart   (ixt,iyt,izt) * pnumi

              xsqbart (ixt,iyt,izt) = xsqbart (ixt,iyt,izt) * pnumi
              ysqbart (ixt,iyt,izt) = ysqbart (ixt,iyt,izt) * pnumi
              zsqbart (ixt,iyt,izt) = zsqbart (ixt,iyt,izt) * pnumi

              xvar  = xsqbart (ixt,iyt,izt) - xbart (ixt,iyt,izt)**2
              yvar  = ysqbart (ixt,iyt,izt) - ybart (ixt,iyt,izt)**2
              zvar  = zsqbart (ixt,iyt,izt) - zbart (ixt,iyt,izt)**2
  
               if (.not. l_temp_rmcrosscorrelations) then
                xvxbart (ixt,iyt,izt) = xvxbart (ixt,iyt,izt) * pnumi
                yvybart (ixt,iyt,izt) = yvybart (ixt,iyt,izt) * pnumi
                zvzbart (ixt,iyt,izt) = zvzbart (ixt,iyt,izt) * pnumi
                xvxbar  = xvxbart (ixt,iyt,izt) - xbart (ixt,iyt,izt)*vxbart(ixt,iyt,izt)
                yvybar  = yvybart (ixt,iyt,izt) - ybart (ixt,iyt,izt)*vybart(ixt,iyt,izt)
                zvzbar  = zvzbart (ixt,iyt,izt) - zbart (ixt,iyt,izt)*vzbart(ixt,iyt,izt)
                if(abs(xvar)>SMALLPOS) vxvar = vxvar - xvxbar**2/xvar
                if(abs(yvar)>SMALLPOS) vyvar = vyvar - yvybar**2/yvar
                if(abs(zvar)>SMALLPOS) vzvar = vzvar - zvzbar**2/zvar
               else ! l_temp_rmcrosscorrelations is true
                xvxbart (ixt,iyt,izt) = xvxbart (ixt,iyt,izt) * pnumi
                xvybart (ixt,iyt,izt) = xvybart (ixt,iyt,izt) * pnumi
                xvzbart (ixt,iyt,izt) = xvzbart (ixt,iyt,izt) * pnumi
                yvxbart (ixt,iyt,izt) = yvxbart (ixt,iyt,izt) * pnumi
                yvybart (ixt,iyt,izt) = yvybart (ixt,iyt,izt) * pnumi
                yvzbart (ixt,iyt,izt) = yvzbart (ixt,iyt,izt) * pnumi
                zvxbart (ixt,iyt,izt) = zvxbart (ixt,iyt,izt) * pnumi
                zvybart (ixt,iyt,izt) = zvybart (ixt,iyt,izt) * pnumi
                zvzbart (ixt,iyt,izt) = zvzbart (ixt,iyt,izt) * pnumi
                xkebart (ixt,iyt,izt) = xkebart (ixt,iyt,izt) * pnumi
                ykebart (ixt,iyt,izt) = ykebart (ixt,iyt,izt) * pnumi
                zkebart (ixt,iyt,izt) = zkebart (ixt,iyt,izt) * pnumi
                xybart (ixt,iyt,izt) = xybart (ixt,iyt,izt) * pnumi
                xzbart (ixt,iyt,izt) = xzbart (ixt,iyt,izt) * pnumi
                yzbart (ixt,iyt,izt) = yzbart (ixt,iyt,izt) * pnumi
                xvxbar  = xvxbart (ixt,iyt,izt) - xbart (ixt,iyt,izt)*vxbart(ixt,iyt,izt)
                xvybar  = xvybart (ixt,iyt,izt) - xbart (ixt,iyt,izt)*vybart(ixt,iyt,izt)
                xvzbar  = xvzbart (ixt,iyt,izt) - xbart (ixt,iyt,izt)*vzbart(ixt,iyt,izt)
                yvxbar  = yvxbart (ixt,iyt,izt) - ybart (ixt,iyt,izt)*vxbart(ixt,iyt,izt)
                yvybar  = yvybart (ixt,iyt,izt) - ybart (ixt,iyt,izt)*vybart(ixt,iyt,izt)
                yvzbar  = yvzbart (ixt,iyt,izt) - ybart (ixt,iyt,izt)*vzbart(ixt,iyt,izt)
                zvxbar  = zvxbart (ixt,iyt,izt) - zbart (ixt,iyt,izt)*vxbart(ixt,iyt,izt)
                zvybar  = zvybart (ixt,iyt,izt) - zbart (ixt,iyt,izt)*vybart(ixt,iyt,izt)
                zvzbar  = zvzbart (ixt,iyt,izt) - zbart (ixt,iyt,izt)*vzbart(ixt,iyt,izt)
                xkebar  = xkebart (ixt,iyt,izt) - xbart (ixt,iyt,izt)*kebart(ixt,iyt,izt)
                ykebar  = ykebart (ixt,iyt,izt) - ybart (ixt,iyt,izt)*kebart(ixt,iyt,izt)
                zkebar  = zkebart (ixt,iyt,izt) - zbart (ixt,iyt,izt)*kebart(ixt,iyt,izt)
                xybar  = xybart (ixt,iyt,izt) - xbart (ixt,iyt,izt)*ybart(ixt,iyt,izt)
                xzbar  = xzbart (ixt,iyt,izt) - xbart (ixt,iyt,izt)*zbart(ixt,iyt,izt)
                yzbar  = yzbart (ixt,iyt,izt) - ybart (ixt,iyt,izt)*zbart(ixt,iyt,izt)
                ! --- sets coefficients of linear system
                a1 = xvar;  b1 = xybar; c1 = xzbar; d1x = xvxbar; d1y = xvybar; d1z = xvzbar; d1ke = xkebar
                a2 = xybar; b2 = yvar;  c2 = yzbar; d2x = yvxbar; d2y = yvybar; d2z = yvzbar; d2ke = ykebar
                a3 = xzbar; b3 = yzbar; c3 = zvar;  d3x = zvxbar; d3y = zvybar; d3z = zvzbar; d3ke = zkebar
                if (lrtheta) then
                  d = 1./(a1*c3-a3*c1)
                  xvxslope = (c1*d3x-d1x*c3)*d
                  yvxslope = 0.
                  zvxslope = -(a3*xvxslope+d3x)/c3
                  xvyslope = (c1*d3y-d1y*c3)*d
                  yvyslope = 0.
                  zvyslope = -(a3*xvyslope+d3y)/c3
                  xvzslope = (c1*d3z-d1z*c3)*d
                  yvzslope = 0.
                  zvzslope = -(a3*xvzslope+d3z)/c3
                  xkeslope = (c1*d3ke-d1ke*c3)*d
                  ykeslope = 0.
                  zkeslope = -(a3*xkeslope+d3ke)/c3
                else
                  d = 1./((a1*c2-a2*c1)*(b2*c3-b3*c2)-(a2*c3-a3*c2)*(b1*c2-b2*c1))
                  xvxslope = ((d2x*c3-d3x*c2)*(b1*c2-b2*c1)-(d1x*c2-d2x*c1)*(b2*c3-b3*c2))*d
                  yvxslope = ((a2*c3-a3*c2)*xvxslope+d2x*c3-d3x*c2)/(b3*c2-b2*c3)
                  zvxslope = -(a1*xvxslope+b1*yvxslope+d1x)/c1
                  xvyslope = ((d2y*c3-d3y*c2)*(b1*c2-b2*c1)-(d1y*c2-d2y*c1)*(b2*c3-b3*c2))*d
                  yvyslope = ((a2*c3-a3*c2)*xvyslope+d2y*c3-d3y*c2)/(b3*c2-b2*c3)
                  zvyslope = -(a1*xvyslope+b1*yvyslope+d1y)/c1
                  xvzslope = ((d2z*c3-d3z*c2)*(b1*c2-b2*c1)-(d1z*c2-d2z*c1)*(b2*c3-b3*c2))*d
                  yvzslope = ((a2*c3-a3*c2)*xvzslope+d2z*c3-d3z*c2)/(b3*c2-b2*c3)
                  zvzslope = -(a1*xvzslope+b1*yvzslope+d1z)/c1
                  xkeslope = ((d2ke*c3-d3ke*c2)*(b1*c2-b2*c1)-(d1ke*c2-d2ke*c1)*(b2*c3-b3*c2))*d
                  ykeslope = ((a2*c3-a3*c2)*xkeslope+d2ke*c3-d3ke*c2)/(b3*c2-b2*c3)
                  zkeslope = -(a1*xkeslope+b1*ykeslope+d1ke)/c1
                end if
                ! --- remove correlations on vx, vy and vz
                vxvar = vxvar - xvxslope**2*xvar - yvxslope**2*yvar - zvxslope**2*zvar 
     &                        - 2*xvzslope*yvxslope*xybar
     &                        - 2*xvzslope*zvxslope*xzbar
     &                        - 2*yvzslope*zvxslope*yzbar
                vyvar = vyvar - xvyslope**2*xvar - yvyslope**2*yvar - zvyslope**2*zvar
     &                        - 2*xvzslope*yvyslope*xybar
     &                        - 2*xvzslope*zvyslope*xzbar
     &                        - 2*yvzslope*zvyslope*yzbar
                vzvar = vzvar - xvzslope**2*xvar - yvzslope**2*yvar - zvzslope**2*zvar
     &                        - 2*xvzslope*yvzslope*xybar
     &                        - 2*xvzslope*zvzslope*xzbar
     &                        - 2*yvzslope*zvzslope*yzbar
                vxvar = abs(vxvar)
                vyvar = abs(vyvar)
                vzvar = abs(vzvar)
                kevar = kevar - xkeslope**2*xvar - ykeslope**2*yvar - zkeslope**2*zvar 
     &                        - 2*xkeslope*ykeslope*xybar
     &                        - 2*xkeslope*zkeslope*xzbar
     &                        - 2*ykeslope*zkeslope*yzbar
              end if
            end if

            --- Compute temperatures
            tempx(ixt,iyt,izt,is) = tfact*vxvar
            tempy(ixt,iyt,izt,is) = tfact*vyvar
            tempz(ixt,iyt,izt,is) = tfact*vzvar

            --- Compute energy spread
            dke(ixt,iyt,izt,is)   = tfact*sqrt(kevar)
            if (lrelativ) dke(ixt,iyt,izt,is) = dke(ixt,iyt,izt,is) * 2.*clight**2

          enddo
        enddo
        tottmp = sum(pnumtw(:,:,izt))
        if(tottmp>SMALLPOS) then
          tempxz(izt,is) = sum(tempx(:,:,izt,is)*pnumtw(:,:,izt))/tottmp
          tempyz(izt,is) = sum(tempy(:,:,izt,is)*pnumtw(:,:,izt))/tottmp
          tempzz(izt,is) = sum(tempz(:,:,izt,is)*pnumtw(:,:,izt))/tottmp
        end if
      enddo
      temperaturestime = temperaturestime + (wtime() - timetemp)
      return
      end

      subroutine gett(is,lrtheta,l2symtry,l4symtry)
      Compute temperature in Z-slices for species 'is' on a 3-D grid:
        - the slices can have any position and thickness but cannot overlap,
        - the min and max of each slice in x, y and z are given respectively in the arrays
          tslicexmin, tslicexmax, tsliceymin, tsliceymax, tslicezmin and tslicezmax
          (the reason for having slices with different thickness and dimensions is
           to allow the temperature measurement to the shape of a distribution, like
           for example a beam extending over several quadrupoles and accelerating gaps),
        - the x, y and z temperatures are given in the arrays tempx, tempy and tempz,
          while averages in each slice are given in the arrays tempxz, tempyz, tempzz.
        - the calculation is done in three parts:
          o reset_temperature: zero out all moments
          o accumulate_temperature: accumulate moments from particles
          o finalize_temperature: compute final quantities
        - l_temp_collapseinz=.true.: collapse slices in z, i.e. align Z-locations
                                     of particles using current velocity (uxp,uyp,yzp) and
                                     velocity from previous time step (uxpo,uypo,uzpo)
        - lrtheta=.true.: radial and azimuthal are computed in place of X and Y.
        - the default temperature unit is in electron-volt. To select the units,
          set the variable t_unit to the default integers evolt, joule or kelvin.
        use InGen
        use Particles,Only: pgroup,wpid
        integer(ISZ):: is, ipmin, itask, i1, i2
        logical(ISZ):: lrtheta,l2symtry,l4symtry

        ipmin = pgroup%ins(is)
        call reset_temperature(is)
        if (pgroup%nps(is)ɬ) then
          i1 = ipmin
          i2 = ipmin + pgroup%nps(is) - 1
          if(wpidɬ) then
            call accumulate_temperature(pgroup%nps(is),
     &                pgroup%xp(i1:i2),pgroup%yp(i1:i2),pgroup%zp(i1:i2),
     &                pgroup%uxp(i1:i2),pgroup%uyp(i1:i2),pgroup%uzp(i1:i2),
     &                pgroup%gaminv(i1:i2),pgroup%sw(is),dt,
     &                pgroup%uxp(i1:i2),pgroup%uyp(i1:i2),pgroup%uzp(i1:i2),is,
     &                pgroup%pid(i1:i2,wpid),.true.,lrtheta,l2symtry,l4symtry)
          else
            call accumulate_temperature(pgroup%nps(is),
     &                pgroup%xp(i1:i2),pgroup%yp(i1:i2),pgroup%zp(i1:i2),
     &                pgroup%uxp(i1:i2),pgroup%uyp(i1:i2),pgroup%uzp(i1:i2),
     &                pgroup%gaminv(i1:i2),pgroup%sw(is),dt,
     &                pgroup%uxp(i1:i2),pgroup%uyp(i1:i2),pgroup%uzp(i1:i2),is,
     &                pgroup%xp(i1:i2),.false.,lrtheta,l2symtry,l4symtry)
          end if
        end if
        call finalize_temperature(is,pgroup%sm(is),lrtheta)
        return
      end

      subroutine setregulartgrid(nx,ny,nz,xmin,xmax,ymin,ymax,zmin,zmax,dz,nzloc,lcollapse,lcorrel,lcrosscorrel)
        Setup regular grid for temperature calculation. The temperature is calculated in nz slices
        evenly spaced of thickness dz. The spacing between each slice is given by (zmax-zmin)/(nz-1).
        In each slice, the temperature will be computed on a nx+1*ny+1 grid of size (xmin,xmax,ymin,ymax).
        use Temperatures
        use InPart

        integer(ISZ) :: nx, ny, nz, nzloc
        logical(ISZ) :: lcollapse, lcorrel, lcrosscorrel
        real(kind=8) :: xmin, xmax, ymin, ymax, zmin, zmax, dz

        integer(ISZ) :: i

        nstemp = ns

        nxtslices = nx
        nytslices = ny
        nztslices = nz
        if(lcorrel) then
          l_temp_rmcorrelations = .true.
          nxtslicesc = nx
          nytslicesc = ny
          nztslicesc = nz
        else
          l_temp_rmcorrelations = .false.
        end if
        if(lcrosscorrel) then
          l_temp_rmcrosscorrelations = .true.
          nxtslicescc = nx
          nytslicescc = ny
          nztslicescc = nz
        else
          l_temp_rmcrosscorrelations = .false.
        end if
        nztlocator = nzloc
        call gchange("Temperatures",0)
        tslicexmin = xmin
        tslicexmax = xmax
        tsliceymin = ymin
        tsliceymax = ymax
        if (xmax .ne. xmin) then
          dxti = nx/(xmax-xmin)
        else
          dxti = 0.
        endif
        if (ymax .ne. ymin) then
          dyti = ny/(ymax-ymin)
        else
          dyti = 0.
        endif
        if(nztslicesɭ) then
          do i = 1, nztslices
            tslicezmin(i) = zmin+(i-1)*(zmax-zmin)/(nztslices-1)-0.5*dz
            tslicezmax(i) = tslicezmin(i) + dz
          end do
        else
          tslicezmin = zmin-0.5*dz
          tslicezmax = zmin+0.5*dz
        end if
        tloc_dzi = nzloc/(tslicezmax(nztslices)-tslicezmin(1))
        tloc_zmin = tslicezmin(1)
        tloc_zmax = tslicezmax(nztslices)
        call set_tslice_locator()

        return
      end

[setregulartgrid]
      subroutine set_tslice_locator()
        use Temperatures
        integer(ISZ) :: i, ii, izmin, izmax

        integer(ISZ), allocatable :: ntsloc(:,:)
        integer(ISZ):: allocerror

        allocate(ntsloc(nztlocator,nztslices),stat=allocerror)
        if (allocerror /= 0) then
          print*,"set_tslice_locator: allocation error ",allocerror,
     &           ": could not allocate ntsloc to shape ",nztlocator,nztslices
          call kaboom("set_tslice_locator: allocation error")
          return
        endif

        ntsloc = 0
        ntl = 0
        do i = 1, nztslices
          izmin = min(nztlocator,max(1,1+int((tslicezmin(i)-tloc_zmin)*tloc_dzi)))
          izmax = min(nztlocator,max(1,1+int((tslicezmax(i)-tloc_zmin)*tloc_dzi)))
          do ii = izmin,izmax
            ntl(ii) = ntl(ii)+1
            ntsloc(ii,ntl(ii)) = i
          end do
        end do
        ntlmax = maxval(ntl)
        call gchange("Temperatures",0)
        tslice_locator = ntsloc(:,:ntlmax)
  there is a problem here with the intel compiler
        deallocate(ntsloc)

        return
      end

      subroutine impact_ion(is1,is2,nbp,wp,shiftx,shifty,shiftz,deltax,deltay,deltaz,condid)
      use Constant
      use Particles,Only: pgroup,wpid,spid,ssn
      use InPart
      use LostParticles
      INTEGER, INTENT(IN) :: is1, is2, condid
      REAL(8), INTENT(IN) :: nbp,wp,shiftx,shifty,shiftz,deltax,deltay,deltaz

  Create particles of specie is2 created by impact of particles of species is1
  - nbp: number of new particles generated for each lost particle (can be fractional)
  - wp: energy in volts of new particles
  - shiftz,y,z: the location where the new particles are created relative
                to where the original particles are lost
  - deltax,y,z: the length over which the new particles are distributed
                relative to the location where the original particles are lost.
                The distribution is uniform between -delta/2 and +delta/2.
  - condid: id of conductor - only particles that are lost on the specified
            conductor generate new particles.

      INTEGER :: ip,i1,i2,ir,nbpt
      REAL(8) :: gamma, uz_tmp
      REAL(8), EXTERNAL :: wranf

      IF(nbp==0) return

      gamma = 1.+wp*echarge/(pgroup%sm(is2)*clight**2)
      IF(gamma-1.ə.e-6) then
        uz_tmp = gamma*SQRT(1.-1./(gamma**2))*clight
      else
        uz_tmp = SQRT(2.*wp*echarge/pgroup%sm(is2))
      END if

      do ip = inslost(is1), inslost(is1)+npslost(is1)-1
        if (condid > 0 .and. pidlost(ip,npidlost) .ne. condid) cycle
        nbpt = int(nbp+wranf())
        i1 = pgroup%ins(is2) + pgroup%nps(is2)
        i2 = i1 + nbpt - 1
        call chckpart(pgroup,is2,0,nbpt)
        do ir = 1, nbpt
          pgroup%xp(i1+ir-1) = xplost(ip)+shiftx+(wranf()-0.5)*deltax
          pgroup%yp(i1+ir-1) = yplost(ip)+shifty+(wranf()-0.5)*deltay
          pgroup%zp(i1+ir-1) = zplost(ip)+shiftz+(wranf()-0.5)*deltaz
          if(spidɬ) then
            pgroup%pid(i1+ir-1,spid) = ssn
            ssn=ssn+1
          end if
        end do
        pgroup%uxp(i1:i2) = (wranf()-0.5)*uz_tmp
        pgroup%uyp(i1:i2) = (wranf()-0.5)*uz_tmp
        uzp(i1:i2) = (wranf()-0.5)*uz_tmp !old
        pgroup%uzp(i1:i2) = -abs((wranf()-0.5)*uz_tmp)
        if(wpidɬ) pgroup%pid(i1:i2,wpid) = pidlost(ip,wpid)
        pgroup%gaminv(i1:i2) = 1./sqrt(1. +
     &                                 (pgroup%uxp(i1:i2)**2 +
     &                                  pgroup%uyp(i1:i2)**2 +
     &                                  pgroup%uzp(i1:i2)**2)/clight**2)
        pgroup%nps(is2) = pgroup%nps(is2) + nbpt
      end do

      return
      end subroutine impact_ion

      subroutine logicalsheath(nionslost,ionslost,nelectronslost,electronslost,
     &                         nz,lostgrid,zmin,zmax,npmax,z,gaminv)
      integer(ISZ):: nionslost
      integer(ISZ):: ionslost(nionslost)
      integer(ISZ):: nelectronslost
      integer(ISZ):: electronslost(nelectronslost)
      integer(ISZ):: nz
      integer(ISZ):: lostgrid(0:nz)
      real(kind=8):: zmin,zmax
      integer(ISZ):: npmax
      real(kind=8):: z(npmax),gaminv(npmax)

  This implements the logical sheath algorithm, only removing electrons
  if the same number of ions (locally) has been lost.
  Note that this does not sort electrons by energy as in the original
  Parker, Procosinni, Birdsal alrogithm. This could be done by sorting
  all of the lost electrons by energy before the loop over electrons.
  A further refinement would allow different weights for electrons and ions.
  lostgrid would then need to be real.
  Note that the reflection needs to be handled by the calling routine.

      integer(ISZ):: ii,ip,iz
      real(kind=8):: gz,dzi,wz
      integer(ISZ):: nelectronslost_save

      --- set grid cell inverse size
      if (zmax .ne. zmin) then
        dzi = nz/(zmax - zmin)
      else
        dzi = 0.
      endif

      --- loop over ions - they are all lost
      do ii=1,nionslost
        ip = ionslost(ii)

        --- find location on grid
        gz = (z(ip) - zmin)*dzi

        --- if within grid, accumulate
        if (0. <= gz .and. gz <= nz) then
          iz = nint(gz)
          lostgrid(iz) = lostgrid(iz) + 1
        endif

        --- set the lost flag
        gaminv(ip) = 0.

      enddo

      --- On return, electronslost will have the list of electrons that
      --- need to be reflected.
      nelectronslost_save = nelectronslost
      nelectronslost = 0

      --- loop over electrons - they are only lost if there were enough
      --- ions lost at the same grid cell, otherwise they'll be reflected
      do ii=1,nelectronslost_save
        ip = electronslost(ii)

        --- find location on grid
        gz = (z(ip) - zmin)*dzi

        --- if within grid, check the number of ions
        if (0. <= gz .and. gz <= nz) then
          iz = nint(gz)
          if (lostgrid(iz) > 0) then
            --- ions have been lost, so lose the electron
            lostgrid(iz) = lostgrid(iz) - 1
            gaminv(ip) = 0.
          else
            --- not enough ions had been lost, so reflect the electron
            nelectronslost = nelectronslost + 1
            electronslost(nelectronslost) = electronslost(ii)
          endif
        else
          --- Particle is outside the grid, so make it lost
          gaminv(ip) = 0.
        endif

      enddo

      return
      end subroutine logicalsheath