*IF DEF,C96_1B                                                             T3ESFT1B.2      
*IF DEF,MPP,AND,DEF,T3E                                                    T3ESFT1B.3      
C *****************************COPYRIGHT******************************     T3ESFT1B.4      
C (c) CROWN COPYRIGHT 1998, METEOROLOGICAL OFFICE, All Rights Reserved.    T3ESFT1B.5      
C                                                                          T3ESFT1B.6      
C Use, duplication or disclosure of this code is subject to the            T3ESFT1B.7      
C restrictions as set forth in the contract.                               T3ESFT1B.8      
C                                                                          T3ESFT1B.9      
C                Meteorological Office                                     T3ESFT1B.10     
C                London Road                                               T3ESFT1B.11     
C                BRACKNELL                                                 T3ESFT1B.12     
C                Berkshire UK                                              T3ESFT1B.13     
C                RG12 2SZ                                                  T3ESFT1B.14     
C                                                                          T3ESFT1B.15     
C If no contract has been raised with this copy of the code, the use,      T3ESFT1B.16     
C duplication or disclosure of it is strictly prohibited.  Permission      T3ESFT1B.17     
C to do so must first be obtained in writing from the Head of Numerical    T3ESFT1B.18     
C Modelling at the above address.                                          T3ESFT1B.19     
C ******************************COPYRIGHT******************************    T3ESFT1B.20     
!                                                                          T3ESFT1B.21     
! Description:                                                             T3ESFT1B.22     
!  Shift (rotate) the elements in a set of vectors distributed             T3ESFT1B.23     
!  across all members of a group.                                          T3ESFT1B.24     
!                                                                          T3ESFT1B.25     
! NOTE: Unlike the GCOM version, this routine does not implement           T3ESFT1B.26     
!       the zero fill option - only the true rotation.                     T3ESFT1B.27     
!                                                                          T3ESFT1B.28     
! Method:                                                                  T3ESFT1B.29     
!  This routine uses SHMEM directly.                                       T3ESFT1B.30     
!                                                                          T3ESFT1B.31     
! Current Code Owner: Bob Carruthers                                       T3ESFT1B.32     
!                                                                          T3ESFT1B.33     
! History:                                                                 T3ESFT1B.34     
!  Model    Date      Modification history:                                T3ESFT1B.35     
!  version                                                                 T3ESFT1B.36     
!    4.5    01/06/97  New code optimised for the T3E                       T3ESFT1B.37     
!                       Author: Bob Carruthers, Cray Research              T3ESFT1B.38     
!                                                                          T3ESFT1B.39     
! Subroutine Interface:                                                    T3ESFT1B.40     

      SUBROUTINE GCG_RVECSHIFT (LVL, LSL, LSO, NV, SHFT, WRAP, FIELD,       16,6T3ESFT1B.41     
     &     GID, ISTAT)                                                     T3ESFT1B.42     
C     ******************************************************************   T3ESFT1B.43     
C     * Purpose:                                                           T3ESFT1B.44     
C     *                                                                    T3ESFT1B.45     
C     *  Shift (rotate) the elements in a set of vectors distributed       T3ESFT1B.46     
C     *  across all members of a group.                                    T3ESFT1B.47     
C     *                                                                    T3ESFT1B.48     
C     * Input:                                                             T3ESFT1B.49     
C     *  LVL     - Local Vector Length                                     T3ESFT1B.50     
C     *  LSL     - Local Shift Length (the length of the subsection to     T3ESFT1B.51     
C     *            be shifted for each vector)                             T3ESFT1B.52     
C     *  LSO     - Local Shift Offset (element where the summation         T3ESFT1B.53     
C     *            start)                                                  T3ESFT1B.54     
C     *  NV      - Number of Vectors                                       T3ESFT1B.55     
C     *  SHFT    - Number of Shifts to be done                             T3ESFT1B.56     
C     *  WRAP    - Logical indicating whether the vectors should be        T3ESFT1B.57     
C     *            wrapped around on shifts                                T3ESFT1B.58     
C     *  FIELD   - local array containing the vectors to be shifted        T3ESFT1B.59     
C     *  GID     - processor group ID                                      T3ESFT1B.60     
C     *                                                                    T3ESFT1B.61     
C     * Output:                                                            T3ESFT1B.62     
C     *  FIELD   - Local array containing the shifted data                 T3ESFT1B.63     
C     *  ISTAT   - status of rsum. 0 is OK (1 and MPI_SRC only),           T3ESFT1B.64     
C     *            refer to the header files for nonzero status codes      T3ESFT1B.65     
C     *                                                                    T3ESFT1B.66     
C     * NOTES:                                                             T3ESFT1B.67     
C     *                                                                    T3ESFT1B.68     
C     ******************************************************************   T3ESFT1B.69     
      IMPLICIT NONE                                                        T3ESFT1B.70     
      INTEGER LVL, LSL, LSO, NV, SHFT, GID, ISTAT                          T3ESFT1B.71     
      REAL FIELD(LVL,NV)                                                   T3ESFT1B.72     
      LOGICAL WRAP                                                         T3ESFT1B.73     
*CALL AMAXSIZE                                                             T3ESFT1B.74     
*CALL PARVARS                                                              T3ESFT1B.75     
c                                                                          T3ESFT1B.76     
      real field_in(row_length_max)                                        T3ESFT1B.77     
c                                                                          T3ESFT1B.78     
      integer ipad1(32), ipad2(32)                                         T3ESFT1B.79     
c                                                                          T3ESFT1B.80     
      common/gcg_rvecshift_shmem/ ipad1, field_in, ipad2                   T3ESFT1B.81     
c                                                                          T3ESFT1B.82     
      integer g_start(maxproc), g_new_start, l_new_length,                 T3ESFT1B.83     
     2 l_iadd, current_length, l_rem_iadd, my_row_pe, i, j                 T3ESFT1B.84     
c                                                                          T3ESFT1B.85     
      if(gid.ne.gc_proc_row_group) then                                    T3ESFT1B.86     
        write(0,'(''GCG_RVECSHIFT: T3E Optimised Code only'',              T3ESFT1B.87     
     2   '' Works for a Row Group'')')                                     T3ESFT1B.88     
        write(6,'(''GCG_RVECSHIFT: T3E Optimised Code only'',              T3ESFT1B.89     
     2   '' Works for a Row Group'')')                                     T3ESFT1B.90     
        call abort('GCG_RVECSHIFT: T3E Optimised Code only'//              T3ESFT1B.91     
     2   ' Works for a Row Group')                                         T3ESFT1B.92     
      endif                                                                T3ESFT1B.93     
c                                                                          T3ESFT1B.94     
      if(.not.wrap) then                                                   T3ESFT1B.95     
        write(0,'(''GCG_RVECSHIFT: Shift with Zero Fill is'',              T3ESFT1B.96     
     2   '' not Supported for the T3E Optimised Code'')')                  T3ESFT1B.97     
        write(6,'(''GCG_RVECSHIFT: Shift with Zero Fill is'',              T3ESFT1B.98     
     2   '' not Supported for the T3E Optimised Code'')')                  T3ESFT1B.99     
        call abort('GCG_RVECSHIFT: Zero Fill not Supported')               T3ESFT1B.100    
      endif                                                                T3ESFT1B.101    
c                                                                          T3ESFT1B.102    
      if(lvl.gt.row_length_max) then                                       T3ESFT1B.103    
        write(0,'(''GCG_RVECSHIFT: Workspace is too Small'',               T3ESFT1B.104    
     2   '' - '',i6,'' Words Needed, but only '',i6,                       T3ESFT1B.105    
     3   '' Words Available'')') lvl, row_length_max                       T3ESFT1B.106    
        write(6,'(''GCG_RVECSHIFT: Workspace is too Small'',               T3ESFT1B.107    
     2   '' - '',i6,'' Words Needed, but only '',i6,                       T3ESFT1B.108    
     3   '' Words Available'')') lvl, row_length_max                       T3ESFT1B.109    
        call abort('GCG_RVECSHIFT: Workspace is too Small')                T3ESFT1B.110    
      endif                                                                T3ESFT1B.111    
c                                                                          T3ESFT1B.112    
c--find the lead PE on this row                                            T3ESFT1B.113    
      my_row_pe=(mype/nproc_x)*nproc_x                                     T3ESFT1B.114    
      g_start(1)=1                                                         T3ESFT1B.115    
c--find the global start addresses for PE's in my row                      T3ESFT1B.116    
      do i=2, nproc_x+1                                                    T3ESFT1B.117    
        g_start(i)=g_start(i-1)+g_blsizep(1,i-2)                           T3ESFT1B.118    
      end do                                                               T3ESFT1B.119    
c      write(0,*) my_pe(), (g_start(i), i=1, nproc_x+1)                    T3ESFT1B.120    
c                                                                          T3ESFT1B.121    
      do j=1, nv                                                           T3ESFT1B.122    
c--move the data into the common array                                     T3ESFT1B.123    
        do i=1, lvl                                                        T3ESFT1B.124    
          field_in(i)=field(i,j)                                           T3ESFT1B.125    
        end do                                                             T3ESFT1B.126    
c                                                                          T3ESFT1B.127    
c--wait for everyone to pack the data into the common array                T3ESFT1B.128    
        call gcg_ssync (gid, istat)                                        T3ESFT1B.129    
c                                                                          T3ESFT1B.130    
        if(istat.ne.0) then                                                T3ESFT1B.131    
          write(0,9932) my_pe(), istat                                     T3ESFT1B.132    
          write(6,9932) my_pe(), istat                                     T3ESFT1B.133    
9932      format(/'GCG_RVECSHIFT: PE ',i4,' got Status ',i12,              T3ESFT1B.134    
     2     ' from GCG_SSYNC ')                                             T3ESFT1B.135    
          call abort('GCG_RVECSHIFT: Failed in GCG_SSYNC')                 T3ESFT1B.136    
        endif                                                              T3ESFT1B.137    
c                                                                          T3ESFT1B.138    
c--set the global start address for the start of my exchange               T3ESFT1B.139    
        g_new_start=g_start(mype-my_row_pe+1)+shft                         T3ESFT1B.140    
c--set the length of the data to exchange                                  T3ESFT1B.141    
        l_new_length=lsl                                                   T3ESFT1B.142    
c--set the start address                                                   T3ESFT1B.143    
        l_iadd=lso                                                         T3ESFT1B.144    
c--loop until we have moved all the segments for this PE                   T3ESFT1B.145    
1000    continue                                                           T3ESFT1B.146    
c--check we are not off the end                                            T3ESFT1B.147    
          if(g_new_start.gt.glsize(1)) g_new_start=                        T3ESFT1B.148    
     2     g_new_start-glsize(1)                                           T3ESFT1B.149    
c--loop over the PE's in a row                                             T3ESFT1B.150    
          do i=1, nproc_x                                                  T3ESFT1B.151    
c--check if this glocal address is on the the current remote PE            T3ESFT1B.152    
            if(g_new_start.ge.g_start(i) .and.                             T3ESFT1B.153    
     2       g_new_start.lt.g_start(i+1)) then                             T3ESFT1B.154    
c--compute the new local address on the remote PE                          T3ESFT1B.155    
              l_rem_iadd=g_new_start-g_start(i)                            T3ESFT1B.156    
c--compute the number of words to move on this get                         T3ESFT1B.157    
              current_length=min(l_new_length,                             T3ESFT1B.158    
     2         g_start(i+1)-g_new_start)                                   T3ESFT1B.159    
c              write(0,*) my_pe(), ' fetch ', current_length,              T3ESFT1B.160    
c     2         ' from PE ',i-1, ' at ',l_rem_iadd                         T3ESFT1B.161    
c--get the data                                                            T3ESFT1B.162    
              call shmem_get(field(l_iadd, j),                             T3ESFT1B.163    
     2         field_in(l_rem_iadd+lso), current_length,                   T3ESFT1B.164    
     3         my_row_pe+i-1)                                              T3ESFT1B.165    
                                                                           T3ESFT1B.166    
c--update the global address and local addresses and lengths               T3ESFT1B.167    
              g_new_start=g_new_start+current_length                       T3ESFT1B.168    
              l_iadd=l_iadd+current_length                                 T3ESFT1B.169    
              l_new_length=l_new_length-current_length                     T3ESFT1B.170    
c--check if we have finished                                               T3ESFT1B.171    
              if(l_new_length.gt.0) goto 1000                              T3ESFT1B.172    
              goto 1100                                                    T3ESFT1B.173    
            endif                                                          T3ESFT1B.174    
          end do                                                           T3ESFT1B.175    
          write(0,*)'GCG_RVECSHIFT: Lost in GCG_RVECSHIFT'                 T3ESFT1B.176    
          call abort('GCG_RVECSHIFT: Lost in GCG_RVECSHIFT')               T3ESFT1B.177    
                                                                           T3ESFT1B.178    
1100    continue                                                           T3ESFT1B.179    
c                                                                          T3ESFT1B.180    
c--protect the buffer until everyone has finished                          T3ESFT1B.181    
        call gcg_ssync (gid, istat)                                        T3ESFT1B.182    
c                                                                          T3ESFT1B.183    
        if(istat.ne.0) then                                                T3ESFT1B.184    
          write(0,9932) my_pe(), istat                                     T3ESFT1B.185    
          write(6,9932) my_pe(), istat                                     T3ESFT1B.186    
          call abort('GCG_RVECSHIFT: Failed in GCG_SSYNC')                 T3ESFT1B.187    
        endif                                                              T3ESFT1B.188    
c                                                                          T3ESFT1B.189    
      end do                                                               T3ESFT1B.190    
      istat=0                                                              T3ESFT1B.191    
      return                                                               T3ESFT1B.192    
      end                                                                  T3ESFT1B.193    
*ENDIF                                                                     T3ESFT1B.194    
*ENDIF                                                                     T3ESFT1B.195