*IF DEF,C96_1B T3ESFT1B.2
*IF DEF,MPP,AND,DEF,T3E T3ESFT1B.3
C *****************************COPYRIGHT****************************** T3ESFT1B.4
C (c) CROWN COPYRIGHT 1998, METEOROLOGICAL OFFICE, All Rights Reserved. T3ESFT1B.5
C T3ESFT1B.6
C Use, duplication or disclosure of this code is subject to the T3ESFT1B.7
C restrictions as set forth in the contract. T3ESFT1B.8
C T3ESFT1B.9
C Meteorological Office T3ESFT1B.10
C London Road T3ESFT1B.11
C BRACKNELL T3ESFT1B.12
C Berkshire UK T3ESFT1B.13
C RG12 2SZ T3ESFT1B.14
C T3ESFT1B.15
C If no contract has been raised with this copy of the code, the use, T3ESFT1B.16
C duplication or disclosure of it is strictly prohibited. Permission T3ESFT1B.17
C to do so must first be obtained in writing from the Head of Numerical T3ESFT1B.18
C Modelling at the above address. T3ESFT1B.19
C ******************************COPYRIGHT****************************** T3ESFT1B.20
! T3ESFT1B.21
! Description: T3ESFT1B.22
! Shift (rotate) the elements in a set of vectors distributed T3ESFT1B.23
! across all members of a group. T3ESFT1B.24
! T3ESFT1B.25
! NOTE: Unlike the GCOM version, this routine does not implement T3ESFT1B.26
! the zero fill option - only the true rotation. T3ESFT1B.27
! T3ESFT1B.28
! Method: T3ESFT1B.29
! This routine uses SHMEM directly. T3ESFT1B.30
! T3ESFT1B.31
! Current Code Owner: Bob Carruthers T3ESFT1B.32
! T3ESFT1B.33
! History: T3ESFT1B.34
! Model Date Modification history: T3ESFT1B.35
! version T3ESFT1B.36
! 4.5 01/06/97 New code optimised for the T3E T3ESFT1B.37
! Author: Bob Carruthers, Cray Research T3ESFT1B.38
! T3ESFT1B.39
! Subroutine Interface: T3ESFT1B.40
SUBROUTINE GCG_RVECSHIFT (LVL, LSL, LSO, NV, SHFT, WRAP, FIELD, 16,6T3ESFT1B.41
& GID, ISTAT) T3ESFT1B.42
C ****************************************************************** T3ESFT1B.43
C * Purpose: T3ESFT1B.44
C * T3ESFT1B.45
C * Shift (rotate) the elements in a set of vectors distributed T3ESFT1B.46
C * across all members of a group. T3ESFT1B.47
C * T3ESFT1B.48
C * Input: T3ESFT1B.49
C * LVL - Local Vector Length T3ESFT1B.50
C * LSL - Local Shift Length (the length of the subsection to T3ESFT1B.51
C * be shifted for each vector) T3ESFT1B.52
C * LSO - Local Shift Offset (element where the summation T3ESFT1B.53
C * start) T3ESFT1B.54
C * NV - Number of Vectors T3ESFT1B.55
C * SHFT - Number of Shifts to be done T3ESFT1B.56
C * WRAP - Logical indicating whether the vectors should be T3ESFT1B.57
C * wrapped around on shifts T3ESFT1B.58
C * FIELD - local array containing the vectors to be shifted T3ESFT1B.59
C * GID - processor group ID T3ESFT1B.60
C * T3ESFT1B.61
C * Output: T3ESFT1B.62
C * FIELD - Local array containing the shifted data T3ESFT1B.63
C * ISTAT - status of rsum. 0 is OK (1 and MPI_SRC only), T3ESFT1B.64
C * refer to the header files for nonzero status codes T3ESFT1B.65
C * T3ESFT1B.66
C * NOTES: T3ESFT1B.67
C * T3ESFT1B.68
C ****************************************************************** T3ESFT1B.69
IMPLICIT NONE T3ESFT1B.70
INTEGER LVL, LSL, LSO, NV, SHFT, GID, ISTAT T3ESFT1B.71
REAL FIELD(LVL,NV) T3ESFT1B.72
LOGICAL WRAP T3ESFT1B.73
*CALL AMAXSIZE
T3ESFT1B.74
*CALL PARVARS
T3ESFT1B.75
c T3ESFT1B.76
real field_in(row_length_max) T3ESFT1B.77
c T3ESFT1B.78
integer ipad1(32), ipad2(32) T3ESFT1B.79
c T3ESFT1B.80
common/gcg_rvecshift_shmem/ ipad1, field_in, ipad2 T3ESFT1B.81
c T3ESFT1B.82
integer g_start(maxproc), g_new_start, l_new_length, T3ESFT1B.83
2 l_iadd, current_length, l_rem_iadd, my_row_pe, i, j T3ESFT1B.84
c T3ESFT1B.85
if(gid.ne.gc_proc_row_group) then T3ESFT1B.86
write(0,'(''GCG_RVECSHIFT: T3E Optimised Code only'', T3ESFT1B.87
2 '' Works for a Row Group'')') T3ESFT1B.88
write(6,'(''GCG_RVECSHIFT: T3E Optimised Code only'', T3ESFT1B.89
2 '' Works for a Row Group'')') T3ESFT1B.90
call abort
('GCG_RVECSHIFT: T3E Optimised Code only'// T3ESFT1B.91
2 ' Works for a Row Group') T3ESFT1B.92
endif T3ESFT1B.93
c T3ESFT1B.94
if(.not.wrap) then T3ESFT1B.95
write(0,'(''GCG_RVECSHIFT: Shift with Zero Fill is'', T3ESFT1B.96
2 '' not Supported for the T3E Optimised Code'')') T3ESFT1B.97
write(6,'(''GCG_RVECSHIFT: Shift with Zero Fill is'', T3ESFT1B.98
2 '' not Supported for the T3E Optimised Code'')') T3ESFT1B.99
call abort
('GCG_RVECSHIFT: Zero Fill not Supported') T3ESFT1B.100
endif T3ESFT1B.101
c T3ESFT1B.102
if(lvl.gt.row_length_max) then T3ESFT1B.103
write(0,'(''GCG_RVECSHIFT: Workspace is too Small'', T3ESFT1B.104
2 '' - '',i6,'' Words Needed, but only '',i6, T3ESFT1B.105
3 '' Words Available'')') lvl, row_length_max T3ESFT1B.106
write(6,'(''GCG_RVECSHIFT: Workspace is too Small'', T3ESFT1B.107
2 '' - '',i6,'' Words Needed, but only '',i6, T3ESFT1B.108
3 '' Words Available'')') lvl, row_length_max T3ESFT1B.109
call abort
('GCG_RVECSHIFT: Workspace is too Small') T3ESFT1B.110
endif T3ESFT1B.111
c T3ESFT1B.112
c--find the lead PE on this row T3ESFT1B.113
my_row_pe=(mype/nproc_x)*nproc_x T3ESFT1B.114
g_start(1)=1 T3ESFT1B.115
c--find the global start addresses for PE's in my row T3ESFT1B.116
do i=2, nproc_x+1 T3ESFT1B.117
g_start(i)=g_start(i-1)+g_blsizep(1,i-2) T3ESFT1B.118
end do T3ESFT1B.119
c write(0,*) my_pe(), (g_start(i), i=1, nproc_x+1) T3ESFT1B.120
c T3ESFT1B.121
do j=1, nv T3ESFT1B.122
c--move the data into the common array T3ESFT1B.123
do i=1, lvl T3ESFT1B.124
field_in(i)=field(i,j) T3ESFT1B.125
end do T3ESFT1B.126
c T3ESFT1B.127
c--wait for everyone to pack the data into the common array T3ESFT1B.128
call gcg_ssync (
gid, istat) T3ESFT1B.129
c T3ESFT1B.130
if(istat.ne.0) then T3ESFT1B.131
write(0,9932) my_pe(), istat T3ESFT1B.132
write(6,9932) my_pe(), istat T3ESFT1B.133
9932 format(/'GCG_RVECSHIFT: PE ',i4,' got Status ',i12, T3ESFT1B.134
2 ' from GCG_SSYNC ') T3ESFT1B.135
call abort
('GCG_RVECSHIFT: Failed in GCG_SSYNC') T3ESFT1B.136
endif T3ESFT1B.137
c T3ESFT1B.138
c--set the global start address for the start of my exchange T3ESFT1B.139
g_new_start=g_start(mype-my_row_pe+1)+shft T3ESFT1B.140
c--set the length of the data to exchange T3ESFT1B.141
l_new_length=lsl T3ESFT1B.142
c--set the start address T3ESFT1B.143
l_iadd=lso T3ESFT1B.144
c--loop until we have moved all the segments for this PE T3ESFT1B.145
1000 continue T3ESFT1B.146
c--check we are not off the end T3ESFT1B.147
if(g_new_start.gt.glsize(1)) g_new_start= T3ESFT1B.148
2 g_new_start-glsize(1) T3ESFT1B.149
c--loop over the PE's in a row T3ESFT1B.150
do i=1, nproc_x T3ESFT1B.151
c--check if this glocal address is on the the current remote PE T3ESFT1B.152
if(g_new_start.ge.g_start(i) .and. T3ESFT1B.153
2 g_new_start.lt.g_start(i+1)) then T3ESFT1B.154
c--compute the new local address on the remote PE T3ESFT1B.155
l_rem_iadd=g_new_start-g_start(i) T3ESFT1B.156
c--compute the number of words to move on this get T3ESFT1B.157
current_length=min(l_new_length, T3ESFT1B.158
2 g_start(i+1)-g_new_start) T3ESFT1B.159
c write(0,*) my_pe(), ' fetch ', current_length, T3ESFT1B.160
c 2 ' from PE ',i-1, ' at ',l_rem_iadd T3ESFT1B.161
c--get the data T3ESFT1B.162
call shmem_get(
field(l_iadd, j), T3ESFT1B.163
2 field_in(l_rem_iadd+lso), current_length, T3ESFT1B.164
3 my_row_pe+i-1) T3ESFT1B.165
T3ESFT1B.166
c--update the global address and local addresses and lengths T3ESFT1B.167
g_new_start=g_new_start+current_length T3ESFT1B.168
l_iadd=l_iadd+current_length T3ESFT1B.169
l_new_length=l_new_length-current_length T3ESFT1B.170
c--check if we have finished T3ESFT1B.171
if(l_new_length.gt.0) goto 1000 T3ESFT1B.172
goto 1100 T3ESFT1B.173
endif T3ESFT1B.174
end do T3ESFT1B.175
write(0,*)'GCG_RVECSHIFT: Lost in GCG_RVECSHIFT' T3ESFT1B.176
call abort
('GCG_RVECSHIFT: Lost in GCG_RVECSHIFT') T3ESFT1B.177
T3ESFT1B.178
1100 continue T3ESFT1B.179
c T3ESFT1B.180
c--protect the buffer until everyone has finished T3ESFT1B.181
call gcg_ssync (
gid, istat) T3ESFT1B.182
c T3ESFT1B.183
if(istat.ne.0) then T3ESFT1B.184
write(0,9932) my_pe(), istat T3ESFT1B.185
write(6,9932) my_pe(), istat T3ESFT1B.186
call abort
('GCG_RVECSHIFT: Failed in GCG_SSYNC') T3ESFT1B.187
endif T3ESFT1B.188
c T3ESFT1B.189
end do T3ESFT1B.190
istat=0 T3ESFT1B.191
return T3ESFT1B.192
end T3ESFT1B.193
*ENDIF T3ESFT1B.194
*ENDIF T3ESFT1B.195