/usr/lib/mpich/bin/mpirun.pg is in mpich-bin 1.2.7-10ubuntu1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 | #! /bin/sh
#
# This file is used to generate a p4-style procgroup file
#
# This should be used only for pg and execer runs. It generates a list from
# the machines.$arch file
#
# The original csh code made extensive use of csh arrays, which are quite
# nice for reading parts of a file into a variable.
#
# Format of machines list is lines containing either a single
# machinename
# or
# machinename:n
# where n is the number of available processors.
# We use : separators instead of spaces to simplify parsing of the file
#
# I have a note to make mpirun handle hostnames with and without domainnames
# This may have been fixed below; I'm leaving this marker here in case it
# comes up again.
#
#
# Optional external process allocator.  When this is non-empty, the
# allocator section later in this file runs it and uses its output as
# BEOWULF_JOB_MAP.  It is left empty by default.
external_allocate_cmd=""
#
# Load mpirun.args unless argsset is already non-empty (presumably
# mpirun.args sets it; confirm against that file).
#
if [ "$argsset" = "" ] ; then
    # Directory part of the name this script was invoked with.  When $0
    # contains no "/" the sed expression leaves it unchanged, the -d test
    # below fails, and mpirun.args is located through the normal "." lookup.
    mpirundir=`echo "$0" | sed 's%/[^/][^/]*$%%'`
    if [ -d "$mpirundir" ] ; then
        # Quoted so that a directory name containing blanks is passed to
        # "." as one path instead of being split into several words.
        . "$mpirundir/mpirun.args"
    else
        . mpirun.args
    fi
fi
#
# Get the defaults (this is in preparation for multi-architecture versions)
#
# The operands of the two tests below are quoted so that an unset or empty
# narch/nolocal makes the test simply false instead of producing a "["
# syntax error.
if [ "$narch" = 0 ] ; then
    # No numbered architecture settings exist: copy the plain
    # arch/archlist/np values into slot 1 so the selection loop later in
    # this file, which reads arch<N>, archlist<N> and np<N>, has one entry.
    narch=1
    arch1=$arch
    archlist1="$archlist"
    archlocal=$arch
    np1=$np
fi
# procFound is the number of processes already accounted for before any
# machine is picked from the machines file.
if [ "$nolocal" = 1 ] ; then
    # echo don\'t run local instance
    # if the job is not to be run locally, then don't include this machine
    # in the list
    procFound=0
else
    # echo run local instance
    # Note that in the SMP cluster case, we may need to increase this
    procFound=1
fi
#
# Start from empty result lists; the selection loop appends to them.
machinelist=""
archuselist=""
nprocuselist=""
# Index of the architecture slot processed first.
curarch=1
# The selection loop sets nolocal=1 after each architecture; keep the
# incoming value so it can be restored once the loop is done.
nolocalsave=$nolocal
archlocal=$arch1
#
# The following is from Rick Niles of Scyld, and adds the ability to
# use an external process allocator
# When an allocator command is configured, its output replaces whatever
# BEOWULF_JOB_MAP held.  The command is deliberately left unquoted so that
# it may carry its own arguments.
[ -z "$external_allocate_cmd" ] || BEOWULF_JOB_MAP=`$external_allocate_cmd`
#
# A non-empty BEOWULF_JOB_MAP is a ":"-separated list of machines and is
# used instead of any machines file.
if [ -n "$BEOWULF_JOB_MAP" ]; then
    # Turn the ":" separators into blanks.
    machinelist=`echo $BEOWULF_JOB_MAP | tr ':' ' '`
    # First entry of the list (the whole list when it has one entry).
    machinehead=`echo $machinelist | sed 's/ .*//'`
    if [ "$machinehead" != "$MPI_HOST" ] ; then
        # The list does not start with this host, so no local instance.
        nolocal=1
    else
        # The list starts with this host: drop that first entry.
        # multiple_args is non-empty only when more than one entry exists.
        multiple_args=`echo $machinelist | grep " "`
        case "$multiple_args" in
            "")
                machinelist=""
                ;;
            *)
                machinelist=`echo $machinelist | sed 's/^[^ ]* //'`
                ;;
        esac
    fi
    # Every remaining machine gets the current architecture and exactly one
    # process.
    for i in $machinelist; do
        nprocuselist="$nprocuselist 1"
        archuselist="$archuselist $arch"
    done
    # Nothing else in this file applies once the job map has been used;
    # "return" works because the file is read with "." by its caller.
    return
fi
#
# The first time through the loop, we use the default machinefile, unless
# one has been specified.
#
# We use \$ instead of $ since some systems seem to require \$ and that
# appears to be correct.
#
# Machine selection, one pass per architecture slot (curarch .. narch).
#
# Read per pass:  arch<N>, archlist<N>, np<N>, machineFile<N>, plus
#   machineFile, machineDir, datadir, machine, nolocal, procFound, MPI_HOST,
#   HEADARG, external_allocate_cmd and the optional switches MPIRUN_RANDOM,
#   EXCLUDE, ALL_CPUS, ALL_LOCAL and MPI_MAX_CLUSTER_SIZE.
# Appended to:    machinelist, archuselist, nprocuselist (one word per
#   chosen machine: its name, its architecture, its process count).
# Exits with status 1 when no usable machines file is found (unless only a
#   single local process is wanted) or when too few machines are available.
#
# The integer flags tested without quotes (curarch, narch, nolocal, np,
# procFound, ...) are expected to have been set before this point.
while [ $curarch -le $narch ] ; do
    # Pick up the settings of architecture slot $curarch.
    eval arch=\$"arch$curarch"
    eval archlist=\$"archlist$curarch"
    if [ -z "$archlist" ] ; then archlist=$arch ; fi
    eval np=\$"np$curarch"
    if [ -z "$np" ] ; then
        echo "Warning: unspecified -np value for $arch. Assuming -np 1."
        np=1
    fi
    # A readable per-slot machine file overrides the current machineFile.
    eval mFile=\$"machineFile$curarch"
    if [ -n "$mFile" -a -r "$mFile" ] ; then machineFile=$mFile ; fi
    #
    # Get default machine file
    if [ -z "$machineFile" ] ; then
        # If on the ANL SPx, use getjid to get the machine list...
        if [ "$machine" = "ibmspx" -a -x /usr/local/bin/getjid ] ; then
            machineFile="/sphome/$LOGNAME/SPnodes.`/usr/local/bin/getjid`"
        else
            if [ -z "$machineDir" ] ; then machineDir=$datadir ; fi
            # Use the first readable machines.<name> for the names in
            # archlist; if none is readable machineFile keeps the last
            # candidate and is rejected by the check below.
            for march in $archlist ; do
                machineFile="${machineDir}/machines.${march}"
                if [ -r "$machineFile" ] ; then break ; fi
            done
        fi
    fi
    #
    # No name, an empty file, or an unreadable file: that is only
    # acceptable when an external allocator is configured or when a single
    # local process is all that is wanted.
    if [ -z "$machineFile" -o ! -s "$machineFile" -o \
         ! -r "$machineFile" ] ; then
        if [ -z "$external_allocate_cmd" \
             -a \( $nolocal != 0 -o $np != 1 \) ] ; then
            echo Cannot read $machineFile.
            echo Looked for files with extension $archlist in
            echo directory $datadir .
            exit 1
        fi
        # If we did NOT issue a message because np == 1, we need
        # to be careful that machineFile is not empty.
        machineFile=/dev/null
    fi
    #
    # Find the machines to use (build in file).  Note that "local"
    # refers only to the first architecture.
    #
    # MPI_HOSTLeader is MPI_HOST up to (not including) its first ".".
    MPI_HOSTLeader=`expr "$MPI_HOST" : '\([^\.]*\).*'`
    #
    # If MPIRUN_RANDOM is set to yes, randomly permute the machines file.
    # This can be set by the environment variable MPIRUN_RANDOM being set
    # to yes, or by the mpirun option -randommachines.
    # The machines file created by configure will stay the same, the
    # permuted one, mpirand-${LOGNAME}$$, will be located in
    # $TEMPDIR ( if $TMPDIR is set, then TEMPDIR=$TMPDIR,
    # else TEMPDIR=/tmp ).
    if [ "$MPIRUN_RANDOM" = "yes" ] ; then
        # If mpirun.rand is located in the mpirun/bin directory
        if [ -f "$MPIRUN_HOME/mpirun.rand" ] ; then
            if [ "$mpirun_verbose" = 1 ] ; then
                echo "Permuting machines file"
            fi
            # Check to see if TMPDIR is set, if not, use /tmp
            if [ -n "$TMPDIR" ] ; then
                TEMPDIR=$TMPDIR
            else
                TEMPDIR="/tmp"
            fi
            export TEMPDIR
            # machines_randFile is the permuted machines file.
            # NOTE(review): the name is predictable inside a shared
            # directory; it is kept because the name is documented above.
            machines_randFile="$TEMPDIR/mpirand-${LOGNAME}$$"
            # Make the helper executable first if it is not already.
            if [ ! -x "$MPIRUN_HOME/mpirun.rand" ] ; then
                chmod +x "$MPIRUN_HOME/mpirun.rand"
            fi
            "$MPIRUN_HOME/mpirun.rand" "$machineFile" "$machines_randFile"
        else
            echo "$MPIRUN_HOME/mpirun.rand does not exist"
            echo "Cannot permute machine file"
        fi
        # If a permuted machines file has been created, use it
        if [ -s "$machines_randFile" ] ; then
            machineFile=$machines_randFile
            if [ "$mpirun_verbose" = 1 ] ; then
                echo "using the permuted machine file $machineFile"
            fi
        fi
    fi
    # A bug in the GNU head causes "head -08 filename" to generate a bogus
    # error message (it interprets 0xxx as octal).  Add 0 to the number
    # as an easy way to eliminate any leading zeros.
    #
    # Just to make things even harder, later versions of GNU utilities no
    # longer support the classic Unix "head" command; instead, they have
    # a command with the same name but a different command syntax (yes, this
    # is a *very* bad thing to do.  Please don't ever do something this nasty
    # to your users).
    #
    # To workaround this, the top-level configure determines the format of the
    # head command and puts the option name into ${HEADARG}
    np=`expr $np + 0`
    # machineavail: the first $np entries of the machines file with comment
    # lines and trailing comments removed, joined by blanks.
    # ${HEADARG}${np} stays unquoted so HEADARG may itself end in a blank.
    if [ $nolocal = 0 -o $curarch -gt 1 ] ; then
        machineavail=`cat "$machineFile" | \
            sed -e '/^#/d' -e 's/#.*$//g' | head ${HEADARG}${np} | \
            tr '\012' ' '`
    else
        if [ $np -gt 1 -o $nolocal = 1 ] ; then
            # Remove host from the list of available machines....
            # Thanks to Bjarne Herland for modification to
            # remove EXACTLY the host machine (eg., bar and not bark)
            machineavail=`cat "$machineFile" | \
                sed -e '/^#/d' -e 's/#.*$//g' -e "/^$MPI_HOST:/d" \
                -e "/^$MPI_HOST *\$/d" | head ${HEADARG}${np} | tr '\012' ' '`
        else
            machineavail=""
        fi
    fi
    #
    # The following enhancement thanks to Rick Niles of Scyld
    # Remove exclude machines from list (EXCLUDE is ":"-separated).
    if [ -n "$EXCLUDE" ] ; then
        excludelist=`echo $EXCLUDE | sed 's/:/ /g'`
        for exclude in $excludelist; do
            newlist=""
            # The loop variable is deliberately not called "machine": that
            # name is tested at the top of this loop and must not be
            # overwritten here.
            for availentry in $machineavail; do
                # Entries may be in name:ncpu form.  Compare only the name,
                # but keep the complete entry so that the processor count
                # is still available to the code below.
                availname=`echo $availentry | sed 's/:[0-9]*$//'`
                if [ "$availname" != "$exclude" ]; then
                    newlist="$newlist $availentry"
                fi
            done
            machineavail=$newlist
        done
    fi
    # Check for ALL_CPUS: np becomes the sum of the processor counts of
    # the available entries (an entry without ":n" counts as one).
    if [ -n "$ALL_CPUS" ]; then
        np=0
        for availentry in $machineavail; do
            smpcntpresent=`echo $availentry | grep :`
            if [ -n "$smpcntpresent" ] ; then
                num=`echo $availentry | cut -f 2 -d:`
                np=`expr $np + $num`
            else
                np=`expr $np + 1`
            fi
        done
    fi
    #
    # One final refinement.  We could make each job start with
    # different processors with code like
    #r=`date +%S`
    #r=`expr 1 + $r / 10`
    #m1=`echo $machineavail | cut -c $r- -d ' '`
    #r=`expr $r - 1`
    #m2=`echo $machineavail | cut -c 1-$r -d ' '`
    #machineavail="$m1 $m2"
    #
    # Get the machine list for the job
    # KeepHost suggested by Marc A. Viredaz; allows the same machine
    # to be listed multiple times in the machines file.
    # This will loop around the machines list until it
    # finds enough machines (the nfound is used to detect infinite loops)
    # Allow the host to be used if it is the only system found
    #
    KeepHost=0
    loopcnt=0
    # If we have an SMP cluster, then first find the number that we can run
    # locally.
    if [ -z "$MPI_MAX_CLUSTER_SIZE" ] ; then MPI_MAX_CLUSTER_SIZE=1 ; fi
    if [ $nolocal = 0 -a $MPI_MAX_CLUSTER_SIZE -gt 1 ] ; then
        for machineName in $machineavail ; do
            # Split machineName into name and number
            ntest=`expr "$machineName" : '.*:\([0-9]*\)'`
            # if ntest is blank, there is no : in the entry
            if [ -z "$ntest" ] ; then ntest=1 ; fi
            mtest=`expr "$machineName" : '\(.*\):.*'`
            if [ -z "$mtest" ] ; then mtest=$machineName ; fi
            machineName=$mtest
            machineNameLeader=`expr "$machineName" : '\([^\.]*\).*'`
            # The entry is the local host when the names are equal, or when
            # the entry carries a domain part and its leading component
            # equals that of MPI_HOST.
            if [ "$machineName" = "$MPI_HOST" -o \
                 \( "$machineName" != "$machineNameLeader" -a \
                    "$machineNameLeader" = "$MPI_HOSTLeader" \) ] ; then
                nprocmachine=1
                # Some systems return empty for no match; others return 0.
                # Empty is what they should use (0 is ambiguous), but we
                # have to be prepared for either.
                if [ -n "$ntest" -a "$ntest" != "0" ] ; then
                    nprocmachine=$ntest
                    # Limit the allowed number
                    if [ "$MPI_MAX_CLUSTER_SIZE" -lt $nprocmachine ] ; then
                        nprocmachine=$MPI_MAX_CLUSTER_SIZE
                    fi
                    # machineName no longer contains ":" at this point, so
                    # this leaves it empty; it is not read again before the
                    # break below.  Kept as in the original.
                    machineName=`expr "$machineName" : '\(.*\):.*'`
                    nproclocal="$nprocmachine"
                    procFound=$nproclocal
                fi
                break;
            fi
        done
    fi
    # nleft: processes that still have to be placed for this architecture.
    nleft=`expr $np - $procFound`
    # This if test is needed (req #5332) for -comm=shared configure option
    if [ $nleft -lt 0 ] ; then
        nproclocal=$np
    fi
    while [ $procFound -lt $np ] ; do
        # nfound counts the processes placed during this sweep of the list.
        nfound=0
        for machineName in $machineavail ; do
            # Split off the number of processors, if present
            nprocmachine=1
            # Some systems return empty for no match; others return 0.  Empty
            # is what they should use (0 is ambiguous), but we have to be
            # prepared for either.
            if [ "$ALL_LOCAL" = 1 ] ; then machineName="$MPI_HOST"; fi
            ntest=`expr "$machineName" : '.*:\([0-9]*\)'`
            if [ -n "$ntest" -a "$ntest" != "0" ] ; then
                nprocmachine=$ntest
                # Limit the allowed number
                if [ -z "$MPI_MAX_CLUSTER_SIZE" ] ; then
                    nprocmachine=1
                elif [ "$MPI_MAX_CLUSTER_SIZE" -lt $nprocmachine ] ; then
                    nprocmachine=$MPI_MAX_CLUSTER_SIZE
                fi
                machineName=`expr "$machineName" : '\(.*\):.*'`
            fi
            # If nolocal = 0 (local machine to be included),
            # the first time around, we don't accept the local host.
            machineNameLeader=`expr "$machineName" : '\([^\.]*\).*'`
            if [ $nolocal = 1 -o $KeepHost = 1 -o \
                 \( "$machineName" != "$MPI_HOST" -a \
                    "$machineNameLeader" != "$MPI_HOSTLeader" \
                 \) ] ; then
                # Should we convert this to ${machineName}:$nprocmachine
                # (with a care taken for nprocmachine > np - procFound).
                if [ $nprocmachine -gt $nleft ] ; then
                    nprocmachine=$nleft
                fi
                # Even better, we should use
                # machinename:nproc:arch, so that everything is together.
                machinelist="$machinelist $machineName"
                archuselist="$archuselist $arch"
                nprocuselist="$nprocuselist $nprocmachine"
                procFound=`expr $procFound + $nprocmachine`
                nfound=`expr $nfound + $nprocmachine`
                nleft=`expr $nleft - $nprocmachine`
            fi
            if [ $procFound = $np ] ; then break ; fi
            # Once the local host has been seen in the list, later entries
            # (and later sweeps) may use it.
            if [ "$machineName" = "$MPI_HOST" -o \
                 "$machineNameLeader" = "$MPI_HOSTLeader" ] ; then
                KeepHost=1
            fi
        done
        loopcnt=`expr $loopcnt + 1`
        # After the first time, we may have found no hosts BUT now
        # allow KeepHost.
        if [ $nfound = 0 -a $loopcnt -gt 1 ] ; then
            echo "Could not find enough machines for architecture $arch"
            exit 1
        fi
    done
    # Prepare for the next architecture slot: nothing is pre-placed, the
    # local host is not treated specially, and the machine file is looked
    # up afresh.
    curarch=`expr $curarch + 1`
    procFound=0
    nolocal=1
    machineFile=""
done
# Put back the nolocal value that was saved before the selection loop
# (the loop overwrites nolocal).
nolocal=$nolocalsave
#
case "$argsset" in
    1)
        # Leave the first word of machinelist in machinehead.  With an
        # empty machinelist the loop body never runs and machinehead keeps
        # whatever value it had.
        for machinehead in $machinelist ; do
            break
        done
        ;;
    *)
        # argsset is not "1": print the chosen machines and their
        # architectures, one list per line, and clear machinehead.
        echo $machinelist
        echo $archuselist
        #echo $nprocuselist
        machinehead=""
        ;;
esac
|