/usr/bin/dmtcp_rm_loclaunch is in dmtcp 2.3.1-6.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/bin/bash
# ****************************************************************************
# * Copyright (C) 2012-2014 by Artem Y. Polyakov <artpol84@gmail.com> *
# * *
# * This file is part of the RM plugin for DMTCP *
# * *
# * RM plugin is free software: you can redistribute it and/or *
# * modify it under the terms of the GNU Lesser General Public License as *
# * published by the Free Software Foundation, either version 3 of the *
# * License, or (at your option) any later version. *
# * *
# * RM plugin is distributed in the hope that it will be useful, *
# * but WITHOUT ANY WARRANTY; without even the implied warranty of *
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
# * GNU Lesser General Public License for more details. *
# * *
# * You should have received a copy of the GNU Lesser General Public *
# * License along with DMTCP:dmtcp/src. If not, see *
# * <http://www.gnu.org/licenses/>. *
# ****************************************************************************/
#######################################
# Write one SLURM environment file per checkpoint image.
#
# For every name in the list a file
#   $TMPDIR/dmtcp-<user>@<host>/slurm_env_<id>
# is (re)created, where <id> is the checkpoint name with a trailing ".dmtcp"
# removed and everything up to the last "_" stripped.  Each file holds three
# NAME=value lines: SLURM_SRUN_COMM_HOST, SLURM_SRUN_COMM_PORT, SLURMTMPDIR.
#
# Globals:
#   TMPDIR, SLURM_SRUN_COMM_HOST, SLURM_SRUN_COMM_PORT, SLURMTMPDIR (read)
#   DMTCP_TMPDIR (written)
# Arguments:
#   $1 - whitespace-separated list of checkpoint image names
#######################################
prepare_SLURM_env()
{
  local ckpt_list="$1"
  local ckpt_file ckpt_stem env_file

  # Create temp directory if needed.
  # NOTE(review): DMTCP_TMPDIR is intentionally left global, as in the
  # original.  If this name is exported in the job environment, the value set
  # here is what child processes inherit -- confirm before making it local.
  # NOTE(review): with TMPDIR unset this resolves to "/dmtcp-<user>@<host>";
  # confirm whether a fallback directory is wanted.
  DMTCP_TMPDIR="$TMPDIR/dmtcp-$(whoami)@$(hostname)"
  if [ ! -d "$DMTCP_TMPDIR" ]; then
    mkdir -p -- "$DMTCP_TMPDIR"
  fi

  # Create files with SLURM environment.
  # The list is split on whitespace on purpose, hence the unquoted expansion.
  # shellcheck disable=SC2086
  for ckpt_file in $ckpt_list; do
    ckpt_stem=${ckpt_file%%.dmtcp}
    env_file="$DMTCP_TMPDIR/slurm_env_${ckpt_stem##*_}"
    # Quoted target: an unquoted one fails with "ambiguous redirect" as soon
    # as the path contains whitespace.
    {
      echo "SLURM_SRUN_COMM_HOST=$SLURM_SRUN_COMM_HOST"
      echo "SLURM_SRUN_COMM_PORT=$SLURM_SRUN_COMM_PORT"
      echo "SLURMTMPDIR=$SLURMTMPDIR"
    } > "$env_file"
  done
}
# ---------------------------------------------------------------------------
# Node-local restart launcher.
#
# Selects the checkpoint images assigned to this node from the
# DMTCP_REMLAUNCH_* variables and hands them to
#   dmtcp_restart --join --host $DMTCP_HOST --port $DMTCP_PORT <images>
#
# SLURM branch (SLURM_JOBID or SLURM_JOB_ID set):
#   node index = SLURM_NODEID, slot index = SLURM_LOCALID; only the images in
#   DMTCP_REMLAUNCH_<node>_<slot> are restarted.  Afterwards slot 0 copies
#   $SLURMTMPDIR/dmtcp* (or $TMPDIR/dmtcp*) into ./LOGS if that directory
#   exists.
# PBS branch (PBS_ENVIRONMENT=PBS_BATCH and PBS_JOBID set):
#   node index = PBS_NODENUM; $1 is a string of shell assignments that is
#   eval'ed to define the DMTCP_* variables; the images of all slots
#   0..DMTCP_REMLAUNCH_<node>_SLOTS-1 are restarted by a single dmtcp_restart.
#
# Every diagnostic path keeps the original `exit 0` (and the `set` dump of
# the shell state) so the exit status seen by the launcher is unchanged.
# ---------------------------------------------------------------------------

# Succeeds only when $1 is a non-empty string of decimal digits.  Used to
# validate ids before they become part of a variable name.
rm_is_uint()
{
  case "$1" in
    '' | *[!0-9]*) return 1 ;;
  esac
  return 0
}

if [ -n "$SLURM_JOBID" ] || [ -n "$SLURM_JOB_ID" ]; then
  NODE=$SLURM_NODEID
  if [ -z "$NODE" ]; then
    # Something went wrong. Shouldn't happen.
    echo "Cannot determine SLURM_NODEID. Exit."
    set
    exit 0
  fi

  # Determine total number of nodes
  NODES=$DMTCP_REMLAUNCH_NODES
  if ! rm_is_uint "$NODE" || ! rm_is_uint "$NODES" || [ "$NODE" -ge "$NODES" ]; then
    # Something went wrong. Shouldn't happen.
    echo "No DMTCP environment or bad ID values: ID=$NODE, IDS=$NODES. Exit."
    set
    exit 0
  fi

  # NODE is all digits here, so the composed name is a valid identifier and
  # indirect expansion can replace eval.
  SLOTS_VAR="DMTCP_REMLAUNCH_${NODE}_SLOTS"
  LOCAL_SLOTS=${!SLOTS_VAR}
  if ! rm_is_uint "$LOCAL_SLOTS" || [ "$LOCAL_SLOTS" -eq 0 ]; then
    echo "$(hostname): nothing to launch \${DMTCP_REMLAUNCH_${NODE}_SLOTS} = ${LOCAL_SLOTS}"
    set
    exit 0
  fi

  if rm_is_uint "$SLURM_LOCALID"; then
    if [ "$SLURM_LOCALID" -ge "$LOCAL_SLOTS" ]; then
      echo "$(hostname): Will not use SLURM_LOCALID=$SLURM_LOCALID for launch, max is $LOCAL_SLOTS"
      exit 0
    fi
    FILES_VAR="DMTCP_REMLAUNCH_${NODE}_${SLURM_LOCALID}"
    LOCAL_FILES=${!FILES_VAR}
  else
    # Missing or malformed slot id: report it through the check below rather
    # than building a variable name from it.
    LOCAL_FILES=""
  fi
  if [ -z "$LOCAL_FILES" ]; then
    echo "$(hostname): Bad LOCAL_FILES variable DMTCP_REMLAUNCH_${NODE}_${SLURM_LOCALID}"
    set
    exit 0
  fi

  prepare_SLURM_env "$LOCAL_FILES"
  # LOCAL_FILES is a whitespace-separated list and must be word-split.
  # shellcheck disable=SC2086
  dmtcp_restart --join --host "$DMTCP_HOST" --port "$DMTCP_PORT" $LOCAL_FILES

  # Accumulate logs from computing nodes (done once per node, by slot 0).
  if [ -d ./LOGS ] && [ "$SLURM_LOCALID" -eq 0 ]; then
    TDIR=${SLURMTMPDIR:-$TMPDIR}
    if [ -n "$TDIR" ]; then
      # Remove the originals only after a successful copy so that a failed
      # copy does not lose the logs.
      if cp -R -- "$TDIR"/dmtcp* ./LOGS/; then
        rm -R -- "$TDIR"/dmtcp*
      fi
    fi
  fi
elif [ "$PBS_ENVIRONMENT" = PBS_BATCH ] && [ -n "$PBS_JOBID" ]; then
  # An empty PBS_O_WORKDIR made the original unquoted `cd` go to $HOME;
  # that fallback is kept.  A failed cd is reported but not fatal.
  cd "${PBS_O_WORKDIR:-$HOME}" \
    || echo "$0: cannot change directory to PBS_O_WORKDIR=$PBS_O_WORKDIR"

  NODE=$PBS_NODENUM
  if [ -z "$NODE" ]; then
    # Something went wrong. Shouldn't happen.
    echo "Cannot determine number of this node PBS_NODENUM=$PBS_NODENUM. Exit."
    set
    exit 0
  fi

  if [ -z "$1" ]; then
    echo "$0: Not enough parameters: $*. Exit."
    exit 0
  fi
  # $1 carries the DMTCP_* assignments as shell code.
  # NOTE(review): eval executes whatever the caller passes; it is kept because
  # this is the script's calling convention, so $1 must come from a trusted
  # launcher only.
  eval "$1"

  # Determine total number of nodes
  NODES=$DMTCP_REMLAUNCH_NODES
  if ! rm_is_uint "$NODE" || ! rm_is_uint "$NODES" || [ "$NODE" -ge "$NODES" ]; then
    # Something went wrong. Shouldn't happen.
    echo "No DMTCP environment or bad ID values: ID=$NODE, IDS=$NODES. Exit."
    set
    exit 0
  fi

  SLOTS_VAR="DMTCP_REMLAUNCH_${NODE}_SLOTS"
  LOCAL_SLOTS=${!SLOTS_VAR}
  if ! rm_is_uint "$LOCAL_SLOTS" || [ "$LOCAL_SLOTS" -eq 0 ]; then
    echo "$(hostname): nothing to launch \${DMTCP_REMLAUNCH_${NODE}_SLOTS} = ${LOCAL_SLOTS}"
    set
    exit 0
  fi

  # Collect the images of every slot on this node into one list.
  # `[ -lt ]` is used instead of (( )) so a count with leading zeros is still
  # read as decimal.
  LOCAL_FILES=""
  slot=0
  while [ "$slot" -lt "$LOCAL_SLOTS" ]; do
    FILES_VAR="DMTCP_REMLAUNCH_${NODE}_${slot}"
    LOCAL_FILES="${LOCAL_FILES} ${!FILES_VAR}"
    slot=$((slot + 1))
  done

  # The list always contains the separating blanks, so test for any
  # non-blank content rather than for an empty string.
  if [ -z "${LOCAL_FILES//[[:space:]]/}" ]; then
    echo "$(hostname): Bad LOCAL_FILES variable DMTCP_REMLAUNCH_${NODE}_<slot>: no files in any of ${LOCAL_SLOTS} slot(s)"
    set
    exit 0
  fi

  # LOCAL_FILES is a whitespace-separated list and must be word-split.
  # shellcheck disable=SC2086
  dmtcp_restart --join --host "$DMTCP_HOST" --port "$DMTCP_PORT" $LOCAL_FILES

  if [ -d ./LOGS ]; then
    cp -R /tmp/dmtcp* ./LOGS/
  fi
fi
|