/usr/share/arc/scan-fork-job is in nordugrid-arc-arex 1.1.1-1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/bin/sh
#
# Periodically monitor for jobs which have finished or failed but have not
# reported an exit code
#
# Numeric uid of the invoking user; it is only referenced by the optional
# per-user debug log named in the commented-out line below.
id=$(id -u)
# To trace a run, replace the no-op ':' with the commented command.
#debug='eval echo >> /tmp/parse-fork-log.$id'
debug=:
$debug "run at $(date)"
$debug "options = $@"
# ARC1 passes the configuration file first, ahead of the control directories.
if [ "$1" = "--config" ]; then
  shift
  ARC_CONFIG=$1
  shift
fi
# At least one control directory must remain on the command line.
if [ -z "$1" ]; then
  echo "Argument missing" 1>&2
  exit 1
fi
# Prints the uid of the owner of the file given as argument
# Perl is used because it's more portable than using the stat command
printuid () {
code='my @s = stat($ARGV[0]); print($s[4] || "")'
/usr/bin/perl -we "$code" "$1"
}
#
# Attempts to switch to uid passed as the first argument and then runs the
# commands passed as the second argument in a shell. The remaining arguments
# are passed as arguments to the shell. No warning is given in case switching
# uid is not possible.
#
do_as_uid () {
test $# -ge 2 || { log "do_as_uid requires 2 arguments"; return 1; }
script='use English;
my ($uid, @args) = @ARGV;
if ( $UID == 0 && $uid ) {
eval { $UID = $uid };
print STDERR "Cannot switch to uid($UID): $@\n" if $UID != $uid;
}
system("/bin/sh","-c",@args);
exit ($?>>8||128+($?&127));
'
/usr/bin/perl -we "$script" "$@"
}
# Appends the .comment file (containing STDOUT & STDERR of the job wrapper),
# framed by a header and a footer line, to the .errors file.
#
# Arguments:
#   $1 - uid passed on to do_as_uid for running the append
#   $2 - path of the comment file; if it cannot be read, only the header and
#        footer are appended (cat errors are discarded)
#   $3 - path of the errors file to append to
#
# Returns the status of do_as_uid.
save_commentfile () {
  # Both paths travel as positional parameters of the inner shell rather than
  # being pasted into the command text, so names containing quotes or other
  # shell metacharacters are handled literally. Inside the command string,
  # $1 is the comment file and $2 the errors file.
  do_as_uid "$1" '
    {
      echo "---------- Output of the job wrapper script -----------"
      cat "$1" 2> /dev/null
      echo "------------------------- End of output -------------------------"
    } >> "$2"
  ' sh "$2" "$3"
}
# Scan every control directory named on the command line for jobs whose
# process has gone away without an .lrms_done file having been written, and
# write that file for them.
for control_dir in "$@" ; do
  if [ ! -d "${control_dir}" ]; then
    echo "No control dir $control_dir" 1>&2
    continue
  fi

  # Iterate over all jobs in the control directory whose status file mentions
  # INLRMS or CANCELING. find -exec passes file names to grep without word
  # splitting, and "grep -E" replaces the obsolescent egrep.
  find "${control_dir}/processing" -name 'job.*.status' \
      -exec grep -El 'INLRMS|CANCELING' {} + \
    | sed -e 's/.*job\.//' -e 's/\.status$//' \
    | while read -r job; do
      $debug "scanning job = $job"

      # The grami file is sourced below; clear what a previous job left behind.
      unset joboption_jobid
      unset joboption_directory

      # this job was already completed, nothing remains to be done
      [ -f "${control_dir}/job.${job}.lrms_done" ] && continue

      # a grami file exists for all jobs that GM thinks are running.
      # proceed to next job if this file is missing.
      [ -f "${control_dir}/job.${job}.grami" ] || continue

      # load the job options (process ID and session directory) from grami
      . "${control_dir}/job.${job}.grami"

      # process IDs could not be learned, proceeding to next
      [ -z "$joboption_jobid" ] && continue
      $debug "local jobid = $joboption_jobid"

      # checking if process is still running
      if ps -o user= -p "$joboption_jobid" > /dev/null; then
        $debug "ps returned $? - process $joboption_jobid of job $job is still running. Continuing to next"
        continue
      else
        $debug "ps returned $? - process $joboption_jobid of job $job has exited!"
      fi

      # The owner of the .local file is the uid used for reading the diag
      # file and for appending the wrapper output.
      uid=$(printuid "${control_dir}/job.${job}.local")
      $debug "local user id = $uid"

      diagfile=${joboption_directory}.diag
      $debug "checking $diagfile"
      # The path is passed as a positional parameter of the inner shell so
      # that quotes in it cannot break out of the command string.
      exitcode=$(do_as_uid "$uid" 'cat "$1"' sh "$diagfile" \
        | sed -n 's/^exitcode=\([0-9]*\).*/\1/p')
      $debug "exitcode = [$exitcode] extracted from $diagfile"

      fork_comment=""
      if [ -z "$exitcode" ]; then
        # No exit code recorded in the diag file: report the job as failed.
        echo "Job $job with PID $joboption_jobid died unexpectedly" 1>&2
        fork_comment="Job died unexpectedly"
        exitcode=-1
      elif [ "$exitcode" -ne 0 ]; then
        fork_comment="Job finished with non-zero exit code"
      fi
      $debug "got exitcode=$exitcode"

      save_commentfile "$uid" "${joboption_directory}.comment" "${control_dir}/job.${job}.errors"
      # Record "<exitcode> <comment>"; jobs with this file are skipped above
      # on later scans.
      echo "$exitcode $fork_comment" > "${control_dir}/job.${job}.lrms_done"
    done
done

$debug "done, going to sleep"
sleep 10
exit 0
|