This file is indexed.

/usr/share/arc/submit-sge-job is in nordugrid-arc-arex 5.4.2-1build1.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
#!/bin/sh
# set -xv
#
#  Submits job to Sun Grid Engine (SGE).
#  Input: path to grami file (same as Globus).
#
# A temporary job script is created for the submission and then removed 
# at the end of this script. 
#

echo "----- starting submit_sge_job -----" 1>&2

# Identifies this backend; consumed by code outside this file
# (NOTE(review): presumably the helper scripts sourced below -- confirm).
joboption_lrms=sge

# ARC1 passes first the config file.
if [ "$1" = "--config" ]; then
  shift
  ARC_CONFIG=$1
  shift
fi

# Absolute directory of this script. Everything is quoted so that an
# installation path containing whitespace still resolves correctly.
basedir=$(dirname "$0")
basedir=$(cd "$basedir" > /dev/null && pwd) || exit $?

pkgdatadir="$basedir"

# Both helper scripts are mandatory: abort with their status on failure.
. "${pkgdatadir}/configure-sge-env.sh" || exit $?
. "${pkgdatadir}/submit_common.sh" || exit $?

joboption_localtransfer='no'

# Log performance
perflogfilesub="${perflogdir}/submission.perflog"

if [ -n "$perflogdir" ]; then
   # start time stamp (seconds.nanoseconds; %N needs a date that supports it)
   start_ts=$(date +%s.%N)
fi


##############################################################
# Parse grami file, read arc config
##############################################################

# $1 is the path to the grami file. It is quoted so that a path with
# whitespace stays one argument and an empty value makes cat fail
# instead of silently waiting on standard input.
init "$1"
cat "$1" 1>&2

read_arc_conf

##############################################################
# Zero stage of runtime environments
##############################################################

RTE_stage0

# Force shell /bin/sh, other qsub options have been moved to the job script.
# SGE_QSUB deliberately holds "command + options" and is therefore expanded
# unquoted where it is executed.
SGE_QSUB='qsub -S /bin/sh'
SGE_QCONF=qconf
if [ -n "$SGE_BIN_PATH" ]; then
  SGE_QSUB="${SGE_BIN_PATH}/${SGE_QSUB}"
  SGE_QCONF="${SGE_BIN_PATH}/${SGE_QCONF}"
fi

mktempscript

##############################################################
# Start job script
##############################################################
# The first write truncates/creates the job script; the whole header is
# emitted through one redirection. '#$' lines are SGE embedded options.
{
  printf '%s\n' '#!/bin/sh'
  printf '%s\n' '# SGE batch job script built by grid-manager'
  # Job not rerunable:
  printf '%s\n' '#$ -r n'
  # Don't send mail when job finishes:
  printf '%s\n' '#$ -m n'
  # Mix standard output and standard error:
  printf '%s\n' '#$ -j y'
  # Write output to comment file:
  printf '%s\n' "#\$ -o ${joboption_directory}/.comment"
} > "$LRMS_JOB_SCRIPT"

##############################################################
# priority
##############################################################
if [ -n "$joboption_priority" ]; then
  # First we must scale priority.  SGE: -1023 -> 1024, ARC: 0-100.
  # A user can only decrease priority, i.e. use -1023 -> 0
  # (info from gsciacca@lhep.unibe.ch).
  # Same problem as SLURM: we can only prioritize grid jobs. Locally
  # submitted jobs will get highest priority.
  # Integer arithmetic: scale to 0..1023 first, then shift down by 1023.
  priority=$(( joboption_priority * 1023 / 100 - 1023 ))
  printf '%s\n' "#\$ -p ${priority}" >> "$LRMS_JOB_SCRIPT"
fi

# Choose queue.
printf '%s\n' "#\$ -q $joboption_queue" >> "$LRMS_JOB_SCRIPT"

# Job name for convenience. The requested name is sanitised in three steps:
#   1. prefix "N" when the first character is not a letter,
#   2. replace every non-alphanumeric character with "_",
#   3. keep at most the first 15 characters.
if [ -n "${joboption_jobname}" ]; then
  jobname=$(printf '%s\n' "$joboption_jobname" \
    | sed -e 's/^\([^[:alpha:]]\)/N\1/' \
          -e 's/[^[:alnum:]]/_/g' \
          -e 's/\(...............\).*/\1/')
  printf '%s\n' "#\$ -N \"$jobname\"" >> "$LRMS_JOB_SCRIPT"
fi
# Logged even when no name was requested (jobname is then empty).
echo "SGE jobname: $jobname" 1>&2

##############################################################
# (non-)parallel jobs
##############################################################

set_count

##############################################################
# parallel jobs
##############################################################
# In addition to the number of parallel tasks, also a valid
# parallel environment (PE) must be set for SGE.
#
# The selection of PE is done by Runtime Environment setup script in the zero
# stage. The user has to request a proper RE in addition to setting the
# "count" -property in the xrsl. The RE script must set the name of the desired
# PE to joboption_nodeproperty_# -variable (# is a number starting from zero,
# RE should use the lowest previously undefined number). This script then
# searches through the joboption_nodeproperty_# variables and compares them to
# the PE list obtained from SGE. The first matching PE name is used.
#
if [ -n "$joboption_nodeproperty_0" ]; then
  # PE names known to SGE, whitespace separated.
  sge_parallel_environment_list=$($SGE_QCONF -spl)
  i=0
  # Walk joboption_nodeproperty_0, _1, ... until the first empty one.
  # The loop is left either by "break 2" (jope holds the matching PE name)
  # or by running out of candidates (jope is empty).
  while eval "jope=\${joboption_nodeproperty_$i}" && [ -n "$jope" ]; do
    for ipe in $sge_parallel_environment_list; do
      if [ "$jope" = "$ipe" ]; then
        break 2 # now $jope contains the requested parallel env
      fi
    done
    i=$((i + 1))
  done
  if [ -n "$jope" ]; then
    printf '%s\n' "#\$ -pe $jope $joboption_count" >> "$LRMS_JOB_SCRIPT"
  else
    # Only reported; the submission continues without a "-pe" request.
    echo 'ERROR: Setting parallel environment failed.' 1>&2
  fi
fi

# Exclusive node use: request the first complex whose "qconf -sc" line
# mentions EXCL, by setting "<complex name>=true".
if [ "$joboption_exclusivenode" = "true" ]; then
  sge_excl_complex=$($SGE_QCONF -sc | grep EXCL | head -n 1)
  if [ -n "$sge_excl_complex" ]; then
    # First column of the matching line is the complex name. The line is
    # passed on quoted so that it is never subject to pathname expansion.
    sge_excl_complex_name=$(printf '%s\n' "$sge_excl_complex" | awk '{print $1}')
    printf '%s\n' "#\$ -l ${sge_excl_complex_name}=true" >> "$LRMS_JOB_SCRIPT"
  else
    echo "WARNING: Exclusive execution support is not configured by this Grid Engine" 1>&2
    echo "WARNING: Example configuration: https://wiki.nordugrid.org/index.php/LRMS_Backends/Testbeds" 1>&2
  fi
fi


##############################################################
# Execution times (obtained in seconds)
##############################################################
# SGE has soft and hard limits (soft = SIGUSR1, hard = SIGKILL sent to the job),
# let's allow time_hardlimit_ratio extra before the hard limit.
# cputime/walltime is obtained in seconds via $joboption_cputime and $joboption_walltime
# parallel jobs, add initialization time, soft/hard limit configurable...
#
# NOTE: the ratio is expanded textually ("$time_hardlimit_ratio") inside the
# arithmetic on purpose, exactly as before; this keeps plain left-to-right
# integer arithmetic if the ratio is written as a fraction such as 3/2
# (NOTE(review): the actual format of the ratio is defined outside this
# file -- confirm before changing these expressions).
if [ -n "$joboption_cputime" ] && [ "$joboption_cputime" -gt 0 ]; then
  # SGE enforces job-total cpu time limit, but it expects in h_cpu and s_cpu
  # per-slot limits. It then scales these with the number of requested slots
  # before enforcing them.
  cputime_perslot=$(( $joboption_cputime / $joboption_count ))
  cputime_hard_perslot=$(( $cputime_perslot * $time_hardlimit_ratio ))
  # Query the complex list once; column 5 says whether a complex may be
  # requested (YES or FORCED).
  sge_complexes=$($SGE_QCONF -sc)
  s_cpu_requestable=$(printf '%s\n' "$sge_complexes" \
    | awk '($1=="s_cpu" && ( $5=="YES" || $5=="FORCED" )){print $5}')
  h_cpu_requestable=$(printf '%s\n' "$sge_complexes" \
    | awk '($1=="h_cpu" && ( $5=="YES" || $5=="FORCED" )){print $5}')
  opt='#$'
  if [ -n "$s_cpu_requestable" ]; then opt="$opt -l s_cpu=::${cputime_perslot}"; fi
  if [ -n "$h_cpu_requestable" ]; then opt="$opt -l h_cpu=::${cputime_hard_perslot}"; fi
  printf '%s\n' "$opt" >> "$LRMS_JOB_SCRIPT"
fi

if [ -n "$joboption_walltime" ]; then
  if [ "$joboption_walltime" -lt 0 ]; then
    # Double quotes: the offending value must actually appear in the log.
    echo "WARNING: Less than 0 wall time requested: $joboption_walltime" 1>&2
    joboption_walltime=0
    echo 'WARNING: wall time set to 0' 1>&2
  fi
  joboption_walltime_hard=$(( $joboption_walltime * $time_hardlimit_ratio ))
  sge_complexes=$($SGE_QCONF -sc)
  s_rt_requestable=$(printf '%s\n' "$sge_complexes" \
    | awk '($1=="s_rt" && ( $5=="YES" || $5=="FORCED" )){print $5}')
  h_rt_requestable=$(printf '%s\n' "$sge_complexes" \
    | awk '($1=="h_rt" && ( $5=="YES" || $5=="FORCED" )){print $5}')
  opt='#$'
  if [ -n "$s_rt_requestable" ]; then opt="$opt -l s_rt=::${joboption_walltime}"; fi
  if [ -n "$h_rt_requestable" ]; then opt="$opt -l h_rt=::${joboption_walltime_hard}"; fi
  printf '%s\n' "$opt" >> "$LRMS_JOB_SCRIPT"
fi



##############################################################
# Requested memory (mb)
##############################################################

set_req_mem

# There are soft and hard limits for virtual memory consumption in SGE.
# The hard limit is the request scaled by memory_hardlimit_ratio; the ratio
# is expanded textually on purpose, exactly as before (NOTE(review): its
# format is defined outside this file -- confirm before changing this).
if [ -n "$joboption_memory" ]; then
  joboption_memory_hard=$(( $joboption_memory * $memory_hardlimit_ratio ))
  # Query the complex list once; column 5 says whether a complex may be
  # requested (YES or FORCED).
  sge_complexes=$($SGE_QCONF -sc)
  h_vmem_requestable=$(printf '%s\n' "$sge_complexes" \
    | awk '($1=="h_vmem" && ( $5=="YES" || $5=="FORCED" )){print $5}')
  s_vmem_requestable=$(printf '%s\n' "$sge_complexes" \
    | awk '($1=="s_vmem" && ( $5=="YES" || $5=="FORCED" )){print $5}')
  opt='#$'
  if [ -n "$s_vmem_requestable" ]; then opt="$opt -l s_vmem=${joboption_memory}M"; fi
  if [ -n "$h_vmem_requestable" ]; then opt="$opt -l h_vmem=${joboption_memory_hard}M"; fi
  printf '%s\n' "$opt" >> "$LRMS_JOB_SCRIPT"
fi

##############################################################
# Extra job options. This is the last, so that
# it can overwrite previously set options.
##############################################################
if [ -n "$CONFIG_sge_jobopts" ]; then
  printf '%s\n' "#\$ $CONFIG_sge_jobopts" >> "$LRMS_JOB_SCRIPT"
fi

##############################################################
# Override umask
##############################################################
# The comment line below is part of the generated job script and is
# reproduced verbatim.
{
  printf '\n'
  printf '%s\n' '# Overide umask of execution node (sometime values are really strange)'
  printf '%s\n' 'umask 077'
} >> "$LRMS_JOB_SCRIPT"

##############################################################
# By default, use $TMPDIR from SGE to alleviate its cleanup facilities.
# It can be overridden with scratchdir though.
# Don't do this if "shared_scratch" is defined in arc.conf.
##############################################################

if [ -n "$RUNTIME_LOCAL_SCRATCH_DIR" ] && [ -z "$RUNTIME_FRONTEND_SEES_NODE" ]; then
  # Either the configured scratch directory, or the literal text $TMPDIR,
  # which is then expanded on the execution node when the job script runs.
  sge_scratch_ref=${CONFIG_scratchdir:-'$TMPDIR'}
  printf '%s\n' "if [ -d \"${sge_scratch_ref}\" ]; then RUNTIME_LOCAL_SCRATCH_DIR=${sge_scratch_ref}; fi" >> "$LRMS_JOB_SCRIPT"
fi

sourcewithargs_jobscript


##############################################################
# Add environment variables
##############################################################

add_user_env

##############################################################
# Check for existence of executable,
##############################################################
if [ -z "${joboption_arg_0}" ]; then
  echo 'Executable is not specified' 1>&2
  exit 1
fi

if [ "$joboption_localtransfer" != 'yes' ]; then
  # Only executables given relative to the session directory can be checked
  # here: names starting with '$' (a variable reference) or '/' (an absolute
  # path) are skipped.
  program_start=$(printf '%s\n' "${joboption_arg_0}" | cut -c 1 2>&1)
  if [ "$program_start" != '$' ] && [ "$program_start" != '/' ]; then
    executable_path="$joboption_directory/${joboption_arg_0}"
    if [ ! -f "$executable_path" ]; then
      # All diagnostics go to stderr, including the ls listing.
      echo 'Executable does not exist, or permission denied.' 1>&2
      echo "   Executable $executable_path" 1>&2
      echo "   whoami: $(whoami)" 1>&2
      echo "   ls -l $executable_path: $(ls -l "$executable_path")" 1>&2
      exit 1
    fi
    if [ ! -x "$executable_path" ]; then
      echo 'Executable is not executable' 1>&2
      exit 1
    fi
  fi
fi

#######################################################################
# Copy information useful for transferring files to/from node directly.
# NOTE(review): joboption_localtransfer is set to 'no' near the top of this
# script; this branch only runs if something sourced or called in between
# changed it -- confirm.
#######################################################################
if [ "$joboption_localtransfer" = 'yes' ] ; then
   setup_local_transfer
fi

# The helper calls in the rest of this section are not defined in this file
# (presumably they come from the sourced submit_common.sh). They are invoked
# in a fixed order, interleaved with lines appended to the job script, so
# the order of statements here must be preserved.
setup_runtime_env

# Override location of .diag file: put it under the working directory.
# Single quotes: $RUNTIME_JOB_DIR is written literally and expanded only
# when the generated job script runs.
echo 'RUNTIME_JOB_DIAG=$RUNTIME_JOB_DIR/.diag' >> $LRMS_JOB_SCRIPT

##############################################################
# Add std... to job arguments
##############################################################
include_std_streams

##############################################################
#  Move files to local working directory (job is done on node only)
#  RUNTIME_JOB_DIR -> RUNTIME_LOCAL_SCRATCH_DIR/job_id
##############################################################
move_files_to_node

# Initialise RESULT in the job script; the generated script tests it below
# before executing the payload.
echo "" >> $LRMS_JOB_SCRIPT
echo "RESULT=0" >> $LRMS_JOB_SCRIPT
echo "" >> $LRMS_JOB_SCRIPT


#####################################################
#  Download input files
####################################################
download_input_files

##############################################################
#  Skip execution if something already failed
##############################################################
# Opens an "if" in the generated job script; it is closed by the "fi"
# written right after cd_and_run below.
echo "" >> $LRMS_JOB_SCRIPT
echo "if [ \"\$RESULT\" = '0' ] ; then" >> $LRMS_JOB_SCRIPT

##############################################################
#  Runtime configuration
##############################################################

RTE_stage1

# Make the job script record the value of $runtimeenvironments (as seen at
# job run time) in the .diag file.
echo "echo \"runtimeenvironments=\$runtimeenvironments\" >> \"\$RUNTIME_JOB_DIAG\"" >> $LRMS_JOB_SCRIPT

# Abort the submission, removing the temporary files created so far, when
# RUNTIME_NODE_SEES_FRONTEND is empty.
if [ -z "$RUNTIME_NODE_SEES_FRONTEND" ] ; then
  echo "Nodes detached from gridarea are not supported when SGE is used. Aborting job submit" 1>&2
  rm -f "$LRMS_JOB_SCRIPT" "$LRMS_JOB_OUT" "$LRMS_JOB_ERR"
  exit 1
fi

##############################################################
#  Execution
##############################################################
cd_and_run
# Closes the "if [ "$RESULT" = '0' ]" opened above in the job script.
echo "fi" >> $LRMS_JOB_SCRIPT

##############################################################
#  Runtime (post)configuration at computing node
##############################################################
configure_runtime

#####################################################
#  Upload output files
####################################################
upload_output_files

##############################################################
#  Move files back to session directory (job is done on node only)
#  RUNTIME_JOB_DIR -> RUNTIME_LOCAL_SCRATCH_DIR/job_id
##############################################################
move_files_to_frontend

# Performance log: append the time elapsed since start_ts (in seconds, three
# decimals) as the "JobScriptCreation" entry. Only active when perflogdir
# is set.
if [ ! -z "$perflogdir" ]; then
   stop_ts=`date +%s.%N`
   t=`awk "BEGIN { printf \"%.3f\", ${stop_ts}-${start_ts} }"`
   echo "[`date +%Y-%m-%d\ %T`] submit-sge-job, JobScriptCreation: $t" >> $perflogfilesub
fi


if [ -n "$perflogdir" ]; then
   # start time stamp for the submission phase
   start_ts=$(date +%s.%N)
fi

#######################################
#  Submit the job
#######################################
# The submission runs in a subshell so that the "cd" does not affect the
# rest of this script, and all of its output goes to stderr. Variables set
# inside a subshell are lost when it ends, so the result is handed back
# through the subshell's exit status and stored in exitcode afterwards.
(
  echo "SGE job script built"
  cd "$joboption_directory"
  echo "SGE script follows:"
  cat "$LRMS_JOB_SCRIPT"
  echo

  # Execute qsub command. SGE_QSUB holds "command + options" and is
  # therefore expanded unquoted on purpose.
  ${SGE_QSUB} < "$LRMS_JOB_SCRIPT" 1> "$LRMS_JOB_OUT" 2> "$LRMS_JOB_ERR"

  # expected SGE output is like: 'Your job 77 ("perftest") has been
  # submitted', the line below uses only the job number as job id.
  job_id=$(cat "$LRMS_JOB_OUT" "$LRMS_JOB_ERR" \
           | awk '/^.our job .* has been submitted/ {split($0,field," ");print field[3]}')
  # anything else is a sign of problems, which should be logged
  warnings=$(cat "$LRMS_JOB_OUT" "$LRMS_JOB_ERR" \
             | grep -v '^.our job .* has been submitted' | grep -v '^Exit')
  if [ -n "$warnings" ]; then echo "WARNING: $warnings"; echo; fi

  if [ -z "$job_id" ]; then
    echo "job *NOT* submitted successfully!"
    exitcode=1
  else
    # Record the local job id in the grami file.
    echo "joboption_jobid=$job_id" >> "$arg_file"
    echo "local job id: $job_id"
    echo "job submitted successfully!"
    exitcode=0
  fi

  # Remove temporary job script file
  rm -f "$LRMS_JOB_SCRIPT" "$LRMS_JOB_OUT" "$LRMS_JOB_ERR"
  echo "----- exiting submit_sge_job -----"
  exit "$exitcode"
) 1>&2
# 0 when a job id was obtained, 1 otherwise.
exitcode=$?

# Performance log: when perflogdir is set, append the time elapsed since
# start_ts (seconds, three decimals) as the "JobSubmission" entry.
if [ -n "$perflogdir" ]; then
   stop_ts=$(date +%s.%N)
   t=$(awk "BEGIN { printf \"%.3f\", ${stop_ts}-${start_ts} }")
   now=$(date '+%Y-%m-%d %T')
   echo "[${now}] submit-sge-job, JobSubmission: $t" >> $perflogfilesub
fi

# Finish with the status kept in exitcode.
# NOTE(review): confirm that the assignment to exitcode is visible at this
# point -- a value assigned inside a "( ... )" subshell does not propagate
# to the parent shell, in which case this is a plain "exit".
exit $exitcode