/usr/share/pyshared/mrjob/pool.py is in python-mrjob 0.3.3.2-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# Copyright 2012 Yelp and Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities related to job-flow-pooling. This code used to be in mrjob.emr.
"""
from datetime import datetime
from datetime import timedelta
try:
import boto.utils
boto # quiet "redefinition of unused ..." warning from pyflakes
except ImportError:
# don't require boto; MRJobs don't actually need it when running
# inside hadoop streaming
boto = None
# Timestamp layout of the job flow's date fields. This is the same value as
# boto.utils.ISO8601, spelled out here so that this function keeps working
# when boto could not be imported (in which case ``boto`` is None).
_ISO8601 = '%Y-%m-%dT%H:%M:%SZ'


def est_time_to_hour(job_flow, now=None):
    """How long before job reaches the end of the next full hour since it
    began. This is important for billing purposes.

    If it happens to be exactly a whole number of hours, we return
    one hour, not zero.

    :param job_flow: object that may carry ``startdatetime`` and
                     ``creationdatetime`` attributes, each a string in
                     ``%Y-%m-%dT%H:%M:%SZ`` format. ``startdatetime`` is
                     preferred; ``creationdatetime`` is the fallback.
    :param now: the current time as a naive UTC :py:class:`datetime`
                (defaults to ``datetime.utcnow()``).
    :return: a :py:class:`timedelta` greater than zero and at most one hour.
             If *job_flow* has no (or an empty) ``creationdatetime``, we
             return one hour regardless of ``startdatetime``.
    """
    if now is None:
        now = datetime.utcnow()

    created = getattr(job_flow, 'creationdatetime', None)
    if not created:
        # do something reasonable if creationdatetime isn't set
        return timedelta(minutes=60)

    started = getattr(job_flow, 'startdatetime', None)
    start = datetime.strptime(started or created, _ISO8601)

    run_time = now - start
    # Negating the run time and reading ``.seconds`` gives the number of
    # whole seconds until the next day boundary counted from ``start``;
    # taking that modulo one hour gives the time left in the current
    # hour. A result of zero means we are exactly on the hour, which is
    # reported as a full hour.
    return timedelta(seconds=((-run_time).seconds % 3600.0 or 3600.0))
def pool_hash_and_name(job_flow):
    """Look up the pool hash and pool name recorded on *job_flow*.

    The last entry of the job flow's ``bootstrapactions`` is inspected. If
    it has exactly two arguments and the first one starts with ``pool-``,
    the rest of that first argument is the hash and the second argument
    is the pool name.

    Returns ``(hash, name)``, or ``(None, None)`` if the job flow isn't
    pooled.
    """
    not_pooled = (None, None)

    actions = getattr(job_flow, 'bootstrapactions', None)
    if not actions:
        return not_pooled

    # only the final bootstrap action is examined
    values = [item.value for item in actions[-1].args]
    if len(values) != 2:
        return not_pooled

    marker, pool_name = values
    if not marker.startswith('pool-'):
        return not_pooled

    return marker[len('pool-'):], pool_name
|