This file is indexed.

/usr/lib/python3/dist-packages/bioblend/cloudman/launch.py is in python3-bioblend 0.7.0-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
"""
Setup and launch a CloudMan instance.
"""
import datetime
import yaml
import socket

import boto
from boto.compat import http_client
from boto.ec2.regioninfo import RegionInfo
from boto.exception import EC2ResponseError, S3ResponseError
from boto.s3.connection import OrdinaryCallingFormat, S3Connection, SubdomainCallingFormat
import six
from six.moves.http_client import HTTPConnection
from six.moves.urllib.parse import urlparse

import bioblend
from bioblend.util import Bunch

# Uncomment the following line if no logging from boto is desired
# bioblend.logging.getLogger('boto').setLevel(bioblend.logging.CRITICAL)
# Uncomment the following line if logging at the prompt is desired
# bioblend.set_stream_logger(__name__)


def instance_types(cloud_name='generic'):
    """
    Return a list of dictionaries containing details about the available
    instance types for the given `cloud_name`.

    :type cloud_name: str
    :param cloud_name: A name of the cloud for which the list of instance
                       types will be returned. Valid values are: `aws`,
                       `nectar`, `generic`.

    :rtype: list
    :return: A list of dictionaries describing instance types. Each dict will
             contain the following keys: `name`, `model`, and `description`.
    """
    instance_list = []
    if cloud_name.lower() == 'aws':
        instance_list.append({"model": "c3.large",
                              "name": "Compute optimized Large",
                              "description": "2 vCPU/4GB RAM"})
        instance_list.append({"model": "c3.2xlarge",
                              "name": "Compute optimized 2xLarge",
                              "description": "8 vCPU/15GB RAM"})
        instance_list.append({"model": "c3.8xlarge",
                              "name": "Compute optimized 8xLarge",
                              "description": "32 vCPU/60GB RAM"})
    elif cloud_name.lower() in ['nectar', 'generic']:
        instance_list.append({"model": "m1.small",
                              "name": "Small",
                              "description": "1 vCPU / 4GB RAM"})
        instance_list.append({"model": "m1.medium",
                              "name": "Medium",
                              "description": "2 vCPU / 8GB RAM"})
        instance_list.append({"model": "m1.large",
                              "name": "Large",
                              "description": "4 vCPU / 16GB RAM"})
        instance_list.append({"model": "m1.xlarge",
                              "name": "Extra Large",
                              "description": "8 vCPU / 32GB RAM"})
        instance_list.append({"model": "m1.xxlarge",
                              "name": "Extra-extra Large",
                              "description": "16 vCPU / 64GB RAM"})
    return instance_list


class CloudManLauncher(object):
    def __init__(self, access_key, secret_key, cloud=None):
        """
        Define the environment in which this instance of CloudMan will be launched.

        Besides providing the credentials, optionally provide the ``cloud``
        object. This object must define the properties required to establish a
        `boto <https://github.com/boto/boto/>`_ connection to that cloud. See
        this method's implementation for an example of the required fields.
        Note that as long the as provided object defines the required fields,
        it can really by implemented as anything (e.g., a Bunch, a database
        object, a custom class). If no value for the ``cloud`` argument is
        provided, the default is to use the Amazon cloud.
        """
        self.access_key = access_key
        self.secret_key = secret_key
        if cloud is None:
            # Default to an EC2-compatible object
            self.cloud = Bunch(id='1',  # for compatibility w/ DB representation
                               name="Amazon",
                               cloud_type="ec2",
                               bucket_default="cloudman",
                               region_name="us-east-1",
                               region_endpoint="ec2.amazonaws.com",
                               ec2_port="",
                               ec2_conn_path="/",
                               cidr_range="",
                               is_secure=True,
                               s3_host="s3.amazonaws.com",
                               s3_port="",
                               s3_conn_path='/')
        else:
            self.cloud = cloud
        self.ec2_conn = self.connect_ec2(self.access_key, self.secret_key, self.cloud)
        # Define exceptions from http_client that we want to catch and retry
        self.http_exceptions = (http_client.HTTPException, socket.error,
                                socket.gaierror, http_client.BadStatusLine)

    def __repr__(self):
        return "Cloud: {0}; acct ID: {1}".format(self.cloud.name, self.access_key)

    def launch(self, cluster_name, image_id, instance_type, password,
               kernel_id=None, ramdisk_id=None, key_name='cloudman_key_pair',
               security_groups=['CloudMan'], placement='', subnet_id=None, **kwargs):
        """
        Check all the prerequisites (key pair and security groups) for
        launching a CloudMan instance, compose the user data based on the
        parameters specified in the arguments and the cloud properties as
        defined in the object's ``cloud`` field.

        For the current list of user data fields that can be provided via
        ``kwargs``, see `<http://wiki.g2.bx.psu.edu/CloudMan/UserData>`_

        Return a dict containing the properties and info with which an instance
        was launched, namely: ``sg_names`` containing the names of the security
        groups, ``kp_name`` containing the name of the key pair, ``kp_material``
        containing the private portion of the key pair (*note* that this portion
        of the key is available and can be retrieved *only* at the time the key
        is created, which will happen only if no key with the name provided in
        the ``key_name`` argument exists), ``rs`` containing the
        `boto <https://github.com/boto/boto/>`_ ``ResultSet`` object,
        ``instance_id`` containing the ID of a started instance, and
        ``error`` containing an error message if there was one.
        """
        ret = {'sg_names': [],
               'sg_ids': [],
               'kp_name': '',
               'kp_material': '',
               'rs': None,
               'instance_id': '',
               'error': None}
        security_group_ids = []
        # First satisfy the prerequisites
        for sg in security_groups:
            cmsg = self.create_cm_security_group(sg)
            ret['error'] = cmsg['error']
            if ret['error']:
                return ret
            if cmsg['name']:
                ret['sg_names'].append(cmsg['name'])
                ret['sg_ids'].append(cmsg['sg_id'])
                security_group_ids.append(cmsg['sg_id'])
        kp_info = self.create_key_pair(key_name)
        ret['error'] = kp_info['error']
        if ret['error']:
            return ret
        ret['kp_name'] = kp_info['name']
        ret['kp_material'] = kp_info['material']
        # If not provided, try to find a placement
        # TODO: Should placement always be checked? To make sure it's correct
        # for existing clusters.
        if not placement:
            placement = self._find_placement(cluster_name).get('placement', None)
        # Compose user data for launching an instance, ensuring we have the required fields
        kwargs['access_key'] = self.access_key
        kwargs['secret_key'] = self.secret_key
        kwargs['cluster_name'] = cluster_name
        kwargs['password'] = password
        kwargs['cloud_name'] = self.cloud.name
        ud = self._compose_user_data(kwargs)
        # Now launch an instance
        try:
            rs = None
            rs = self.ec2_conn.run_instances(image_id=image_id,
                                             instance_type=instance_type,
                                             key_name=key_name,
                                             security_group_ids=security_group_ids,
                                             user_data=ud,
                                             kernel_id=kernel_id,
                                             ramdisk_id=ramdisk_id,
                                             subnet_id=subnet_id,
                                             placement=placement)
            ret['rs'] = rs
        except EC2ResponseError as e:
            err_msg = "Problem launching an instance: {0} (code {1}; status {2})" \
                      .format(e.message, e.error_code, e.status)
            bioblend.log.exception(err_msg)
            ret['error'] = err_msg
            return ret
        else:
            if rs:
                try:
                    bioblend.log.info("Launched an instance with ID %s" % rs.instances[0].id)
                    ret['instance_id'] = rs.instances[0].id
                    ret['instance_ip'] = rs.instances[0].ip_address
                except EC2ResponseError as e:
                    err_msg = "Problem with the launched instance object: {0} " \
                              "(code {1}; status {2})" \
                              .format(e.message, e.error_code, e.status)
                    bioblend.log.exception(err_msg)
                    ret['error'] = err_msg
            else:
                    ret['error'] = ("No response after launching an instance. Check "
                                    "your account permissions and try again.")
        return ret

    def create_cm_security_group(self, sg_name='CloudMan'):
        """
        Create a security group with all authorizations required to run CloudMan.

        If the group already exists, check its rules and add the missing ones.

        :type sg_name: str
        :param sg_name: A name for the security group to be created.

        :rtype: dict
        :return: A dictionary containing keys ``name`` (with the value being the
                 name of the security group that was created), ``error``
                 (with the value being the error message if there was an error
                 or ``None`` if no error was encountered), and ``ports``
                 (containing the list of tuples with port ranges that were
                 opened or attempted to be opened).

        .. versionchanged:: 0.6.1
            The return value changed from a string to a dict
        """
        ports = (('20', '21'),  # FTP
                 ('22', '22'),  # SSH
                 ('80', '80'),  # Web UI
                 ('443', '443'),  # SSL Web UI
                 ('8800', '8800'),  # NodeJS Proxy for Galaxy IPython IE
                 ('9600', '9700'),  # HTCondor
                 ('30000', '30100'))  # FTP transfer
        progress = {'name': None,
                    'sg_id': None,
                    'error': None,
                    'ports': ports}
        cmsg = None
        # Check if this security group already exists
        try:
            sgs = self.ec2_conn.get_all_security_groups()
        except EC2ResponseError as e:
            err_msg = ("Problem getting security groups. This could indicate a "
                       "problem with your account credentials or permissions: "
                       "{0} (code {1}; status {2})"
                       .format(e.message, e.error_code, e.status))
            bioblend.log.exception(err_msg)
            progress['error'] = err_msg
            return progress
        for sg in sgs:
            if sg.name == sg_name:
                cmsg = sg
                bioblend.log.debug("Security group '%s' already exists; will "
                                   "add authorizations next." % sg_name)
                break
        # If it does not exist, create security group
        if cmsg is None:
            bioblend.log.debug("Creating Security Group %s" % sg_name)
            try:
                cmsg = self.ec2_conn.create_security_group(sg_name, 'A security '
                                                           'group for CloudMan')
            except EC2ResponseError as e:
                err_msg = "Problem creating security group '{0}': {1} (code {2}; " \
                          "status {3})" \
                          .format(sg_name, e.message, e.error_code, e.status)
                bioblend.log.exception(err_msg)
                progress['error'] = err_msg
        if cmsg:
            progress['name'] = cmsg.name
            progress['sg_id'] = cmsg.id
            # Add appropriate authorization rules
            # If these rules already exist, nothing will be changed in the SG
            for port in ports:
                try:
                    if not self.rule_exists(cmsg.rules, from_port=port[0], to_port=port[1]):
                        cmsg.authorize(ip_protocol='tcp', from_port=port[0], to_port=port[1], cidr_ip='0.0.0.0/0')
                    else:
                        bioblend.log.debug("Rule (%s:%s) already exists in the SG" % (port[0], port[1]))
                except EC2ResponseError as e:
                    err_msg = "A problem adding security group authorizations: {0} " \
                              "(code {1}; status {2})" \
                              .format(e.message, e.error_code, e.status)
                    bioblend.log.exception(err_msg)
                    progress['error'] = err_msg
            # Add ICMP (i.e., ping) rule required by HTCondor
            try:
                if not self.rule_exists(cmsg.rules, from_port='-1', to_port='-1', ip_protocol='icmp'):
                    cmsg.authorize(ip_protocol='icmp', from_port=-1, to_port=-1, cidr_ip='0.0.0.0/0')
                else:
                    bioblend.log.debug("ICMP rule already exists in {0} SG.".format(sg_name))
            except EC2ResponseError as e:
                err_msg = "A problem with security ICMP rule authorization: {0} " \
                          "(code {1}; status {2})" \
                          .format(e.message, e.error_code, e.status)
                bioblend.log.exception(err_msg)
                progress['err_msg'] = err_msg
            # Add rule that allows communication between instances in the same SG
            g_rule_exists = False  # A flag to indicate if group rule already exists
            for rule in cmsg.rules:
                for grant in rule.grants:
                    if grant.name == cmsg.name:
                        g_rule_exists = True
                        bioblend.log.debug("Group rule already exists in the SG.")
                if g_rule_exists:
                    break
            if not g_rule_exists:
                try:
                    cmsg.authorize(src_group=cmsg, ip_protocol='tcp', from_port=0, to_port=65535)
                except EC2ResponseError as e:
                    err_msg = "A problem with security group authorization: {0} " \
                              "(code {1}; status {2})" \
                              .format(e.message, e.error_code, e.status)
                    bioblend.log.exception(err_msg)
                    progress['err_msg'] = err_msg
            bioblend.log.info("Done configuring '%s' security group" % cmsg.name)
        else:
            bioblend.log.warning("Did not create security group '{0}'".format(sg_name))
        return progress

    def rule_exists(self, rules, from_port, to_port, ip_protocol='tcp', cidr_ip='0.0.0.0/0'):
        """
        A convenience method to check if an authorization rule in a security group
        already exists.
        """
        for rule in rules:
            if rule.ip_protocol == ip_protocol and rule.from_port == from_port and \
               rule.to_port == to_port and cidr_ip in [ip.cidr_ip for ip in rule.grants]:
                return True
        return False

    def create_key_pair(self, key_name='cloudman_key_pair'):
        """
        If a key pair with the provided ``key_name`` does not exist, create it.

        :type sg_name: str
        :param sg_name: A name for the key pair to be created.

        :rtype: dict
        :return: A dictionary containing keys ``name`` (with the value being the
                 name of the key pair that was created), ``error``
                 (with the value being the error message if there was an error
                 or ``None`` if no error was encountered), and ``material``
                 (containing the unencrypted PEM encoded RSA private key if the
                 key was created or ``None`` if the key already eixsted).

        .. versionchanged:: 0.6.1
            The return value changed from a tuple to a dict
        """
        progress = {'name': None,
                    'material': None,
                    'error': None}
        kp = None
        # Check if a key pair under the given name already exists. If it does not,
        # create it, else return.
        try:
            kps = self.ec2_conn.get_all_key_pairs()
        except EC2ResponseError as e:
            err_msg = "Problem getting key pairs: {0} (code {1}; status {2})" \
                      .format(e.message, e.error_code, e.status)
            bioblend.log.exception(err_msg)
            progress['error'] = err_msg
            return progress
        for akp in kps:
            if akp.name == key_name:
                bioblend.log.info("Key pair '%s' already exists; reusing it." % key_name)
                progress['name'] = akp.name
                return progress
        try:
            kp = self.ec2_conn.create_key_pair(key_name)
        except EC2ResponseError as e:
            err_msg = "Problem creating key pair '{0}': {1} (code {2}; status {3})" \
                      .format(key_name, e.message, e.error_code, e.status)
            bioblend.log.exception(err_msg)
            progress['error'] = err_msg
            return progress
        bioblend.log.info("Created key pair '%s'" % kp.name)
        progress['name'] = kp.name
        progress['material'] = kp.material
        return progress

    def get_status(self, instance_id):
        """
        Check on the status of an instance. ``instance_id`` needs to be a
        ``boto``-library copatible instance ID (e.g., ``i-8fehrdss``).If
        ``instance_id`` is not provided, the ID obtained when launching
        *the most recent* instance is used. Note that this assumes the instance
        being checked on was launched using this  class. Also note that the same
        class may be used to launch multiple instances but only the most recent
        ``instance_id`` is kept while any others will  to be explicitly specified.

        This method also allows the required ``ec2_conn`` connection object to be
        provided at invocation time. If the object is not provided, credentials
        defined for the class are used (ability to specify a custom ``ec2_conn``
        helps in case of stateless method invocations).

        Return a ``state`` dict containing the following keys: ``instance_state``,
        ``public_ip``, ``placement``, and ``error``, which capture CloudMan's
        current state. For ``instance_state``, expected values are: ``pending``,
        ``booting``, ``running``, or ``error`` and represent the state of the
        underlying instance. Other keys will return an empty  value until the
        ``instance_state`` enters ``running`` state.
        """
        ec2_conn = self.ec2_conn
        rs = None
        state = {'instance_state': "",
                 'public_ip': "",
                 'placement': "",
                 'error': ""}

        # Make sure we have an instance ID
        if instance_id is None:
            err = "Missing instance ID, cannot check the state."
            bioblend.log.error(err)
            state['error'] = err
            return state
        try:
            rs = ec2_conn.get_all_instances([instance_id])
            if rs is not None:
                inst_state = rs[0].instances[0].update()
                public_ip = rs[0].instances[0].ip_address
                state['public_ip'] = public_ip
                if inst_state == 'running':
                    cm_url = "http://{dns}/cloud".format(dns=public_ip)
                    # Wait until the CloudMan URL is accessible to return the data
                    if self._checkURL(cm_url) is True:
                        state['instance_state'] = inst_state
                        state['placement'] = rs[0].instances[0].placement
                    else:
                        state['instance_state'] = 'booting'
                else:
                    state['instance_state'] = inst_state
        except Exception as e:
            err = "Problem updating instance '%s' state: %s" % (instance_id, e)
            bioblend.log.error(err)
            state['error'] = err
        return state

    def get_clusters_pd(self, include_placement=True):
        """
        Return *persistent data* of all existing clusters for this account.

        :type include_placement: bool
        :param include_placement: Whether or not to include region placement for
                                  the clusters. Setting this option will lead
                                  to a longer function runtime.

        :rtype: dict
        :return: A dictionary containing keys ``clusters`` and ``error``. The
                 value of ``clusters`` will be a dictionary with the following keys
                 ``cluster_name``, ``persistent_data``, ``bucket_name`` and optionally
                 ``placement`` or an empty list if no clusters were found or an
                 error was encountered. ``persistent_data`` key value is yet
                 another dictionary containing given cluster's persistent data.
                 The value for the ``error`` key will contain a string with the
                 error message.

        .. versionadded:: 0.3
        .. versionchanged:: 0.7.0
            The return value changed from a list to a dictionary.
        """
        clusters = []
        response = {'clusters': clusters, 'error': None}
        s3_conn = self.connect_s3(self.access_key, self.secret_key, self.cloud)
        try:
            buckets = s3_conn.get_all_buckets()
        except S3ResponseError as e:
            response['error'] = "S3ResponseError getting buckets: %s" % e
        except self.http_exceptions as ex:
            response['error'] = "Exception getting buckets: %s" % ex
        if response['error']:
            bioblend.log.exception(response['error'])
            return response
        for bucket in [b for b in buckets if b.name.startswith('cm-')]:
            try:
                # TODO: first lookup if persistent_data.yaml key exists
                pd = bucket.get_key('persistent_data.yaml')
            except S3ResponseError:
                # This can fail for a number of reasons for non-us and/or
                # CNAME'd buckets but it is not a terminal error
                bioblend.log.warning("Problem fetching persistent_data.yaml "
                                     "from bucket %s" % bucket)
                continue
            if pd:
                # We are dealing with a CloudMan bucket
                pd_contents = pd.get_contents_as_string()
                pd = yaml.load(pd_contents)
                if 'cluster_name' in pd:
                    cluster_name = pd['cluster_name']
                else:
                    for key in bucket.list():
                        if key.name.endswith('.clusterName'):
                            cluster_name = key.name.split('.clusterName')[0]
                cluster = {'cluster_name': cluster_name,
                           'persistent_data': pd,
                           'bucket_name': bucket.name}
                # Look for cluster's placement too
                if include_placement:
                    placement = self._find_placement(cluster_name, cluster)
                    cluster['placement'] = placement
                clusters.append(cluster)
        response['clusters'] = clusters
        return response

    def get_cluster_pd(self, cluster_name):
        """
        Return *persistent data* (as a dict) associated with a cluster with the
        given ``cluster_name``. If a cluster with the given name is not found,
        return an empty dict.

        .. versionadded:: 0.3
        """
        cluster = {}
        clusters = self.get_clusters_pd().get('clusters', [])
        for c in clusters:
            if c['cluster_name'] == cluster_name:
                cluster = c
                break
        return cluster

    def connect_ec2(self, a_key, s_key, cloud=None):
        """
        Create and return an EC2-compatible connection object for the given cloud.

        See ``_get_cloud_info`` method for more details on the requirements for
        the ``cloud`` parameter. If no value is provided, the class field is used.
        """
        if cloud is None:
            cloud = self.cloud
        ci = self._get_cloud_info(cloud)
        r = RegionInfo(name=ci['region_name'], endpoint=ci['region_endpoint'])
        ec2_conn = boto.connect_ec2(aws_access_key_id=a_key,
                                    aws_secret_access_key=s_key,
                                    # api_version is needed for availability zone support for EC2
                                    api_version='2012-06-01' if ci['cloud_type'] == 'ec2' else None,
                                    is_secure=ci['is_secure'],
                                    region=r,
                                    port=ci['ec2_port'],
                                    path=ci['ec2_conn_path'],
                                    validate_certs=False)
        return ec2_conn

    def connect_s3(self, a_key, s_key, cloud=None):
        """
        Create and return an S3-compatible connection object for the given cloud.

        See ``_get_cloud_info`` method for more details on the requirements for
        the ``cloud`` parameter. If no value is provided, the class field is used.
        """
        if cloud is None:
            cloud = self.cloud
        ci = self._get_cloud_info(cloud)
        if ci['cloud_type'] == 'amazon':
            calling_format = SubdomainCallingFormat()
        else:
            calling_format = OrdinaryCallingFormat()
        s3_conn = S3Connection(
            aws_access_key_id=a_key, aws_secret_access_key=s_key,
            is_secure=ci['is_secure'], port=ci['s3_port'], host=ci['s3_host'],
            path=ci['s3_conn_path'], calling_format=calling_format)
        return s3_conn

    def _compose_user_data(self, user_provided_data):
        """
        A convenience method used to compose and properly format the user data
        required when requesting an instance.

        ``user_provided_data`` is the data provided by a user required to identify
        a cluster and user other user requirements.
        """
        form_data = {}
        # Do not include the following fields in the user data but do include
        # any 'advanced startup fields' that might be added in the future
        excluded_fields = ['sg_name', 'image_id', 'instance_id', 'kp_name',
                           'cloud', 'cloud_type', 'public_dns', 'cidr_range',
                           'kp_material', 'placement', 'flavor_id']
        for key, value in six.iteritems(user_provided_data):
            if key not in excluded_fields:
                form_data[key] = value
        # If the following user data keys are empty, do not include them in the request user data
        udkeys = ['post_start_script_url', 'worker_post_start_script_url', 'bucket_default', 'share_string']
        for udkey in udkeys:
            if udkey in form_data and form_data[udkey] == '':
                del form_data[udkey]
        # If bucket_default was not provided, add a default value to the user data
        # (missing value does not play nicely with CloudMan's ec2autorun.py)
        if not form_data.get('bucket_default', None) and self.cloud.bucket_default:
            form_data['bucket_default'] = self.cloud.bucket_default
        # Reuse the ``password`` for the ``freenxpass`` user data option
        if 'freenxpass' not in form_data and 'password' in form_data:
            form_data['freenxpass'] = form_data['password']
        # Convert form_data into the YAML format
        ud = yaml.dump(form_data, default_flow_style=False, allow_unicode=False)
        # Also include connection info about the selected cloud
        ci = self._get_cloud_info(self.cloud, as_str=True)
        return ud + "\n" + ci

    def _get_cloud_info(self, cloud, as_str=False):
        """
        Get connection information about a given cloud
        """
        ci = {}
        ci['cloud_type'] = cloud.cloud_type
        ci['region_name'] = cloud.region_name
        ci['region_endpoint'] = cloud.region_endpoint
        ci['is_secure'] = cloud.is_secure
        ci['ec2_port'] = cloud.ec2_port if cloud.ec2_port != '' else None
        ci['ec2_conn_path'] = cloud.ec2_conn_path
        # Include cidr_range only if not empty
        if cloud.cidr_range != '':
            ci['cidr_range'] = cloud.cidr_range
        ci['s3_host'] = cloud.s3_host
        ci['s3_port'] = cloud.s3_port if cloud.s3_port != '' else None
        ci['s3_conn_path'] = cloud.s3_conn_path
        if as_str:
            ci = yaml.dump(ci, default_flow_style=False, allow_unicode=False)
        return ci

    def _get_volume_placement(self, vol_id):
        """
        Returns the placement of a volume (or None, if it cannot be determined)
        """
        try:
            vol = self.ec2_conn.get_all_volumes(volume_ids=[vol_id])
        except EC2ResponseError as ec2e:
            bioblend.log.error("EC2ResponseError querying for volume {0}: {1}"
                               .format(vol_id, ec2e))
            vol = None
        if vol:
            return vol[0].zone
        else:
            bioblend.log.error("Requested placement of a volume '%s' that does not exist." % vol_id)
            return None

    def _find_placement(self, cluster_name, cluster=None):
        """
        Find a placement zone for a cluster with the name ``cluster_name``.

        By default, this method will search for and fetch given cluster's
        *persistent data*; alternatively, *persistent data* can be provided via
        the ``cluster`` parameter. This dict needs to have ``persistent_data``
        key with the contents of cluster's *persistent data*.
        If the cluster or the volume associated with the cluster cannot be found,
        cluster placement is set to ``None``.

        :rtype: dict
        :return: A dictionary with ``placement`` and ``error`` keywords.

        .. versionchanged:: 0.7.0
            The return value changed from a list to a dictionary.
        """
        placement = None
        response = {'placement': placement, 'error': None}
        cluster = cluster or self.get_cluster_pd(cluster_name)
        if cluster and 'persistent_data' in cluster:
            pd = cluster['persistent_data']
            try:
                if 'placement' in pd:
                    response['placement'] = pd['placement']
                elif 'data_filesystems' in pd:
                    # We have v1 format persistent data so get the volume first and
                    # then the placement zone
                    vol_id = pd['data_filesystems']['galaxyData'][0]['vol_id']
                    response['placement'] = self._get_volume_placement(vol_id)
                elif 'filesystems' in pd:
                    # V2 format.
                    for fs in [fs for fs in pd['filesystems'] if fs.get('kind', None) == 'volume' and 'ids' in fs]:
                        vol_id = fs['ids'][0]  # All volumes must be in the same zone
                        response['placement'] = self._get_volume_placement(vol_id)
                        # No need to continue to iterate through
                        # filesystems, if we found one with a volume.
                        break
            except Exception as exc:
                response['error'] = ("Exception while finding placement for "
                                     "cluster '{0}'. This can indicate malformed "
                                     "instance data. Or that this method is "
                                     "broken: {1}".format(cluster_name, exc))
                bioblend.log.error(response['error'])
                response['placement'] = None
        else:
            bioblend.log.debug("Insufficient info about cluster {0} to get placement."
                               .format(cluster_name))
        return response

    def find_placements(self, ec2_conn, instance_type, cloud_type, cluster_name=None):
        """
        Find a list of placement zones that support the specified instance type.

        If ``cluster_name`` is given and a cluster with the given name exist,
        return a list with only one entry where the given cluster lives.

        Searching for available zones for a given instance type is done by
        checking the spot prices in the potential availability zones for
        support before deciding on a region:
        http://blog.piefox.com/2011/07/ec2-availability-zones-and-instance.html

        Note that, currently, instance-type based zone selection applies only to
        AWS. For other clouds, all the available zones are returned (unless a
        cluster is being recreated, in which case the cluster's placement zone is
        returned sa stored in its persistent data.

        :rtype: dict
        :return: A dictionary with ``zones`` and ``error`` keywords.

        .. versionchanged:: 0.3
            Changed method name from ``_find_placements`` to ``find_placements``.
            Also added ``cluster_name`` parameter.

        .. versionchanged:: 0.7.0
            The return value changed from a list to a dictionary.
        """
        # First look for a specific zone a given cluster is bound to
        zones = []
        response = {'zones': zones, 'error': None}
        if cluster_name:
            placement = self._find_placement(cluster_name)
            if placement.get('error'):
                response['error'] = placement['error']
                return response
            response['zones'] = placement.get('placement', [])
        # If placement is not found, look for a list of available zones
        if not response['zones']:
            in_the_past = datetime.datetime.now() - datetime.timedelta(hours=1)
            back_compatible_zone = "us-east-1e"
            for zone in [z for z in ec2_conn.get_all_zones() if z.state == 'available']:
                # Non EC2 clouds may not support get_spot_price_history
                if instance_type is None or cloud_type != 'ec2':
                    zones.append(zone.name)
                elif ec2_conn.get_spot_price_history(instance_type=instance_type,
                                                     end_time=in_the_past.isoformat(),
                                                     availability_zone=zone.name):
                    zones.append(zone.name)
            zones.sort(reverse=True)  # Higher-lettered zones seem to have more availability currently
            if back_compatible_zone in zones:
                zones = [back_compatible_zone] + [z for z in zones if z != back_compatible_zone]
            if len(zones) == 0:
                response['error'] = ("Did not find availabilty zone for {1}"
                                     .format(instance_type))
                bioblend.log.error(response['error'])
                zones.append(back_compatible_zone)
        return response

    def _checkURL(self, url):
        """
        Check if the ``url`` is *alive* (i.e., remote server returns code 200(OK)
        or 401 (unauthorized)).
        """
        try:
            p = urlparse(url)
            h = HTTPConnection(p[1])
            h.putrequest('HEAD', p[2])
            h.endheaders()
            r = h.getresponse()
            if r.status in (200, 401):  # CloudMan UI is pwd protected so include 401
                return True
        except Exception:
            # No response or no good response
            pass
        return False