This file is indexed.

/usr/lib/python3/dist-packages/bioblend/galaxy/datasets/__init__.py is in python3-bioblend 0.7.0-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
"""
Contains possible interactions with the Galaxy Datasets
"""
import logging
import os
import shlex
import time

import requests
from six.moves import range
from six.moves.urllib.parse import urljoin
from six.moves.urllib.request import urlopen

from bioblend.galaxy.client import Client

log = logging.getLogger(__name__)


class DatasetClient(Client):
    """
    Client for the Galaxy ``datasets`` API: show dataset details, download
    dataset content and fetch the stdout/stderr recorded for a dataset.
    """

    def __init__(self, galaxy_instance):
        """
        :param galaxy_instance: Galaxy instance object, passed through to the
                                base ``Client`` constructor.
        """
        # Set before calling the base constructor, as in the original code
        # (presumably the base class reads it to build ``self.url`` -- confirm
        # against ``Client``).
        self.module = 'datasets'
        super(DatasetClient, self).__init__(galaxy_instance)

    def show_dataset(self, dataset_id, deleted=False, hda_ldda='hda'):
        """
        Display information about and/or content of a dataset. This can be a
        history or a library dataset.

        :type dataset_id: str
        :param dataset_id: Encoded dataset ID

        :type deleted: bool
        :param deleted: Whether to return results for a deleted dataset

        :type hda_ldda: str
        :param hda_ldda: Whether to show a history dataset ('hda' - the default) or library
                         dataset ('ldda').

        :return: Whatever ``Client._get`` returns for the dataset; the other
                 methods of this class index it like a dict (``'state'``,
                 ``'name'``, ...).
        """
        params = dict(
            hda_ldda=hda_ldda,
        )
        return Client._get(self, id=dataset_id, deleted=deleted, params=params)

    def download_dataset(self, dataset_id, file_path=None, use_default_filename=True,
                         wait_for_completion=False, maxwait=12000):
        """
        Download the dataset identified by ``dataset_id``.

        :type dataset_id: str
        :param dataset_id: Encoded dataset ID

        :type file_path: str
        :param file_path: If provided, the dataset content is written to disk
                          at that path (it should be a directory, without the
                          filename, if ``use_default_filename`` is True).
                          If not provided, the dataset content is returned by
                          the method. In both cases the whole content is held
                          in memory, so memory consumption may be heavy.

        :type use_default_filename: bool
        :param use_default_filename: If True, the file is saved as
                                 ``file_path/<filename>``, where the filename is taken
                                 from the ``content-disposition`` response header or,
                                 failing that, built as ``<dataset name>.<extension>``.
                                 If False, ``file_path`` is assumed to contain the
                                 full file path including the filename.

        :type wait_for_completion: bool
        :param wait_for_completion: If True, this call will block until the dataset
                                    state is 'ok' or 'error'.

        :type maxwait: int
        :param maxwait: Time (in seconds) to wait for the dataset to complete. Must be an
                        integer greater than the polling interval (30 seconds).
                        If the dataset is not complete within this time, a
                        DatasetTimeoutException is raised.

        :rtype: bytes or str
        :return: If ``file_path`` is not provided, the raw content of the
                 dataset. Otherwise, the path of the file the dataset was
                 saved to.

        :raises DatasetStateException: if the dataset state is not 'ok'
        :raises DatasetTimeoutException: if ``wait_for_completion`` is True
            and the dataset does not complete within ``maxwait`` seconds
        """
        if wait_for_completion:
            self._block_until_dataset_ready(dataset_id, maxwait=maxwait)

        dataset = self.show_dataset(dataset_id)
        if dataset['state'] != 'ok':
            raise DatasetStateException("Dataset not ready. Dataset id: %s, current state: %s" % (dataset_id, dataset['state']))

        # Galaxy release_13.01 and earlier does not have file_ext in the dataset
        # dict, so resort to data_type.
        # N.B.: data_type cannot be used for Galaxy release_14.10 and later
        # because it was changed to the Galaxy datatype class
        # The fallback is looked up lazily so that a response carrying
        # 'file_ext' but no 'data_type' does not fail with a KeyError.
        if 'file_ext' in dataset:
            file_ext = dataset['file_ext']
        else:
            file_ext = dataset['data_type']

        # The preferred download URL is
        # '/api/histories/<history_id>/contents/<dataset_id>/display?to_ext=<dataset_ext>'
        # since the old URL:
        # '/dataset/<dataset_id>/display/to_ext=<dataset_ext>'
        # does not work when using REMOTE_USER with access disabled to
        # everything but /api without auth
        if 'url' in dataset:
            # This is Galaxy release_15.03 or later ('url' is used as the
            # release marker, 'download_url' is the value actually followed)
            download_url = dataset['download_url'] + '?to_ext=' + file_ext
        else:
            # This is Galaxy release_15.01 or earlier, for which the preferred
            # URL does not work without a key, so resort to the old URL
            download_url = 'datasets/' + dataset_id + '/display?to_ext=' + file_ext
        url = urljoin(self.gi.base_url, download_url)

        # Don't use self.gi.make_get_request as currently the download API does
        # not require a key
        # NOTE(review): the HTTP status of the response is not checked, so an
        # error page would be returned/saved as if it were dataset content.
        r = requests.get(url, verify=self.gi.verify)

        if file_path is None:
            return r.content

        if use_default_filename:
            try:
                # First try to get the filename from the response headers
                # We expect tokens 'filename' '=' to be followed by the quoted filename
                tokens = list(shlex.shlex(r.headers['content-disposition'], posix=True))
                header_filepath = tokens[tokens.index('filename') + 2]
                filename = os.path.basename(header_filepath)
            except (KeyError, ValueError, IndexError):
                # KeyError: no content-disposition header at all;
                # ValueError: no 'filename' token in the header;
                # IndexError: nothing follows 'filename ='.
                # In all these cases build a useable filename ourselves.
                filename = dataset['name'] + '.' + file_ext

            file_local_path = os.path.join(file_path, filename)
        else:
            file_local_path = file_path

        with open(file_local_path, 'wb') as fp:
            fp.write(r.content)

        # Return location file was saved to
        return file_local_path

    def _is_dataset_complete(self, dataset_id):
        """
        Return True if the dataset state is terminal, i.e. 'ok' or 'error'.

        :type dataset_id: str
        :param dataset_id: Encoded dataset ID
        """
        dataset = self.show_dataset(dataset_id)
        return dataset['state'] in ('ok', 'error')

    def _block_until_dataset_ready(self, dataset_id, maxwait=12000, interval=30, raise_on_timeout=True):
        """
        Wait until the dataset state changes to ok or error.
        Based on: https://github.com/salimfadhley/jenkinsapi/blob/master/jenkinsapi/api.py

        :type dataset_id: str
        :param dataset_id: Encoded dataset ID

        :type maxwait: int
        :param maxwait: Total time (in seconds) to wait; must be greater than
                        ``interval``

        :type interval: int
        :param interval: Time (in seconds) to sleep between two state checks;
                         must be positive

        :type raise_on_timeout: bool
        :param raise_on_timeout: If True, raise DatasetTimeoutException when
                                 the dataset is still not complete after
                                 ``maxwait`` seconds; if False, return silently
        """
        assert maxwait > 0
        assert maxwait > interval
        assert interval > 0

        for time_left in range(maxwait, 0, -interval):
            if self._is_dataset_complete(dataset_id):
                return
            # Logger.warn is a deprecated alias of Logger.warning; the
            # arguments are passed lazily, the rendered message is unchanged.
            log.warning("Waiting for dataset %s to complete. Will wait another %is", dataset_id, time_left)
            time.sleep(interval)
        if raise_on_timeout:
            raise DatasetTimeoutException("Waited too long for dataset to complete: %s" % dataset_id)

    def _read_dataset_stream(self, dataset_id, stream_name):
        """
        Fetch ``<base>/datasets/<dataset_id>/<stream_name>`` and return the
        raw response body.

        :type dataset_id: str
        :param dataset_id: Encoded dataset ID

        :type stream_name: str
        :param stream_name: Last URL path component, 'stderr' or 'stdout'
        """
        # The slice drops the last 13 characters of self.url, i.e. the length
        # of "/api/datasets" -- assumes self.url ends with that suffix and has
        # no trailing slash; TODO confirm against Client.
        base_url = self.url[:-len("/api/datasets/") + 1]
        res = urlopen(base_url + "/datasets/" + dataset_id + "/" + stream_name)
        try:
            return res.read()
        finally:
            # Always release the connection, even if reading fails.
            res.close()

    def show_stderr(self, dataset_id):
        """
        Display stderr output of a dataset.

        :type dataset_id: str
        :param dataset_id: Encoded dataset ID

        :return: The raw body of the HTTP response
        """
        return self._read_dataset_stream(dataset_id, "stderr")

    def show_stdout(self, dataset_id):
        """
        Display stdout output of a dataset.

        :type dataset_id: str
        :param dataset_id: Encoded dataset ID

        :return: The raw body of the HTTP response
        """
        return self._read_dataset_stream(dataset_id, "stdout")


class DatasetStateException(Exception):
    """
    Raised by ``DatasetClient.download_dataset`` when the dataset state is
    not 'ok'.

    :param value: Payload describing the problem (a message string where it
                  is raised in this module); available as ``self.value``.
    """

    def __init__(self, value):
        # Forward the payload to Exception so that ``args`` is populated;
        # without it the exception cannot be unpickled or copied and generic
        # handlers relying on ``args`` see nothing.
        super(DatasetStateException, self).__init__(value)
        self.value = value

    def __str__(self):
        # Kept as repr() for backward compatibility: str(exc) includes the
        # quotes around a string payload.
        return repr(self.value)


class DatasetTimeoutException(Exception):
    """
    Raised by ``DatasetClient._block_until_dataset_ready`` when a dataset has
    not reached the 'ok' or 'error' state within the allowed waiting time.

    :param value: Payload describing the problem (a message string where it
                  is raised in this module); available as ``self.value``.
    """

    def __init__(self, value):
        # Forward the payload to Exception so that ``args`` is populated;
        # without it the exception cannot be unpickled or copied and generic
        # handlers relying on ``args`` see nothing.
        super(DatasetTimeoutException, self).__init__(value)
        self.value = value

    def __str__(self):
        # Kept as repr() for backward compatibility: str(exc) includes the
        # quotes around a string payload.
        return repr(self.value)