This file is indexed.

/usr/share/pyshared/drslib/thredds.py is in python-drslib 0.3.0a3-5.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
"""
Check metadata in a THREDDS catalog is consistent with the DRS.

Things to check:

1. All DRS components set as properties and validate with drslib
2. drs_id is consistent with properties
3. version is a date
4. dataset urlPath is consistent with the DRS directory structure.
5. Checksums are present and the right format (NOT 'MD5:...')
6. tracking_id is present
7. Check product assignment is right.

Currently implemented: 1-3

"""

import sys
import os
import re

from lxml import etree as ET
from drslib.drs import DRS
from drslib.cmip5 import make_translator
from optparse import OptionParser
try:
    from urllib.parse import urlparse
except ImportError:
    # Python 2
    from urlparse import urlparse

import logging
log = logging.getLogger(__name__)

# XML namespaces used to address elements and attributes in THREDDS catalogues.
THREDDS_NS = 'http://www.unidata.ucar.edu/namespaces/thredds/InvCatalog/v1.0'
XLINK_NS = 'http://www.w3.org/1999/xlink'

# Usage text handed to OptionParser in main().
usage = (
    "%prog [options] thredds ...\n"
    "\n"
    "thredds:\n"
    "  A thredds url or file path of a dataset published by esgpublish."
)

# Translator used by ValidDRSCheck to turn a DRS object into a path.
trans = make_translator('')

# Maps each THREDDS property name (key) to the DRS keyword argument (value)
# it populates when DRSPropCheck builds a DRS object.
drs_prop_map = {
    'dataset_version': 'version',
    'project': 'activity',
    'experiment': 'experiment',
    'product': 'product',
    'model': 'model',
    'time_frequency': 'frequency',
    'realm': 'realm',
    'cmor_table': 'table',
    'ensemble': 'ensemble',
    'institute': 'institute',
}


class InvalidThreddsException(Exception):
    """
    Raised by a ThreddsCheck to signal that the catalogue failed the check.

    run_checks() catches this exception and logs it as an error.
    """

class CheckNotPossible(Exception):
    """
    Raised by a check that lacks the information it needs to run.

    run_checks() catches this exception and logs a warning that the
    check was aborted.
    """

class ThreddsCheck(object):
    """
    Base class of all checks, defining the interface.

    Subclasses override check() and signal failure by raising an
    exception; information for later checks is passed on through the
    environ dictionary.

    """

    def __init__(self, environ=None):
        """
        environ is a dictionary shared across all checks allowing
        checks to share information.

        When environ is omitted (or None) a fresh, private dictionary is
        created so that a check can still be run standalone and write
        its results without failing on a None environ.
        """
        # Test identity with None rather than truthiness: an empty dict
        # passed by the caller must still be the shared object.
        if environ is None:
            environ = {}
        self.environ = environ

    def check(self, etree):
        """
        Check the THREDDS catalogue represented by an ElementTree object.

        The base implementation does nothing and returns None.

        """
        pass



def run_checks(etree, checks, environ=None):
    """
    Run a sequence of checks on a THREDDS catalogue as an ElementTree.

    Each item in checks is a check class; it is instantiated with the
    shared environ dictionary and its check() method is called with etree.
    InvalidThreddsExceptions are converted to error log messages and
    CheckNotPossible to a warning; any other exception propagates.

    Returns the environ dictionary (a new one if none was supplied) after
    all checks have run.

    """
    if environ is None:
        environ = {}

    for CheckClass in checks:
        check = CheckClass(environ)
        try:
            check.check(etree)
        except InvalidThreddsException as e:
            log.error(e)
        except CheckNotPossible:
            # Logger.warn is a deprecated alias; use warning().
            log.warning('Check %s aborted', CheckClass.__name__)
        else:
            log.info('Check %s succeeded', CheckClass.__name__)

    return environ

class DRSIdCheck(ThreddsCheck):
    """
    Verify that the drs_id and dataset_id properties exist and agree.

    On success both ids are recorded in the shared environ under the
    keys 'dataset_id' and 'drs_id'.

    """

    def check(self, etree):
        top = get_dataset(etree)

        # Look up drs_id first, then dataset_id.
        drs_id, dataset_id = [get_property(top, key)
                              for key in ('drs_id', 'dataset_id')]

        if drs_id == dataset_id:
            self.environ['dataset_id'] = dataset_id
            self.environ['drs_id'] = drs_id
            return

        # The two identifiers disagree.
        raise InvalidThreddsException("dataset_id != drs_id for dataset %s" %
                                      top.get('ID'))


class DRSPropCheck(ThreddsCheck):
    """
    Check all drs components are defined as properties.

    Every property named in drs_prop_map is read from the top-level
    dataset, converted where necessary (dataset_version to an int,
    ensemble to an (r, i, p) tuple) and used to build a DRS object.

    Creates a drs attribute in the environment if successful.

    Raises InvalidThreddsException if a property is missing or malformed,
    or if the resulting DRS disagrees with a drs_id already stored in the
    environment.

    """
    def check(self, etree):
        dataset = get_dataset(etree)

        props = {}
        for prop_name in drs_prop_map:
            prop = get_property(dataset, prop_name)
            # Compare strings by value; identity ("is") only works by
            # accident of interning.
            if prop_name == 'dataset_version':
                try:
                    prop = int(prop)
                except (TypeError, ValueError):
                    # Report a bad version as a check failure instead of
                    # letting it abort the whole run.
                    raise InvalidThreddsException(
                        'Property dataset_version is not an integer: %s' % prop)
            elif prop_name == 'ensemble':
                #!TODO: refactor this to share code with drslib.translate
                # The pattern is anchored at the end: every group is
                # optional, so an unanchored match would accept any text.
                mo = None
                if prop is not None:
                    mo = re.match(r'(?:r(\d+))?(?:i(\d+))?(?:p(\d+))?$', prop)
                if not mo:
                    raise InvalidThreddsException('Unrecognised ensemble syntax %s' % prop)

                # Missing components stay None; present ones become ints.
                prop = tuple(int(x) if x is not None else None
                             for x in mo.groups())

            props[drs_prop_map[prop_name]] = prop

        drs = DRS(**props)

        # If present in environ check against drs_id
        if 'drs_id' in self.environ:
            if drs.to_dataset_id() != self.environ['drs_id']:
                raise InvalidThreddsException("drs properties inconsistent with drs_id for dataset %s" %
                                              dataset.get('ID'))

        self.environ['drs'] = drs


class ValidDRSCheck(ThreddsCheck):
    """
    Check the drs object in the environment is valid.

    The drs is considered valid if the module-level translator can convert
    it to a path. Raises CheckNotPossible when no 'drs' entry is present
    in the environment and InvalidThreddsException when the conversion
    fails.

    """

    def check(self, etree):
        if 'drs' not in self.environ:
            raise CheckNotPossible

        drs = self.environ['drs']

        try:
            # Only success or failure matters; the path itself is not used.
            trans.drs_to_path(drs)
        except Exception:
            # Catch Exception rather than everything, so KeyboardInterrupt
            # and SystemExit are not reported as validation failures.
            raise InvalidThreddsException("drs %s fails to validate" % drs)


class ValidDateCheck(ThreddsCheck):
    """
    Check that the dataset version looks like a date.

    The version of the drs object stored in the environment must be
    greater than 20100101. Raises CheckNotPossible when the environment
    holds no 'drs' entry.

    """
    def check(self, etree):
        if 'drs' not in self.environ:
            raise CheckNotPossible

        drs = self.environ['drs']
        looks_like_date = drs.version > 20100101
        if looks_like_date:
            return

        raise InvalidThreddsException("The version of dataset doesn't look like a date: %s" %
                                      drs)

#
# Utility functions
#
def get_dataset(etree):
    """
    Return the single top-level dataset element of a THREDDS catalogue.

    Raises InvalidThreddsException if the catalogue has no top-level
    dataset element or has more than one.

    """
    # There should be only 1 top-level dataset element
    datasets = etree.findall('{%s}dataset' % THREDDS_NS)
    if not datasets:
        # Report the empty case accurately rather than as "more than one".
        raise InvalidThreddsException("No top-level dataset found")
    if len(datasets) > 1:
        raise InvalidThreddsException("More than one top-level dataset")

    return datasets[0]


def get_property(dataset, name):
    """
    Return the 'value' attribute of the named property child of dataset.

    Raises InvalidThreddsException if dataset has no property element
    with the given name.

    """
    xpath = '{%s}property[@name="%s"]' % (THREDDS_NS, name)
    element = dataset.find(xpath)
    if element is not None:
        return element.get('value')

    raise InvalidThreddsException("Property %s not found in dataset %s" %
                                  (name, dataset.get('ID')))


def read_master_catalog(catalog_url):
    """
    Read master catalogue and generate the absolute URL of each dataset
    catalogue it references.

    catalog_url is parsed as XML; for every catalogRef element the
    xlink:href attribute is resolved against the directory containing
    catalog_url and the resulting URL is yielded.

    """
    # The module-level name `urlparse` is the urlparse *function*, which
    # has no urlsplit/urlunsplit/urljoin attributes, so import the helpers
    # needed here directly.
    try:
        from urllib.parse import urljoin, urlsplit, urlunsplit
    except ImportError:
        # Python 2
        from urlparse import urljoin, urlsplit, urlunsplit

    cat_etree = ET.parse(catalog_url)

    # Base URL is the catalogue's directory, without query or fragment.
    scheme, netloc, path = urlsplit(catalog_url)[:3]
    base_url = urlunsplit((scheme, netloc, os.path.dirname(path) + '/', '', ''))

    for catalog_ref in cat_etree.findall('{%s}catalogRef' % THREDDS_NS):
        ds_url = catalog_ref.get('{%s}href' % XLINK_NS)
        yield urljoin(base_url, ds_url)


def main(argv=sys.argv):
    """
    Command-line entry point: run all checks on each catalogue given.

    Positional arguments are THREDDS urls or file paths. With
    -c/--catalog, the catalogues referenced by that master catalogue are
    checked as well. Help is printed when there is nothing to check.

    """
    logging.basicConfig(level=logging.ERROR)

    parser = OptionParser(usage)
    parser.add_option('-c', '--catalog', action='store',
                      help="Scan root THREDDS catalog CATALOG for catalogRef "
                            "elements and check each referenced catalog")
    opts, args = parser.parse_args(argv[1:])

    # Checks run in this order; later ones use results of earlier ones.
    checks = [DRSIdCheck, DRSPropCheck, ValidDRSCheck, ValidDateCheck]

    targets = args
    if opts.catalog:
        log.info('Discovering catalogs from master catalog %s' % opts.catalog)
        targets.extend(read_master_catalog(opts.catalog))

    if not targets:
        parser.print_help()

    for target in targets:
        log.info('Checking %s' % target)
        run_checks(ET.parse(target), checks)

# Run the command-line interface when executed as a script.
if __name__ == '__main__':
    main()