/usr/share/pyshared/drslib/mip_table.py is in python-drslib 0.3.0a3-5.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# BSD Licence
# Copyright (c) 2010, Science & Technology Facilities Council (STFC)
# All rights reserved.
#
# See the LICENSE file in the source distribution of this software for
# the full license text.
"""
Simple parser for MIP tables.
My interpretation of the format from reading the CMIP5 tables.
"""
import re
from glob import glob
import csv
import logging
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Entry names that start a new section of a MIP table (see iter_entries).
entry_ids = ['axis_entry', 'variable_entry']

# A table line has the form "<name>: <value>".
line_rexp = re.compile(r'(\w+):\s*(.*)')

# Quoted strings; a doubled quote character is allowed inside the quotes.
dquote = '"(?:""|[^"])*"'
squote = "'(?:''|[^'])*'"

# Tokeniser for a line: quoted strings, a bare '!' marker, or a run of
# characters containing no '!' and no quote characters.  Because quoted
# strings are tried first, a '!' inside quotes never becomes its own token.
val_rexp = re.compile('%s|%s|!|[^!"\']+' % (dquote, squote))

# An expt_id_ok value is two quoted strings: a description followed by the
# experiment id.  Raw strings are used so that ``\s`` reaches the regex
# engine untouched instead of being an invalid string escape.
expt_id_ok_rexp = re.compile(
    r'(?P<sep1>[\'"])(?P<desc>.*?)(?P=sep1)\s*'
    r'(?P<sep2>[\'"])(?P<id>.*?)(?P=sep2)'
)
class error(Exception):
    """Exception type this module raises for MIP table content it cannot parse."""
def parse_line(line):
    """
    Break a single table line into ``(entry, value, comment)``.

    The comment part is separated off first.  If nothing else remains,
    ``entry`` and ``value`` are both ``None``.  Otherwise the remainder must
    look like ``name: value``.

    :param line: One line of text from a MIP table.
    :return: Tuple ``(entry, value, comment)``.
    :raises error: If the non-comment text is not of the form ``name: value``.
    """
    body, comment = split_comment(line.strip())
    if not body:
        # Only a comment (or nothing at all) on this line.
        return (None, None, comment)

    matched = line_rexp.match(body)
    if matched is None:
        raise error('Unrecognised line: %s' % body)

    entry, value = matched.groups()
    return (entry, value.strip(), comment)
def iter_table(fh):
    """
    Yield one ``(entry, value, comment)`` event per non-blank line of a
    MIP table.

    :param fh: Iterable of text lines, e.g. an open file object.
    """
    stripped_lines = (raw.strip() for raw in fh)
    for text in stripped_lines:
        if not text:
            # Blank lines produce no event.
            continue
        yield parse_line(text)
def iter_entries(fh):
    """
    Group the lines of a MIP table into sections and yield
    ``(entry_type, entry_name, value_dict)`` for each one.

    The first section yielded has type ``'global'`` and name ``None``; it
    holds everything seen before the first entry line.  Each value in
    ``value_dict`` is a list, because a name may occur more than once
    within a section.

    :param fh: Iterable of text lines, e.g. an open file object.
    """
    current_type, current_name, attrs = 'global', None, {}

    for key, value, _comment in iter_table(fh):
        if key is None:
            # Comment-only line.
            continue
        if key not in entry_ids:
            attrs.setdefault(key, []).append(value)
            continue
        # A new entry begins: emit the section collected so far.
        yield (current_type, current_name, attrs)
        current_type, current_name, attrs = key, value, {}

    # Emit the final (possibly only) section.
    yield (current_type, current_name, attrs)
def split_comment(line):
    """
    Separate a line into its value part and its trailing comment.

    A comment starts at the first ``!`` that is not inside a quoted string.
    The returned comment text includes that ``!``.

    :param line: Line of text to split.
    :return: Tuple ``(value, comment)``; ``comment`` is ``None`` when the
        line has no comment.
    """
    tokens = val_rexp.findall(line)
    if '!' not in tokens:
        return (''.join(tokens), None)

    cut = tokens.index('!')
    return (''.join(tokens[:cut]), ''.join(tokens[cut:]))
class MIPTable(object):
    """
    Hold information from a MIP table.

    This information is used to enforce DRS vocabularies.

    :property name: The name of the MIP table as used in DRS filenames.
    :property variables: A list of variables in this table.
    :property experiments: A list of valid experiment ids for this table.
    :property frequency: First ``frequency`` value of the global section.
    """

    def __init__(self, filename):
        """
        :param filename: Name of file containing the MIP table.
        """
        self._vardict = {}
        # Close the file as soon as parsing is done instead of leaking the
        # handle until garbage collection.
        with open(filename) as fh:
            self._read_entries(fh)
        self._init_experiments()

    def _read_entries(self, fh):
        """
        Populate ``name``, ``_globals`` and ``_vardict`` from the table.

        :param fh: Open file object (or any iterable of lines).
        """
        for entry_type, entry_name, d in iter_entries(fh):
            if entry_type == 'global':
                # table_id looks like "Table <name>"; keep only <name>.
                self.name = re.match('Table (.*)', d['table_id'][0]).group(1)
                self._globals = d
            elif entry_type == 'variable_entry':
                self._vardict[entry_name] = d
            else:
                # Ignore other entry types
                pass

    def _init_experiments(self):
        """
        Build the mapping of experiment id to description from the global
        ``expt_id_ok`` values.

        :raises error: If an ``expt_id_ok`` value cannot be parsed.
        """
        self._exptdict = {}
        for value in self._globals.get('expt_id_ok', []):
            mo = expt_id_ok_rexp.match(value)
            if not mo:
                raise error("Error parsing expt_id_ok value %s" % value)
            self._exptdict[mo.group('id')] = mo.group('desc')

    @property
    def variables(self):
        """List of variable names defined in this table."""
        return list(self._vardict.keys())

    @property
    def experiments(self):
        """List of experiment ids accepted by this table."""
        return list(self._exptdict.keys())

    @property
    def frequency(self):
        """
        First ``frequency`` value from the global section.

        :raises AttributeError: If the table has no global ``frequency``.
        """
        try:
            return self._globals['frequency'][0]
        except KeyError:
            raise AttributeError('frequency')

    def get_variable_attr(self, variable, attr):
        """
        Retrieve an attribute of variable.

        If the attribute isn't in the variable entry the global
        value is returned.

        :return: The list of values recorded for the attribute.
        :raises ValueError: If ``variable`` is not in this table.
        :raises AttributeError: If the attribute is found neither in the
            variable entry nor in the global section.
        """
        if variable not in self._vardict:
            raise ValueError('Variable %s not found' % variable)

        try:
            return self._vardict[variable][attr]
        except KeyError:
            return self.get_global_attr(attr)

    def get_global_attr(self, attr):
        """
        Retrieve an attribute from the global section of the table.

        :return: The list of values recorded for the attribute.
        :raises AttributeError: If ``attr`` is not a global entry.
        """
        try:
            return self._globals[attr]
        except KeyError:
            raise AttributeError('Attribute %s is not a global entry' % attr)
class MIPTableStore(object):
    """
    A collection of MIP tables, looked up by table name.

    :property tables: A mapping of table names to MIPTable instances
    """

    def __init__(self, table_glob):
        """
        :param table_glob: A wildcard pattern for all MIP tables to load.
        """
        self.tables = {}
        for path in glob(table_glob):
            self.add_table(path)

    def add_table(self, filename):
        """
        Parse ``filename`` as a MIP table and register it under its name.

        :return: The added MIPTable instance.
        """
        table = MIPTable(filename)
        message = 'Adding table %s from %s to table store' % (table.name, filename)
        log.info(message)
        self.tables[table.name] = table
        return table

    @staticmethod
    def _single_value(values):
        """Return the only item of ``values``; complain if there isn't exactly one."""
        if len(values) == 1:
            return values[0]
        raise ValueError('%s is a multi-valued MIP attribute' % values)

    def get_variable_attr(self, table, variable, attr):
        """
        Return the single value of a variable's attribute in a given table.

        :raises ValueError: If the attribute does not have exactly one value.
        """
        return self._single_value(
            self.get_variable_attr_mv(table, variable, attr))

    def get_variable_attr_mv(self, table, variable, attr):
        """
        Return all values of a variable's attribute in a given table.

        :raises ValueError: If ``table`` is not in the store.
        """
        if table in self.tables:
            return self.tables[table].get_variable_attr(variable, attr)
        raise ValueError('Table %s not found' % table)

    def get_global_attr(self, table, attr):
        """
        Return the single value of a global attribute of a given table.

        :raises ValueError: If the attribute does not have exactly one value.
        """
        return self._single_value(self.get_global_attr_mv(table, attr))

    def get_global_attr_mv(self, table, attr):
        """
        Return all values of a global attribute of a given table.

        :raises ValueError: If ``table`` is not in the store.
        """
        if table in self.tables:
            return self.tables[table].get_global_attr(attr)
        raise ValueError('Table %s not found' % table)
#!FIXME
#def get_variable_tables(self, variable, **attribute_constraints):
# ret = []
# for table in self.tables.values():
# if variable in table.variables:
# for k, v in attribute_constraints.items():
# try:
# if table.get_variable_attr(variable, k) != v:
# break
# except AttributeError:
# pass
# else:
# ret.append(table.name)
#
# return ret
def read_model_table(table_csv):
    """
    Read Karl's CMIP5_models.xls file in CSV export format and
    return a map of model name to institute.

    This function is invoked internally to load CMIP5_models.xls from inside
    drslib.

    :param table_csv: Path of the CSV file to read.
    :return: Dictionary mapping each model id (column 5) to its institute
        (column 2).  When the institute cell contains a "/", only the text
        before the first "/" is used.
    :raises AssertionError: If the first two rows do not look like the
        expected header of the models spreadsheet.
    :raises ValueError: If the same model id appears on more than one row.
    """
    model_map = {}

    # The context manager closes the file even if parsing fails part-way.
    with open(table_csv) as fh:
        table_reader = csv.reader(fh)

        # Check first 2 lines look like the right file
        header1 = next(table_reader)
        header2 = next(table_reader)
        assert "CMIP5 Modeling Groups" in header1[0]
        assert 'Abbreviated name of center or group' in header2[1]
        assert "modified model_id" in header2[4]

        for row in table_reader:
            if not row:
                # csv.reader yields [] for a blank line; nothing to record.
                continue

            # If institute contains a "/" take the first item
            institute = row[1].split('/')[0]
            model = row[4]

            if model in model_map:
                # Raise a real exception: raising a plain string is itself
                # a TypeError and would hide the actual problem.
                raise ValueError("Duplicate model key %s" % model)
            model_map[model] = institute

    return model_map
|