This file is indexed.

/usr/bin/biom_validator is in python-biom-format 1.1.2-1.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
#! /usr/bin/python

"""Validate a Biological Observation Matrix (biom) formatted file

For more details, go to: http://biom-format.org
"""

import json
from httplib import HTTP 
from urlparse import urlparse 
from operator import and_
import dateutil.parser

__author__ = "Daniel McDonald"
__copyright__ = "Copyright 2012, The BIOM-Format Project"
__credits__ = ["Daniel McDonald", "Jose Clemente", "Greg Caporaso", 
               "Jai Rideout", "Justin Kuczynski", "Andreas Wilke",
               "Tobias Paczian", "Rob Knight", "Folker Meyer", 
               "Sue Huse"]
__url__ = "http://biom-format.org"
__license__ = "GPL"
__version__ = "1.1.2"
__maintainer__ = "Daniel McDonald"
__email__ = "daniel.mcdonald@colorado.edu"

def is_int(x):
    """Return True if x is an integer, False otherwise.

    Booleans are rejected explicitly: ``bool`` is a subclass of ``int``, so
    a plain ``isinstance(x, int)`` check would accept JSON ``true``/``false``
    as a valid dimension or matrix index.
    """
    return isinstance(x, int) and not isinstance(x, bool)

def valid_format_url(table):
    """Check that table['format_url'] equals the expected FORMAT_URL.

    Prints a progress line first when VERBOSE is set.

    Raises ValueError if the value differs from FORMAT_URL.
    """
    if VERBOSE:
        print("Validating format_url...")

    if table['format_url'] != FORMAT_URL:
        raise ValueError("Invalid format_url")

def valid_shape(table):
    """Check that table['shape'] is a pair of integers.

    A matrix header is (int, int) representing the size of a 2D matrix.

    Raises ValueError if 'shape' cannot be unpacked into exactly two values
    or if either value is not an integer according to is_int.
    """
    if VERBOSE:
        print("Validating shape...")

    try:
        n_rows, n_cols = table['shape']
    except (TypeError, ValueError):
        # TypeError: 'shape' is not iterable (e.g. a bare number or null).
        # ValueError: 'shape' has the wrong number of elements.
        # Both are reported uniformly as a malformed value.
        raise ValueError("'shape' must contain exactly two values")

    if not (is_int(n_rows) and is_int(n_cols)):
        raise ValueError("'shape' values do not appear to be integers")

def valid_matrix_type(table):
    """Check that table['matrix_type'] is one of MATRIX_TYPES.

    Returns nothing on success; the comparison is exact (no case folding).

    Raises ValueError if the value is not a member of MATRIX_TYPES.
    """
    if VERBOSE:
        print("Validating matrix_type...")

    if table['matrix_type'] not in MATRIX_TYPES:
        raise ValueError("Unknown matrix_type")

def valid_matrix_element_type(table):
    """Check that table['matrix_element_type'] is a key of ELEMENT_TYPES.

    Returns nothing on success.

    Raises ValueError if the value is not a known element type name.
    """
    if VERBOSE:
        print("Validating matrix_element_type...")

    if table['matrix_element_type'] not in ELEMENT_TYPES:
        raise ValueError("Unknown matrix_element_type")

def valid_datetime(table):
    """Verify that table['date'] can be parsed as a timestamp.

    Expects ISO 8601 datetime format (for example, 2011-12-19T19:00:00
                                      note that a 'T' separates the date
                                      and time)

    NOTE(review): the check is "dateutil.parser.parse does not raise", which
    may accept more than strict ISO 8601 -- confirm whether that is intended.

    Raises ValueError if parsing fails for any reason.
    """
    if VERBOSE:
        print("Validating datetime...")

    try:
        # Only parse success matters; the parsed value is discarded.
        dateutil.parser.parse(table['date'])
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt and SystemExit are not turned into a
        # validation failure.
        raise ValueError("Timestamp does not appear to be ISO 8601")

def valid_sparse_data(table):
    """Validate table['data'] as a list of sparse (row, col, value) entries.

    Each entry must unpack into exactly three items. The two indices must be
    integers (per is_int) within the bounds given by table['shape'], and the
    value must be an instance of the Python type that ELEMENT_TYPES maps
    table['matrix_element_type'] to.

    Raises ValueError describing the first offending entry and its position.
    """
    if VERBOSE:
        print("Validating data (sparse)...")

    dtype = ELEMENT_TYPES[table['matrix_element_type']]
    n_rows, n_cols = table['shape']
    max_row = n_rows - 1  # largest valid 0-based row index
    max_col = n_cols - 1  # largest valid 0-based column index

    for idx, coord in enumerate(table['data']):
        try:
            x, y, val = coord
        except (TypeError, ValueError):
            # TypeError: the entry is not iterable.
            # ValueError: the entry does not have exactly three items.
            raise ValueError("Bad matrix entry idx %d: %s" % (idx, repr(coord)))

        if not is_int(x) or not is_int(y):
            raise ValueError("Bad x or y type at idx %d: %s" % (idx, repr(coord)))

        if not isinstance(val, dtype):
            raise ValueError("Bad value at idx %d: %s" % (idx, repr(coord)))

        if x < 0 or x > max_row:
            raise ValueError("x out of bounds at idx %d: %s" % (idx, repr(coord)))

        if y < 0 or y > max_col:
            raise ValueError("y out of bounds at idx %d: %s" % (idx, repr(coord)))

def valid_dense_data(table):
    """Validate table['data'] as a dense list of rows.

    Every row must have as many elements as the column count in
    table['shape'], every element must be an instance of the Python type
    that ELEMENT_TYPES maps table['matrix_element_type'] to, and the number
    of rows must equal the row count in table['shape'].

    Raises ValueError on the first row with the wrong length or a wrongly
    typed element, or if the overall row count is wrong.
    """
    if VERBOSE:
        print("Validating data (dense)...")

    dtype = ELEMENT_TYPES[table['matrix_element_type']]
    n_rows, n_cols = table['shape']

    for row in table['data']:
        if len(row) != n_cols:
            raise ValueError("Incorrect number of cols: %s" % repr(row))

        # all() is True for an empty row, so a zero-column table validates
        # cleanly; reduce() without an initial value raised TypeError there.
        if not all(isinstance(v, dtype) for v in row):
            raise ValueError("Bad datatype in row: %s" % repr(row))

    if len(table['data']) != n_rows:
        raise ValueError("Incorrect number of rows in matrix")

def valid_format(table):
    """Check that table['format'] equals the expected FORMAT_STRING.

    Raises ValueError naming both the found and the expected format string.
    """
    if VERBOSE:
        print("Validating format...")

    found = table['format']
    if found != FORMAT_STRING:
        raise ValueError("Invalid 'format' %s, must be %s" %
                         (found, FORMAT_STRING))

def valid_type(table):
    """Check that table['type'] is a known BIOM table type.

    The value is lower-cased before the lookup in BIOM_TYPES, so the
    comparison is case-insensitive.

    Raises ValueError if the lower-cased type is not in BIOM_TYPES.
    """
    if VERBOSE:
        print("Validating type...")

    if table['type'].lower() not in BIOM_TYPES:
        raise ValueError("Unknown BIOM type: %s" % table['type'])

def valid_biom(table, check_url=False):
    """Validate a BIOM object

    Each required top-level key is checked for presence and then passed to
    its validator, in the order listed below. 'shape', 'matrix_type' and
    'matrix_element_type' are validated before 'data' because the data
    validators read all three; checking them first means a missing or
    unknown value is reported as such instead of surfacing as a KeyError
    from inside the data validation.

    After the per-key checks, the lengths of 'rows' and 'columns' are
    compared against 'shape'.

    check_url is accepted but not used by this function.

    Raises AttributeError if an expected key is missing
    Raises ValueError if the values at a key appear to be malformed
    """
    if VERBOSE:
        print("Validating biom object...")

    required_keys = [('format', valid_format),
                     ('format_url', valid_format_url),
                     ('type', valid_type),
                     ('rows', valid_rows),
                     ('columns', valid_columns),
                     ('shape', valid_shape),
                     ('matrix_type', valid_matrix_type),
                     ('matrix_element_type', valid_matrix_element_type),
                     ('data', valid_data),
                     ('generated_by', valid_generated_by),
                     ('id', valid_nullable_id),
                     ('date', valid_datetime)]

    for key, method in required_keys:
        if key not in table:
            raise AttributeError("MISSING FIELD: '%s'" % key)
        method(table)

    if len(table['rows']) != table['shape'][0]:
        raise ValueError("Number of rows in 'rows' is not equal to 'shape'")

    if len(table['columns']) != table['shape'][1]:
        raise ValueError("Number of columns in 'columns' is not equal to 'shape'")

def valid_generated_by(table):
    """Validate the generated_by field.

    The value must be truthy and must be a unicode string.

    Raises ValueError if the value is empty/falsy, or if it is not an
    instance of unicode. The emptiness check runs first.
    """
    if VERBOSE:
        print("Validating generated_by...")

    if not table['generated_by']:
        raise ValueError("'generated_by' is not populated")
    if not isinstance(table['generated_by'], unicode):
        raise ValueError("'generated_by' is not a string")

def valid_nullable_id(table):
    """Accept any table-level id.

    The table id is nullable and its contents are not inspected, so this
    validator never raises and always returns None.
    """
    return None

def valid_id(record):
    """Validate the 'id' of a single row or column record.

    record must support record['id'] lookup.

    Raises ValueError if the id is falsy (for example None or an empty
    string); the message includes the whole record.
    """
    if not record['id']:
        # Wrap record in a one-element tuple so it is always formatted as a
        # single value, whatever its type.
        raise ValueError("'id' in %s appears empty" % (record,))

def valid_metadata(record):
    """Validate the 'metadata' field of a single row or column record.

    The field is nullable: None is accepted, as is any dict. The contents
    of the dict are not inspected.

    Raises ValueError if the value is neither None nor a dict.
    """
    metadata = record['metadata']
    if metadata is None or isinstance(metadata, dict):
        return

    raise ValueError("metadata is neither null or an object")

def valid_rows(table):
    """Validate the 'rows' under 'table'

    Every entry of table['rows'] must contain each required key, and each
    key's validator is called on the entry.

    Raises AttributeError if an expected key is missing
    Raises ValueError if the values at a key appear to be malformed
    """
    if VERBOSE:
        print("Validating rows...")

    required_keys = [('id', valid_id), ('metadata', valid_metadata)]
    # Hook for extra per-table-type requirements, keyed by lower-cased table
    # type. It is empty here, so no additional keys are ever added.
    required_by_type = {}
    required_keys.extend(required_by_type.get(table['type'].lower(), []))

    for idx, row in enumerate(table['rows']):
        for key, method in required_keys:
            if key not in row:
                raise AttributeError("ROW IDX %d MISSING '%s' FIELD" % (idx, key))
            method(row)

def valid_columns(table):
    """Validate the 'columns' under 'table'

    Every entry of table['columns'] must contain each required key, and
    each key's validator is called on the entry.

    Raises AttributeError if an expected key is missing
    Raises ValueError if the values at a key appear to be malformed
    """
    if VERBOSE:
        print("Validating columns...")

    required_keys = [('id', valid_id), ('metadata', valid_metadata)]
    # Hook for extra per-table-type requirements, keyed by lower-cased table
    # type. It is empty here, so no additional keys are ever added.
    required_by_type = {}
    required_keys.extend(required_by_type.get(table['type'].lower(), []))

    for idx, col in enumerate(table['columns']):
        for key, method in required_keys:
            if key not in col:
                raise AttributeError("COL IDX %d MISSING '%s' FIELD" % (idx, key))
            method(col)

def valid_data(table):
    """Validate the 'matrix' under 'table'

    Dispatches on the lower-cased table['matrix_type']: 'sparse' goes to
    valid_sparse_data and 'dense' goes to valid_dense_data.

    Raises AttributeError if the matrix type is neither 'sparse' nor 'dense'
    Raises ValueError if the values at a key appear to be malformed
    """
    matrix_type = table['matrix_type'].lower()

    if matrix_type == 'sparse':
        valid_sparse_data(table)
    elif matrix_type == 'dense':
        valid_dense_data(table)
    else:
        raise AttributeError("Unknown matrix type")

# Use cogent's option parsing helpers when the package is importable;
# otherwise fall back to reading sys.argv by hand. cogent_cl_parsing records
# which of the two paths is active for the code further down.
try:
    from cogent.util.option_parsing import parse_command_line_parameters, make_option
    cogent_cl_parsing = True
except ImportError:
    # argv is only imported on this fallback path.
    from sys import argv
    cogent_cl_parsing = False

# Expected value of the table's 'format_url' field (see valid_format_url).
FORMAT_URL = "http://biom-format.org"
# Expected value of the table's 'format' field (see valid_format); the
# command line handling under __main__ may rebind it.
FORMAT_STRING = "Biological Observation Matrix 1.0.0"
# Accepted table 'type' values; valid_type lower-cases before the lookup.
BIOM_TYPES = set(['otu table', 'pathway table', 'function table', 
                  'ortholog table', 'gene table', 'metabolite table', 
                  'taxon table'])
# Accepted 'matrix_type' values (see valid_matrix_type).
MATRIX_TYPES = set(['sparse', 'dense'])
# Maps each 'matrix_element_type' name to the Python type used for the
# isinstance checks in valid_sparse_data / valid_dense_data.
ELEMENT_TYPES = {'int':int,'str':str,'float':float, 'unicode':unicode}
# When true, each validator prints a progress line before it runs.
VERBOSE = False

if cogent_cl_parsing:
    # Script description and option definitions; passed as keyword arguments
    # to parse_command_line_parameters under __main__.
    script_info = {
        'brief_description': "Test a biom file for adherence to the format specification.",
        'script_description': "Test a biom file for adherence to the format specification. This specification is defined at http://biom-format.org.",
        'script_usage': [("", "Validate the my_data.biom file.", "%prog -i my_data.biom")],
        'output_description': "",
        'required_options': [
            make_option('-i', '--biom_fp', type="existing_filepath",
                        help='the BIological Observation Matrix filepath to validate'),
        ],
        'optional_options': [
            make_option('-f', '--format-version', type="string",
                        default=FORMAT_STRING,
                        help='The specific format string, defaults to [default: %default]'),
        ],
        'version': __version__,
    }

if __name__ == '__main__':
    # Step 1: work out the input path and any overrides of the module-level
    # VERBOSE / FORMAT_STRING settings.
    if cogent_cl_parsing:
        option_parser, opts, args = \
            parse_command_line_parameters(**script_info)
        biom_fp = opts.biom_fp
        VERBOSE = opts.verbose
        FORMAT_STRING = opts.format_version
    else:
        # Manual fallback: strip a verbosity flag if present ...
        if '-v' in argv:
            VERBOSE = True
            argv.remove('-v')
        elif '--verbose' in argv:
            VERBOSE = True
            argv.remove('--verbose')

        # ... then read the remaining arguments purely by position:
        # argv[2] is the file path and argv[4] the format string. The flag
        # names at argv[1] and argv[3] are not inspected.
        if len(argv) == 3:
            biom_fp = argv[2]
        elif len(argv) == 5:
            biom_fp = argv[2]
            FORMAT_STRING = argv[4]
        else:
            print("Error parsing command.\nUSAGE: biom_validator.py -i my_file.biom")
            print("\nOptional:\n\t-f\tSpecify the format string, default to '%s'" % FORMAT_STRING)
            # A usage error is a failure: exit with a non-zero status.
            raise SystemExit(1)

    # Step 2: load the JSON document, closing the file handle promptly, and
    # validate it. Validation errors propagate as exceptions.
    with open(biom_fp) as biom_file:
        table = json.load(biom_file)
    valid_biom(table)