This file is indexed.

/usr/bin/subset_biom is in python-biom-format 1.1.2-1.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#! /usr/bin/python

from biom.parse import get_axis_indices, direct_slice_data, direct_parse_key

# Module metadata: authorship, license and release information for this
# script. __version__ is also stored in script_info['version'] further down
# when the cogent-based option parsing is in use.
__author__ = "Daniel McDonald"
__copyright__ = "Copyright 2012, The BIOM-Format Project"
__credits__ = ["Daniel McDonald"]
__url__ = "http://biom-format.org"
__license__ = "GPL"
__version__ = "1.1.2"
__maintainer__ = "Daniel McDonald"
__email__ = "daniel.mcdonald@colorado.edu"

# Use the cogent (PyCogent) command line helpers when they can be imported;
# otherwise fall back to the standard library's optparse so the script still
# works without that dependency. cogent_cl_parsing records which of the two
# is in effect and is consulted again in the __main__ section.
try:
    from cogent.util.option_parsing import parse_command_line_parameters, \
            make_option
    cogent_cl_parsing = True
except ImportError:
    from sys import argv
    cogent_cl_parsing = False

if cogent_cl_parsing:
    # script_info is the keyword dict handed to
    # parse_command_line_parameters() in the __main__ section.
    script_info = {}
    script_info['brief_description'] = "Subset a BIOM file."
    script_info['script_description'] = "Subset a BIOM file, over either the observations or samples, without fully parsing it. This script is intended to assist working with very large tables when tight on memory, or as a light weight way to subset a full table. Currently, it is possible to produce tables with rows or columns (observations or samples) that are fully zerod."
    script_info['script_usage'] = [("","Subset the observations in my_data.biom file.","%prog -i my_data.biom -a observations -s file_with_ids")]
    script_info['output_description']= ""
    script_info['required_options'] = [
     make_option('-i','--biom_fp',type="existing_filepath",
                 help='the BIological Observation Matrix filepath'),
     make_option('-a','--axis', type='choice',
                  choices=['observations','samples'],
                  help="The axis to subset over"),
     make_option('-s','--ids_fp',type="existing_filepath",
                 help="A file containing a single column of IDs to retain"),
     make_option('-o','--output_fp',type="new_filepath",
                 help="A file to write the result to")
    ]
    script_info['version'] = __version__
else:
    from optparse import OptionParser, make_option
    # Same four options as above, expressed with plain optparse types.
    options = [
     make_option('-i','--biom_fp',type="string",
                 help='the BIological Observation Matrix filepath'),
     # 'choice' makes optparse itself reject anything but the two supported
     # axes, mirroring the cogent definition above. As a free-form string an
     # invalid value got past the parser, and the output code further down
     # only ever compares the axis against "observations".
     make_option('-a','--axis', type='choice',
                  choices=['observations','samples'],
                  help="The axis to subset over, either 'samples' or 'observations'"),
     make_option('-s','--ids_fp',type="string",
                 help="A file containing a single column of IDs to retain"),
     make_option('-o','--output_fp',type="string",
                 help="A file to write the result to")
    ]
    
if __name__ == '__main__':
    # Parse the command line with whichever parser was set up above.
    if cogent_cl_parsing:
        option_parser, opts, args =\
                     parse_command_line_parameters(**script_info)
    else:
        parser = OptionParser(option_list=options)
        opts, args = parser.parse_args()
        # optparse has no notion of required options: anything left off the
        # command line is simply None. Report that as a usage error here
        # rather than failing later inside open().
        missing = [flag for flag, value in (('--biom_fp', opts.biom_fp),
                                            ('--axis', opts.axis),
                                            ('--ids_fp', opts.ids_fp),
                                            ('--output_fp', opts.output_fp))
                   if value is None]
        if missing:
            parser.error("missing required option(s): %s" % ", ".join(missing))

    # One ID per line, with surrounding whitespace (and the newline) removed.
    # The input handles are closed as soon as their contents have been read.
    with open(opts.ids_fp) as ids_f:
        ids = [l.strip() for l in ids_f]
    with open(opts.biom_fp) as biom_f:
        biom_str = biom_f.read()

    # Both helpers work on the raw BIOM text. new_axis_md and new_data are
    # written to the output verbatim below -- presumably the already
    # serialized entries for the subset axis and the sliced data (see
    # biom.parse to confirm).
    idxs, new_axis_md = get_axis_indices(biom_str, ids, opts.axis)
    new_data = direct_slice_data(biom_str, idxs, opts.axis)

    # Top-level keys copied through from the input, in output order.
    header_keys = ("id", "format", "format_url", "type", "generated_by",
                   "date", "matrix_type", "matrix_element_type")

    # The axis that was NOT subset is copied from the input unchanged.
    if opts.axis == "observations":
        untouched_axis_key = "columns"
    else:
        untouched_axis_key = "rows"

    # multiple walks over the file. bad form, but easy right now
    # ...should add a yield_and_ignore parser or something.
    # The with block guarantees the output is flushed and closed even if one
    # of the writes (or a direct_parse_key call) raises part way through.
    with open(opts.output_fp,'w') as output:
        output.write('{')
        for key in header_keys:
            output.write(direct_parse_key(biom_str, key))
            output.write(",")
        output.write(new_data)
        output.write(",")
        output.write(new_axis_md)
        output.write(",")
        output.write(direct_parse_key(biom_str, untouched_axis_key))
        output.write("}")