/usr/bin/subset_biom is in python-biom-format 1.1.2-1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#! /usr/bin/python
from biom.parse import get_axis_indices, direct_slice_data, direct_parse_key
__author__ = "Daniel McDonald"
__copyright__ = "Copyright 2012, The BIOM-Format Project"
__credits__ = ["Daniel McDonald"]
__url__ = "http://biom-format.org"
__license__ = "GPL"
__version__ = "1.1.2"
__maintainer__ = "Daniel McDonald"
__email__ = "daniel.mcdonald@colorado.edu"
# PyCogent's command-line helpers are optional. When they cannot be imported
# the script falls back to the standard library's optparse, and the
# cogent_cl_parsing flag records which of the two code paths is active.
try:
    from cogent.util.option_parsing import (parse_command_line_parameters,
                                            make_option)
except ImportError:
    from sys import argv
    cogent_cl_parsing = False
else:
    cogent_cl_parsing = True
# Declare the command-line interface for whichever option parser is in use.
# Both branches expose the same four options: -i/--biom_fp, -a/--axis,
# -s/--ids_fp and -o/--output_fp.
if cogent_cl_parsing:
    # PyCogent path: the interface is described by the script_info dict that
    # is later expanded into parse_command_line_parameters(**script_info).
    script_info = {}
    script_info['brief_description'] = "Subset a BIOM file."
    script_info['script_description'] = (
        "Subset a BIOM file, over either the observations or samples, "
        "without fully parsing it. This script is intended to assist "
        "working with very large tables when tight on memory, or as a "
        "light weight way to subset a full table. Currently, it is "
        "possible to produce tables with rows or columns (observations "
        "or samples) that are fully zerod.")
    script_info['script_usage'] = [
        ("",
         "Subset the observations in my_data.biom file.",
         "%prog -i my_data.biom -a observations -s file_with_ids"),
    ]
    script_info['output_description'] = ""
    script_info['required_options'] = [
        make_option('-i', '--biom_fp', type="existing_filepath",
                    help='the BIological Observation Matrix filepath'),
        make_option('-a', '--axis', type='choice',
                    choices=['observations', 'samples'],
                    help="The axis to subset over"),
        make_option('-s', '--ids_fp', type="existing_filepath",
                    help="A file containing a single column of IDs to retain"),
        make_option('-o', '--output_fp', type="new_filepath",
                    help="A file to write the result to"),
    ]
    script_info['version'] = __version__
else:
    # Fallback path: plain optparse. The PyCogent-specific filepath types
    # are not available here, so paths are taken as ordinary strings.
    from optparse import OptionParser, make_option
    options = [
        make_option('-i', '--biom_fp', type="string",
                    help='the BIological Observation Matrix filepath'),
        # optparse supports 'choice' natively; using it keeps the accepted
        # axis values identical to the PyCogent branch and rejects anything
        # else at parse time instead of somewhere downstream.
        make_option('-a', '--axis', type='choice',
                    choices=['observations', 'samples'],
                    help="The axis to subset over, either 'samples' or "
                         "'observations'"),
        make_option('-s', '--ids_fp', type="string",
                    help="A file containing a single column of IDs to retain"),
        make_option('-o', '--output_fp', type="string",
                    help="A file to write the result to"),
    ]
def _read_ids(ids_fp):
    """Return the IDs listed one per line in ids_fp, skipping blank lines."""
    with open(ids_fp) as ids_file:
        stripped = (line.strip() for line in ids_file)
        # A blank (e.g. trailing) line would otherwise be passed along as an
        # empty-string ID.
        return [id_ for id_ in stripped if id_]


def _parse_options():
    """Parse the command line and return the options object.

    Uses PyCogent's parser when cogent_cl_parsing is set, otherwise optparse.
    In the optparse case the options are additionally checked here, because
    optparse itself has no notion of a required option.
    """
    if cogent_cl_parsing:
        _, opts, _ = parse_command_line_parameters(**script_info)
        return opts

    parser = OptionParser(option_list=options)
    opts, _ = parser.parse_args()
    for name in ('biom_fp', 'axis', 'ids_fp', 'output_fp'):
        if getattr(opts, name) is None:
            # parser.error prints the usage message and exits with status 2.
            parser.error("Required option --%s omitted." % name)
    if opts.axis not in ('observations', 'samples'):
        parser.error("--axis must be either 'observations' or 'samples'.")
    return opts


def main():
    """Subset a BIOM file along one axis and write the result.

    Reads the IDs to retain and the raw BIOM text, obtains the retained
    indices, the subset axis section and the sliced data from the biom.parse
    helpers, and writes a new JSON object to the output path. Everything
    other than the data and the subset axis is copied over from the input
    text via direct_parse_key.
    """
    opts = _parse_options()

    ids = _read_ids(opts.ids_fp)
    with open(opts.biom_fp) as biom_file:
        biom_str = biom_file.read()

    idxs, new_axis_md = get_axis_indices(biom_str, ids, opts.axis)
    new_data = direct_slice_data(biom_str, idxs, opts.axis)

    # The axis that was not subset is carried over unchanged.
    if opts.axis == "observations":
        untouched_axis_key = "columns"
    else:
        untouched_axis_key = "rows"

    header_keys = ("id", "format", "format_url", "type", "generated_by",
                   "date", "matrix_type", "matrix_element_type")

    # multiple walks over the file. bad form, but easy right now
    # ...should add a yield_and_ignore parser or something.
    # Pieces are written one at a time rather than joined first, so no
    # second full copy of the output is built in memory.
    with open(opts.output_fp, 'w') as output:
        output.write('{')
        for key in header_keys:
            output.write(direct_parse_key(biom_str, key))
            output.write(",")
        output.write(new_data)
        output.write(",")
        output.write(new_axis_md)
        output.write(",")
        output.write(direct_parse_key(biom_str, untouched_axis_key))
        output.write("}")


if __name__ == '__main__':
    main()
|