/usr/lib/python3/dist-packages/biom/cli/table_summarizer.py is in python3-biom-format 2.1.5+dfsg-7.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# -----------------------------------------------------------------------------
# Copyright (c) 2011-2015, The BIOM Format Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
# -----------------------------------------------------------------------------
from __future__ import division
from operator import itemgetter
import click
from numpy import std
from biom import load_table
from biom.cli import cli
from biom.util import compute_counts_per_sample_stats
@cli.command(name='summarize-table')
@click.option('-i', '--input-fp', required=True,
              type=click.Path(exists=True, dir_okay=False),
              help='The input BIOM table')
@click.option('-o', '--output-fp', default=None,
              type=click.Path(writable=True, dir_okay=False),
              help='An output file-path')
@click.option('--qualitative', default=False, is_flag=True,
              help="Present counts as number of unique observation ids "
                   "per sample, rather than counts of observations per "
                   "sample.")
@click.option('--observations', default=False, is_flag=True,
              help="Summarize over observations")
def summarize_table(input_fp, output_fp, qualitative, observations):
    """Summarize sample or observation data in a BIOM table.

    Provides details on the observation counts per sample, including summary
    statistics, as well as metadata categories associated with samples and
    observations.

    Example usage:

    Write a summary of table.biom to table_summary.txt:

    $ biom summarize-table -i table.biom -o table_summary.txt

    """
    # NOTE: the docstring above doubles as the command's help text, so its
    # wording is user-facing.
    summary = _summarize_table(load_table(input_fp), qualitative,
                               observations)

    # Without an output path the report is echoed through click instead of
    # being written to disk.
    if not output_fp:
        click.echo(summary)
        return

    with open(output_fp, 'w') as out_file:
        out_file.write(summary)
def _summarize_table(table, qualitative=False, observations=False):
    """Build the plain-text summary report for a table.

    Parameters
    ----------
    table : object
        The table to summarize, as returned by ``load_table``. It must
        provide ``transpose``, ``ids``, ``metadata`` and
        ``get_table_density``.
    qualitative : bool, optional
        Passed through to ``compute_counts_per_sample_stats``. When true the
        "Total count" line is omitted and the section headings refer to
        observations per sample rather than counts per sample.
    observations : bool, optional
        When true the table is transposed before any statistics are
        computed, and the sample/observation labels in the report are
        swapped back so they describe the table as it was passed in.

    Returns
    -------
    str
        The report lines joined with newlines (no trailing newline).
    """
    if observations:
        table = table.transpose()

    stats = compute_counts_per_sample_stats(table, qualitative)
    lowest, highest, median, mean, per_sample = stats
    per_sample_values = list(per_sample.values())

    obs_axis_total = len(table.ids(axis='observation'))
    samp_axis_total = len(table.ids())

    # Metadata categories are read from the first entry on each axis; a
    # missing metadata collection is reported with a placeholder.
    samp_md = table.metadata()
    samp_md_keys = (["None provided"] if samp_md is None
                    else samp_md[0].keys())
    obs_md = table.metadata(axis='observation')
    obs_md_keys = (["None provided"] if obs_md is None
                   else obs_md[0].keys())

    if observations:
        # The table was transposed above, so what it now calls
        # "observations" are the caller's samples and vice versa.
        shown_samples, shown_observations = obs_axis_total, samp_axis_total
        shown_samp_keys, shown_obs_keys = obs_md_keys, samp_md_keys
    else:
        shown_samples, shown_observations = samp_axis_total, obs_axis_total
        shown_samp_keys, shown_obs_keys = samp_md_keys, obs_md_keys

    report = [
        'Num samples: %d' % shown_samples,
        'Num observations: %d' % shown_observations,
    ]
    if not qualitative:
        report.append('Total count: %d' % sum(per_sample_values))
    report.append('Table density (fraction of non-zero values): %1.3f' %
                  table.get_table_density())
    report.append('')

    if not qualitative:
        summary_heading = 'Counts/sample summary:'
    elif observations:
        summary_heading = 'Sample/observations summary:'
    else:
        summary_heading = 'Observations/sample summary:'

    report.extend([
        summary_heading,
        ' Min: %r' % lowest,
        ' Max: %r' % highest,
        ' Median: %1.3f' % median,
        ' Mean: %1.3f' % mean,
        ' Std. dev.: %1.3f' % std(per_sample_values),
        ' Sample Metadata Categories: %s' % '; '.join(shown_samp_keys),
        ' Observation Metadata Categories: %s' % '; '.join(shown_obs_keys),
        '',
    ])

    report.append('Observations/sample detail:' if qualitative
                  else 'Counts/sample detail:')

    # One line per id, ordered by ascending count.
    ordered = sorted(per_sample.items(), key=itemgetter(1))
    report.extend('%s: %r' % (ident, count) for ident, count in ordered)

    return "\n".join(report)
|