/usr/share/perl5/Genome/Model/Tools/Music/Bmr/CalcCovgHelper.pm is in libgenome-model-tools-music-perl 0.04-3.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 | package Genome::Model::Tools::Music::Bmr::CalcCovgHelper;
use warnings;
use strict;
use IO::File;
our $VERSION = $Genome::Model::Tools::Music::VERSION;
class Genome::Model::Tools::Music::Bmr::CalcCovgHelper {
is => 'Genome::Model::Tools::Music::Bmr::Base',
has_input => [
roi_file => { is => 'Text', doc => "Tab delimited list of ROIs [chr start stop gene_name] (See Description)" },
reference_sequence => { is => 'Text', doc => "Path to reference sequence in FASTA format" },
normal_tumor_bam_pair => { is => 'Text', doc => "Tab delimited line with sample name, path to normal bam file, and path to tumor bam file (See Description)" },
output_file => { is => 'Text', doc => "Output file path. Specify either output-file or output-directory.", is_optional => 1},
output_dir => { is => 'Text', doc => "Output directory path. Specify either output-file or output-directory", is_optional => 1},
normal_min_depth => { is => 'Integer', doc => "The minimum read depth to consider a Normal BAM base as covered", is_optional => 1, default => 6},
tumor_min_depth => { is => 'Integer', doc => "The minimum read depth to consider a Tumor BAM base as covered", is_optional => 1, default => 8},
min_mapq => { is => 'Integer', doc => "The minimum mapping quality of reads to consider towards read depth counts", is_optional => 1, default => 20},
],
has_calculated_optional => [
sample_name => {
calculate_from => ['normal_tumor_bam_pair'],
calculate => q {my @bams = split /\t/, $normal_tumor_bam_pair; return $bams[0];},
},
final_output_file => {
is_output => 1,
calculate_from => ['output_file','output_dir','sample_name'],
calculate => q {if ($output_file) {return $output_file;} elsif ($output_dir){return $output_dir."/".$sample_name;} else {die "Either output-file or output-dir must be specified."}},
},
normal_bam => {
calculate_from => ['normal_tumor_bam_pair'],
calculate => q {my @bams = split /\t/, $normal_tumor_bam_pair; return $bams[1];},
},
tumor_bam => {
calculate_from => ['normal_tumor_bam_pair'],
calculate => q {my @bams = split /\t/, $normal_tumor_bam_pair; return $bams[2];},
},
],
doc => "Uses calcRoiCovg.c to count covered bases per-gene for a tumor-normal pair of BAMs."
};
sub help_synopsis {
return <<HELP
General usage:
... music bmr calc-covg-helper \\
--normal-tumor-bam-pair "sample-name path/to/normal_bam path/to/tumor_bam" \\
--reference-sequence input_dir/all_sequences.fa \\
--output-file output_file \\
--roi-file input_dir/all_coding_exons.tsv
HELP
}
sub help_detail {
return <<HELP;
This script counts bases with sufficient coverage in the ROIs of each gene in the given pair of
tumor-normal BAM files and categorizes them into - AT, CG (non-CpG), and CpG counts. It also adds
up these base-counts across all ROIs of each gene in the sample, but covered bases that lie
within overlapping ROIs are not counted more than once towards these total counts.
HELP
}
sub _additional_help_sections {
return (
"ARGUMENTS",
<<EOS
=over 4
=item --roi-file
=over 8
=item The regions of interest (ROIs) of each gene are typically regions targeted for sequencing or are
merged exon loci (from multiple transcripts) of genes with 2-bp flanks (splice junctions). ROIs
from the same chromosome must be listed adjacent to each other in this file. This allows the
underlying C-based code to run much more efficiently and avoid re-counting bases seen in
overlapping ROIs (for overall covered base counts). For per-gene base counts, an overlapping
base will be counted each time it appears in an ROI of the same gene. To avoid this, be sure to
merge together overlapping ROIs of the same gene. BEDtools' mergeBed can help if used per gene.
=back
=item --reference-sequence
=over 8
=item The reference sequence in FASTA format. If a reference sequence index is not found next to this
file (a .fai file), it will be created.
=back
=item --normal-tumor-bam-pair
=over 8
=item "sample-name path/to/normal_bam path/to/tumor_bam"
=back
=item --output-file
=over 8
=item Specify an output file where the per-ROI covered base counts will be written
=back
=back
EOS
);
}
sub _doc_authors {
return " Cyriac Kandoth, Ph.D.";
}
sub _doc_see_also {
return <<EOS
B<genome-music-bmr>(1),
B<genome-music>(1),
B<genome>(1)
EOS
}
sub execute {
my $self = shift;
my $roi_file = $self->roi_file;
my $ref_seq = $self->reference_sequence;
my $tumor_bam = $self->tumor_bam;
my $normal_bam = $self->normal_bam;
my $output_file = $self->final_output_file;
my $normal_min_depth = $self->normal_min_depth;
my $tumor_min_depth = $self->tumor_min_depth;
my $min_mapq = $self->min_mapq;
# Check on all the input data before starting work
print STDERR "ROI file not found or is empty: $roi_file\n" unless( -s $roi_file );
print STDERR "Reference sequence file not found: $ref_seq\n" unless( -e $ref_seq );
print STDERR "Normal BAM file not found or is empty: $normal_bam\n" unless( -s $normal_bam );
print STDERR "Tumor BAM file not found or is empty: $tumor_bam\n" unless( -s $tumor_bam );
return undef unless( -s $roi_file && -e $ref_seq && -s $normal_bam && -s $tumor_bam );
# Check whether the annotated regions of interest are clumped together by chromosome
my $roiFh = IO::File->new( $roi_file ) or die "ROI file could not be opened. $!\n";
my @chroms = ( "" );
while( my $line = $roiFh->getline ) # Emulate Unix's uniq command on the chromosome column
{
my ( $chrom ) = ( $line =~ m/^(\S+)/ );
push( @chroms, $chrom ) if( $chrom ne $chroms[-1] );
}
$roiFh->close;
my %chroms = map { $_ => 1 } @chroms; # Get the actual number of unique chromosomes
if( scalar( @chroms ) != scalar( keys %chroms ))
{
print STDERR "ROIs from the same chromosome must be listed adjacent to each other in file. ";
print STDERR "If in UNIX, try:\nsort -k 1,1 $roi_file\n";
return undef;
}
# If the reference sequence FASTA file hasn't been indexed, do it
my $ref_seq_idx = "$ref_seq.fai";
system( "samtools faidx $ref_seq" ) unless( -e $ref_seq_idx );
$normal_bam = '' unless( defined $normal_bam );
$tumor_bam = '' unless( defined $tumor_bam );
print STDERR "Normal BAM not found: \"$normal_bam\"\n" unless( -e $normal_bam );
print STDERR "Tumor BAM not found: \"$tumor_bam\"\n" unless( -e $tumor_bam );
next unless( -e $normal_bam && -e $tumor_bam );
# Construct the command that calculates coverage per ROI
my $calcRoiCovg_cmd = "calcRoiCovg $normal_bam $tumor_bam $roi_file $ref_seq $output_file $normal_min_depth $tumor_min_depth $min_mapq";
# If the calcRoiCovg output was already generated, then don't rerun it
if( -s $output_file )
{
print "Output file $output_file found. Skipping re-calculation.\n";
}
# Run the calcRoiCovg command on this tumor-normal pair. This could take a while
elsif( system( "$calcRoiCovg_cmd" ) != 0 )
{
print STDERR "Failed to execute: $calcRoiCovg_cmd\n";
return;
}
else
{
print "$output_file generated and stored.\n";
return 1;
}
}
1;
|