/usr/share/art-nextgen-simulation-tools/ART_profiler_illumina/combinedAvg.pl is in art-nextgen-simulation-tools-profiles 20160605+dfsg-2build1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/usr/bin/perl
# A perl program to compute the average from the frequencies generated by summation.pl amongst
# multiple files for each position.
# @author Jason Myers
use strict;
use warnings;

# Usage: perl combinedAvg.pl FreqFile
#
# Reads a tab-separated frequency table from FreqFile. The first line is a
# header and is discarded. Every following line is one row of the table:
# the row index (0 for the first data row) is used as the quality score and
# each tab-separated column is one position.
#
# Three rows are then APPENDED to FreqFile itself, each value followed by a
# tab and each row terminated by a newline:
#   1. per-position sum of (frequency * quality score)   -- the numerators
#   2. per-position sum of the frequencies               -- the denominators
#   3. per-position average quality score (numerator / denominator, or 0
#      when either of them is 0)

# Print a usage message unless exactly one frequency file is specified.
# A usage error is reported on STDERR with a non-zero exit status so that
# calling scripts can detect it.
unless (@ARGV == 1) {
    print STDERR "Usage: perl combinedAvg.pl FreqFile\n";
    exit 1;
}

# The frequency file; it is read first and appended to afterwards.
my $infile = $ARGV[0];

# The number of all possible quality scores (valid scores are 0 .. 70).
my $numScores = 71;

# Three-argument open with a lexical handle: the file name comes straight
# from the command line, so it must never be interpreted as an open mode.
open(my $in, '<', $infile)
    or die "Cannot open '$infile' for reading: $!";

# Deal with the header line: it is read and intentionally ignored.
my $header = <$in>;

# Per-position sum of frequency * quality score (the numerators).
my @weighted_sum = ();
# Per-position sum of the frequencies (the denominators).
my @freq_total = ();
# Widest data row seen so far; determines how many columns are written.
# Using the maximum (instead of the width of whichever line happened to be
# read last) keeps a trailing blank or short line from truncating the output.
my $num_cols = 0;
# Index of the data row being processed; doubles as the quality score.
my $row = 0;
# Set once a row beyond the score table has been reported.
my $warned_overflow = 0;

# Loop over the file's contents
while (my $line = <$in>) {
    # Strip the line terminator (LF or CRLF) so it never ends up in a field.
    $line =~ s/\r?\n\z//;

    # Keep empty fields (limit -1) so that column positions are stable, then
    # drop the single empty field produced by a trailing tab. Rows written
    # in the tab-terminated format this script itself emits, and rows
    # without a trailing tab, therefore yield the same columns.
    my @fields = split /\t/, $line, -1;
    pop @fields if @fields && $fields[-1] eq '';

    $num_cols = scalar @fields if @fields > $num_cols;

    # Rows beyond the score table carry no known quality score. They are
    # weighted with 0 (their frequencies still count in the denominators)
    # and reported once.
    my $score = $row;
    if ($row >= $numScores) {
        $score = 0;
        unless ($warned_overflow) {
            warn "combinedAvg.pl: '$infile' has more than $numScores data rows; "
               . "extra rows are weighted with score 0\n";
            $warned_overflow = 1;
        }
    }

    # Loop over the elements of the line
    for my $pos (0 .. $#fields) {
        # An empty field counts as a frequency of 0.
        my $freq = $fields[$pos];
        $freq = 0 unless length $freq;

        # Aggregate the products of the frequency and quality score
        $weighted_sum[$pos] += $freq * $score;
        # Sum all of the frequencies
        $freq_total[$pos] += $freq;
    }

    # Increase the row counter (blank rows still occupy a score slot).
    $row++;
}

# Close the input file before it is reopened for appending.
close($in) or die "Cannot close '$infile' after reading: $!";

# Compute the average quality score for each position. If the numerator or
# the denominator is 0, the average quality is 0.
my @average = ();
for my $pos (0 .. $num_cols - 1) {
    if ($weighted_sum[$pos] == 0 || $freq_total[$pos] == 0) {
        push @average, 0;
    }
    else {
        push @average, $weighted_sum[$pos] / $freq_total[$pos];
    }
}

# The output file is the input file, opened for appending.
my $outfile = $infile;
open(my $out, '>>', $outfile)
    or die "Cannot open '$outfile' for appending: $!";

# Write out the numerators, the denominators and the averages.
print {$out} format_row(@weighted_sum);
print {$out} format_row(@freq_total);
print {$out} format_row(@average);

# Buffered write errors only surface at close, so check it.
close($out) or die "Cannot close '$outfile' after appending: $!";

# end the program
exit;

# Format a list of values as one output row: every value is followed by a
# tab and the row is terminated by a newline. An empty list yields "\n".
sub format_row {
    my @values = @_;
    return join('', map { "$_\t" } @values) . "\n";
}
|