/usr/bin/fastq_quality_boxplot_graph.sh is in fastx-toolkit 0.0.14-5.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 | #!/usr/bin/env bash
# FASTX-toolkit - FASTA/FASTQ preprocessing tools.
# Copyright (C) 2009-2013 A. Gordon (assafgordon@gmail.com)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Print the command-line help text on standard output and terminate the script.
# The program name shown on the "Usage:" line is taken from $0.
# The exit status is that of the final output command (0 when the write succeeds).
usage() {
  local help_lines=(
    "Solexa-Quality BoxPlot plotter"
    "Generates a solexa quality score box-plot graph "
    ""
    "Usage: $0 [-i INPUT.TXT] [-t TITLE] [-p] [-o OUTPUT]"
    ""
    " [-p] - Generate PostScript (.PS) file. Default is PNG image."
    " [-i INPUT.TXT] - Input file. Should be the output of \"solexa_quality_statistics\" program."
    " [-o OUTPUT] - Output file name. default is STDOUT."
    " [-t TITLE] - Title (usually the solexa file name) - will be plotted on the graph."
    ""
  )
  # One array element per output line; empty elements produce the blank separator lines.
  printf '%s\n' "${help_lines[@]}"
  exit
}
#
# Input Data columns: #pos cnt min max sum mean Q1 med Q3 IQR lW rW A_Count C_Count G_Count T_Count N_Count
# As produced by "solexa_quality_statistics" program
# Defaults; each one can be overridden by a command-line option.
TITLE=""                                 # -t: default title is empty
FILENAME=""                              # -i: statistics file to plot
OUTPUTTERM="set term png size 2048,768"  # -p: default output terminal is "PNG"
OUTPUTFILE="/dev/stdout"                 # -o: default output file is simply "stdout"

# Parse the given option arguments into TITLE, FILENAME, OUTPUTFILE and OUTPUTTERM.
#
# Arguments: the script's command-line arguments ("$@").
# Globals:   writes TITLE, FILENAME, OUTPUTFILE, OUTPUTTERM; advances the global
#            OPTIND so the caller can shift the consumed options away.
# Exits:     via usage() for -h; with status 1 (message on stderr) for an unknown
#            option or an option that is missing its argument.
parse_options() {
  local opt
  # The leading ':' selects getopts' silent mode, so errors are reported here:
  # opt is ':' for a missing argument and '?' for an unknown option.
  while getopts ":t:i:o:ph" opt; do
    case "$opt" in
      t) TITLE="for $OPTARG" ;;
      i) FILENAME=$OPTARG ;;
      o) OUTPUTFILE=$OPTARG ;;
      p) OUTPUTTERM="set term postscript enhanced color \"Helvetica\" 8" ;;
      h) usage ;;
      :)
        # Diagnostics go to stderr: stdout may be carrying the image itself.
        echo "option -$OPTARG requires an argument. use '-h' for usage information." >&2
        exit 1
        ;;
      *)
        echo "unrecognized argument. use '-h' for usage information." >&2
        exit 1
        ;;
    esac
  done
}

parse_options "$@"
# Drop the parsed options so that only non-option arguments remain in "$@".
shift $((OPTIND - 1))
# An input file is mandatory: without -i there is nothing to plot, so show the help text.
if [ -z "$FILENAME" ]; then
  usage
fi

# Refuse unreadable input early. The message names the path given with -i;
# the positional parameter $1 is not the input file once the options have been shifted away.
if [ ! -r "$FILENAME" ]; then
  echo "Error: can't open input file ($FILENAME)." >&2
  exit 1
fi
##
## Input validation
## Too many users (in galaxy) try to plot a FASTQ file
## (without using the 'fastq statistics' tool first).
##
## gnuplot's error in that case is cryptic, and support emails are annoying.
##
## try to detect FASTA/FASTQ input, and give a detailed, easy-to-understand warning.
##
##
# awk program that inspects the first lines of its input and prints "FASTA" or
# "FASTQ" when the input looks like a sequence file; it prints nothing otherwise.
#   line 1: '>' marks a FASTA identifier, '@' marks a FASTQ identifier
#   line 2: must consist only of nucleotide letters; 'N' (unknown base) and
#           lowercase letters are accepted as well, since real reads contain them
AWK_FASTX_DETECTION='
NR==1 && /^>/ { fasta_id = 1 }
NR==1 && /^@/ { fastq_id = 1 }
NR==2 && /^[ACGTNacgtn]+$/ { nucleotides = 1 }
NR>3 { exit }
END {
  if ( fasta_id && nucleotides ) { print "FASTA" }
  if ( fastq_id && nucleotides ) { print "FASTQ" }
}'

# Feed the file through stdin rather than as an awk operand: awk would treat an
# operand such as "x=1.txt" as a variable assignment and "-" as standard input.
INPUT_TYPE=$(awk "$AWK_FASTX_DETECTION" < "$FILENAME")

if [ "$INPUT_TYPE" = "FASTA" ]; then
  # this doesn't even make sense: FASTA files don't contain any quality scores
  cat >&2 <<EOF
Error: It looks like your input file is a FASTA file.
FASTA files do not contain quality scores, and can not be used with this tool.
EOF
  exit 1
fi

if [ "$INPUT_TYPE" = "FASTQ" ]; then
  cat >&2 <<EOF
Error: It looks like your input file is a FASTQ file.
This tool (fastq-quality-plot) can't use FASTQ files directly - it requires a tabular text file conaining summary statistic about your FASTQ file.
In Galaxy,
Please use the "Compute Quality Statistics" tool (in the "NGS: QC and Manipulation" category) to compute the quality statistics report, and then use this tool with the new statistics report.
On the command line,
Please use the "fastx_quality_stats" program to create the statistics report.
EOF
  exit 1
fi
##
## Even if this is not a FASTA/FASTQ file,
## users can still use incompatible input files.
## Try to detect it and abort with a warning.
##
## The awk program exits with status 2 when the first line starts with the
## fields "column", "count", "min"; any other input ends with a different status.
AWK_VALID_STAT='NR==1 && $1=="column" && $2=="count" && $3=="min" { exit 2 } NR>1 { exit }'

# Feed the file through stdin rather than as an awk operand: awk would treat an
# operand such as "x=1.txt" as a variable assignment and "-" as standard input.
# The status is captured explicitly so the check does not depend on $? surviving
# until the next line.
stat_header_status=0
awk "$AWK_VALID_STAT" < "$FILENAME" || stat_header_status=$?

if [ "$stat_header_status" -ne 2 ]; then
  cat >&2 <<EOF
Error: Input file is not a valid statistics report.
This tool (fastq-quality-plot) requires a tabular text file conaining summary statistic about your FASTQ file.
In Galaxy,
Please use the "Compute Quality Statistics" tool (in the "NGS: QC and Manipulation" category) to compute the quality statistics report, and then use this tool with the new statistics report.
On the command line,
Please use the "fastx_quality_stats" program to create the statistics report.
EOF
  exit 1
fi
# Write the gnuplot program for the quality box-plot to standard output.
#
# Globals read: OUTPUTTERM (terminal command), TITLE, FILENAME, NUM_CYCLES.
#
# TITLE and FILENAME are emitted as single-quoted gnuplot strings with every
# single quote doubled. Single-quoted gnuplot strings do no backslash or
# backquote processing, so neither value can terminate its string or trigger
# command substitution. Newlines in the title are flattened to spaces because a
# gnuplot string cannot span lines.
build_gnuplot_script() {
  local sq="'" nl=$'\n'
  local plot_file plot_title
  plot_file=${FILENAME//$sq/$sq$sq}
  plot_title=${TITLE//$nl/ }
  plot_title=${plot_title//$sq/$sq$sq}

  # Data columns used below: 1=position, 7=Q1, 8=median, 9=Q3, 11/12=whiskers.
  # The backslash-newline on the plot line is removed by the shell, so gnuplot
  # receives the two plot clauses as a single command.
  cat <<EOF
$OUTPUTTERM
set boxwidth 0.8
set size 1,1
set key Left inside
set xlabel "read position"
set ylabel "Quality Score (Solexa Scale: 40=Highest, -15=Lowest)"
set title 'Quality Scores $plot_title'
#set auto x
set bars 4.0
set xrange [ 0: $NUM_CYCLES ]
set yrange [-15:45]
set y2range [-15:45]
set xtics 1
set x2tics 1
set ytics 2
set y2tics 2
set tics out
set grid ytics
set style fill empty
plot '$plot_file' using 1:7:11:12:9 with candlesticks lt 1 lw 1 title 'Quartiles' whiskerbars, \
'' using 1:8:8:8:8 with candlesticks lt -1 lw 2 title 'Medians'
EOF
}

#Read number of cycles from the stats file (each line is a cycle, minus the header line)
#But for the graph, I want xrange to reach (num_cycles+1), so I don't subtract 1 now.
NUM_CYCLES=$(wc -l < "$FILENAME")

GNUPLOTCMD=$(build_gnuplot_script)

# gnuplot is the last command, so its exit status becomes the script's exit status.
printf '%s\n' "$GNUPLOTCMD" | gnuplot > "$OUTPUTFILE"
|