/usr/bin/fastx_nucleotide_distribution_graph.sh is in fastx-toolkit 0.0.14-1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/usr/bin/env bash
# FASTX-toolkit - FASTA/FASTQ preprocessing tools.
# Copyright (C) 2009-2013 A. Gordon (assafgordon@gmail.com)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Print the command-line help text on stdout and terminate the script
# with exit status 0.
#
# Takes no arguments. $0 is interpolated so the help shows the name the
# script was invoked under.
usage()
{
cat <<EOF
FASTA/Q Nucleotide Distribution Plotter

Usage: $0 [-i INPUT.TXT] [-t TITLE] [-p] [-o OUTPUT]

 [-p] - Generate PostScript (.PS) file. Default is PNG image.
 [-i INPUT.TXT] - Input file. Should be the output of "fastx_quality_stats" program.
 [-o OUTPUT] - Output file name. default is STDOUT.
 [-t TITLE] - Title - will be plotted on the graph.

EOF
# Explicit status instead of inheriting the status of the last command.
exit 0
}
#
# Input data columns: column count min max sum mean Q1 med Q3 IQR lW rW A_Count C_Count G_Count T_Count N_Count Max_count
# As produced by the "fastx_quality_stats" program
# Defaults for the command-line options; each may be overridden below.
TITLE=""                                 # -t: text appended to the graph title (empty by default)
FILENAME=""                              # -i: statistics report to plot (required)
OUTPUTTERM="set term png size 1048,768"  # gnuplot terminal line; -p switches it to PostScript
OUTPUTFILE="/dev/stdout"                 # -o: where gnuplot's output is written

# The leading ':' in the optstring puts getopts in silent mode: a missing
# option argument yields ':' and an unknown option yields '?', both with the
# offending option letter in OPTARG, so the script prints its own diagnostics.
while getopts ":t:i:o:ph" Option
do
  case $Option in
    t ) TITLE="for $OPTARG" ;;
    i ) FILENAME=$OPTARG ;;
    o ) OUTPUTFILE="$OPTARG" ;;
    p ) OUTPUTTERM="set term postscript enhanced color \"Helvetica\" 8" ;;
    h ) usage ;;
    # Diagnostics go to stderr because stdout is the default destination of
    # the generated image; exit with 1 like every other error path.
    : ) echo "option -$OPTARG requires an argument. use '-h' for usage information." >&2; exit 1 ;;
    * ) echo "unrecognized argument. use '-h' for usage information." >&2; exit 1 ;;
  esac
done
shift $((OPTIND - 1))

# An input file is mandatory: without one, show the help text (usage exits).
if [ -z "$FILENAME" ]; then
  usage
fi

if [ ! -r "$FILENAME" ]; then
  # Report the -i value; the positional parameters were shifted away above.
  echo "Error: can't open input file ($FILENAME)." >&2
  exit 1
fi
##
## Input validation
## Too many users (in Galaxy) try to plot a FASTQ file
## (without using the 'fastq statistics' tool first).
##
## gnuplot's error in that case is cryptic, and support emails are annoying.
##
## Try to detect FASTA/FASTQ input, and give a detailed, easy-to-understand warning.
##
##
# awk program that inspects the first lines of a file and prints "FASTA" or
# "FASTQ" when line 1 looks like a record header ('>' or '@') and line 2 is a
# pure nucleotide string. N and lowercase bases are accepted so that reads
# containing ambiguous calls are still recognised. Prints nothing otherwise.
AWK_FASTX_DETECTION='
NR==1 && $0 ~ /^>/ { fasta_id = 1 }
NR==1 && $0 ~ /^@/ { fastq_id = 1 }
NR==2 && $0 ~ /^[ACGTNacgtn][ACGTNacgtn]*$/ { nucleotides = 1 }
NR>3 { exit }
END { if ( fasta_id && nucleotides ) { print "FASTA" }
if ( fastq_id && nucleotides ) { print "FASTQ" }
}'

# Print "FASTA", "FASTQ" or nothing for the file named by $1.
# The file is fed through stdin rather than passed as an awk operand, so a
# name of the form var=value cannot be mistaken for an awk assignment.
detect_fastx_type()
{
  awk "$AWK_FASTX_DETECTION" < "$1"
}

# Explain on stderr that raw sequence files cannot be plotted directly.
# $1 - the detected format name ("FASTA" or "FASTQ"), used in the message.
report_raw_sequence_input()
{
cat >&2 <<EOF
Error: It looks like your input file is a $1 file.
This tool (fastx_nucleotide_distribution_graph.sh) can't use $1 files directly - it requires a tabular text file containing summary statistics about your $1 file.
In Galaxy,
Please use the "Compute Quality Statistics" tool (in the "NGS: QC and Manipulation" category) to compute the quality statistics report, and then use this tool with the new statistics report.
On the command line,
Please use the "fastx_quality_stats" program to create the statistics report.
EOF
}

INPUT_TYPE=$(detect_fastx_type "$FILENAME")
case "$INPUT_TYPE" in
  FASTA | FASTQ )
    report_raw_sequence_input "$INPUT_TYPE"
    exit 1
    ;;
esac
##
## Even if this is not a FASTA/FASTQ file,
## users can still use incompatible input files.
## Try to detect it and abort with a warning.
# awk program that exits with status 2 when the first line starts with the
# fields "column", "count", "min", and with status 0 otherwise. Only the
# first line is examined.
AWK_VALID_STAT='NR==1 && $1=="column" && $2=="count" && $3=="min" { exit 2 } NR>1 { exit }'

# Succeed only when the file named by $1 starts with the expected header.
# The file is fed through stdin rather than passed as an awk operand, so a
# name of the form var=value cannot be mistaken for an awk assignment.
is_stats_report()
{
  awk "$AWK_VALID_STAT" < "$1"
  [ $? -eq 2 ]
}

if ! is_stats_report "$FILENAME" ; then
cat >&2 <<EOF
Error: Input file is not a valid statistics report.
This tool (fastx_nucleotide_distribution_graph.sh) requires a tabular text file containing summary statistics about your FASTA/FASTQ file.
In Galaxy,
Please use the "Compute Quality Statistics" tool (in the "NGS: QC and Manipulation" category) to compute the quality statistics report, and then use this tool with the new statistics report.
On the command line,
Please use the "fastx_quality_stats" program to create the statistics report.
EOF
  exit 1
fi
# Write the gnuplot script for the stacked nucleotide histogram to stdout.
#
# Globals read:
#   OUTPUTTERM - complete gnuplot "set term ..." line
#   TITLE      - text appended to the graph title (may be empty)
#   FILENAME   - statistics report to plot
#
# TITLE and FILENAME are placed in single-quoted gnuplot strings with any
# embedded single quote doubled. A quote character in either value can then
# no longer terminate the string, and gnuplot does not perform backquote
# command substitution inside single-quoted strings.
#
# Each series is plotted as 100 * column N / column 18, for N = 13..17,
# titled A, C, G, T and N respectively.
build_gnuplot_script()
{
  local sq="'"
  local data_file=${FILENAME//"$sq"/$sq$sq}
  local plot_title=${TITLE//"$sq"/$sq$sq}

  # Unquoted heredoc: variables are expanded and a trailing backslash joins
  # the line with the next one before gnuplot ever sees it.
cat <<EOF
$OUTPUTTERM
set boxwidth 0.75 absolute
set size 1,1
set style fill solid 1.00 border -1
set xlabel "read position"
set title 'Nucleotides distribution $plot_title'
set ylabel "% of total (per read position)"
#set grid noxtics nomxtics ytics nomytics noztics nomztics \
# nox2tics nomx2tics noy2tics nomy2tics nocbtics nomcbtics
#set grid layerdefault linetype 0 linewidth 1.000, linetype 0 linewidth 1.000
set key outside right top vertical Left reverse enhanced autotitles columnhead nobox
set key invert samplen 4 spacing 1 width 0 height 0
set style histogram rowstacked
set style data histograms
set noytics
set xtics 1
set yrange [ 0.00000 : 100.000 ] noreverse nowriteback
plot '$data_file' using (100.*column(13)/column(18)):xtic(1) title "A" lt rgb "#5050ff", \
'' using (100.*column(14)/column(18)) title "C" lt rgb "#e00000", \
'' using (100.*column(15)/column(18)) title "G" lt rgb "#00c000", \
'' using (100.*column(16)/column(18)) title "T" lt rgb "#e6e600", \
'' using (100.*column(17)/column(18)) title "N" lt rgb "pink"
EOF
}

GNUPLOTCMD=$(build_gnuplot_script)
# The script's exit status is that of gnuplot, the last command in the pipeline.
printf '%s\n' "$GNUPLOTCMD" | gnuplot > "$OUTPUTFILE"
|