This file is indexed.

/usr/bin/fastx_nucleotide_distribution_graph.sh is in fastx-toolkit 0.0.14-5.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#!/usr/bin/env bash

#    FASTX-toolkit - FASTA/FASTQ preprocessing tools.
#    Copyright (C) 2009-2013  A. Gordon (assafgordon@gmail.com)
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU Affero General Public License as
#   published by the Free Software Foundation, either version 3 of the
#   License, or (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU Affero General Public License for more details.
#
#    You should have received a copy of the GNU Affero General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.

usage()
{
	echo "FASTA/Q Nucleotide Distribution Plotter"
	echo
	echo "Usage: $0 [-i INPUT.TXT] [-t TITLE] [-p] [-o OUTPUT]"
	echo
	echo "  [-p]           - Generate PostScript (.PS) file. Default is PNG image."
	echo "  [-i INPUT.TXT] - Input file. Should be the output of \"fastx_quality_statistics\" program."
	echo "  [-o OUTPUT]    - Output file name. default is STDOUT."
	echo "  [-t TITLE]     - Title - will be plotted on the graph."
	echo
	exit 
}

#
# Input Data columns: #pos	cnt	min	max	sum       	mean	Q1	med	Q3	IQR	lW	rW A_Count	C_Count	G_Count	T_Count	N_Count
#  As produced by "fastq_quality_statistics" program

TITLE=""					# default title is empty
FILENAME=""
OUTPUTTERM="set term png size 1048,768"		# default output terminal is "PNG"
OUTPUTFILE="/dev/stdout"   			# Default output file is simply "stdout"
while getopts ":t:i:o:ph" Option
	do
	case $Option in
		t ) TITLE="for $OPTARG" ;;
		i ) FILENAME=$OPTARG ;;
		o ) OUTPUTFILE="$OPTARG" ;;
		p ) OUTPUTTERM="set term postscript enhanced color \"Helvetica\" 8" ;;
		h ) usage ;;
		* ) echo "unrecognized argument. use '-h' for usage information."; exit -1 ;;
	esac
done
shift $(($OPTIND - 1)) 


if [ -z "$FILENAME" ]; then
	usage
fi

if [ ! -r "$FILENAME" ]; then
	echo "Error: can't open input file ($1)." >&2
	exit 1
fi

##
## Input validation
## Too many users (in galaxy) try to plot a FASTQ file
## (without using the 'fastq statistics' tool first).
##
## gnuplot's error in that case is crypt, and support emails are annoying.
##
## try to detect FASTA/FASTQ input, and give a detailed, easy-to-understand warning.
##
##
AWK_FASTX_DETECTION='
NR==1 && $0 ~ /^>/ { fasta_id = 1 }
NR==1 && $0 ~ /^@/ { fastq_id = 1 }
NR==2 && $0 ~ /^[ACGT][ACGT]*$/ { nucleotides = 1 }
NR>3 { exit }
END { if ( fasta_id && nucleotides ) { print "FASTA" }
      if ( fastq_id && nucleotides ) { print "FASTQ" }
}'

INPUT_TYPE=$(awk "$AWK_FASTX_DETECTION" "$FILENAME")

if [ "x$INPUT_TYPE" = "xFASTA" ] ; then
#this doesn't even make sense: FASTA files don't contain any quality scores
cat>&2<<EOF
Error: It looks like your input file is a FASTA file.

This tool (fastq-quality-plot) can't use FASTA files directly - it requires a tabular text file conaining summary statistic about your FASTA file.

In Galaxy,
Please use the "Compute Quality Statistics" tool (in the "NGS: QC and Manipulation" category) to compute the quality statistics report, and then use this tool with the new statistics report.

On the command line,
Please use the "fastx_quality_stats" program to create the statistics report.
EOF
exit 1
fi
if [ "x$INPUT_TYPE" = "xFASTQ" ] ; then
cat>&2<<EOF
Error: It looks like your input file is a FASTQ file.

This tool (fastq-quality-plot) can't use FASTQ files directly - it requires a tabular text file conaining summary statistic about your FASTQ file.

In Galaxy,
Please use the "Compute Quality Statistics" tool (in the "NGS: QC and Manipulation" category) to compute the quality statistics report, and then use this tool with the new statistics report.

On the command line,
Please use the "fastx_quality_stats" program to create the statistics report.
EOF
exit 1
fi

##
## Even if this is not a FASTA/FASTQ file,
## users can still use incompatible input files.
## Try to detect it and abort with a warning.
AWK_VALID_STAT='NR==1 && $1=="column" && $2=="count" && $3=="min" { exit 2 } NR>1 { exit }'

awk "$AWK_VALID_STAT" "$FILENAME"
if [ $? -ne 2 ] ; then
cat>&2<<EOF
Error: Input file is not a valid statistics report.

This tool (fastq-quality-plot) requires a tabular text file conaining summary statistic about your FASTQ file.

In Galaxy,
Please use the "Compute Quality Statistics" tool (in the "NGS: QC and Manipulation" category) to compute the quality statistics report, and then use this tool with the new statistics report.

On the command line,
Please use the "fastx_quality_stats" program to create the statistics report.
EOF
exit 1
fi

GNUPLOTCMD="
$OUTPUTTERM
set boxwidth 0.75 absolute
set size 1,1
set style fill solid 1.00 border -1
set xlabel \"read position\"
set title \"Nucleotides distribution $TITLE\" 
set ylabel \"% of total (per read position)\" 
#set grid noxtics nomxtics ytics nomytics noztics nomztics \
# nox2tics nomx2tics noy2tics nomy2tics nocbtics nomcbtics
#set grid layerdefault   linetype 0 linewidth 1.000,  linetype 0 linewidth 1.000
set key outside right top vertical Left reverse enhanced autotitles columnhead nobox
set key invert samplen 4 spacing 1 width 0 height 0 
set style histogram rowstacked 
set style data histograms 
set noytics
set xtics 1
set yrange [ 0.00000 : 100.000 ] noreverse nowriteback

plot '$FILENAME' using (100.*column(13)/column(18)):xtic(1) title \"A\" lt rgb \"#5050ff\", \
       '' using (100.*column(14)/column(18)) title \"C\" lt rgb \"#e00000\", \
       '' using (100.*column(15)/column(18)) title \"G\" lt rgb \"#00c000\", \
       '' using (100.*column(16)/column(18)) title \"T\" lt rgb \"#e6e600\", \
       '' using (100.*column(17)/column(18)) title \"N\" lt rgb \"pink\"
"

echo "$GNUPLOTCMD" | gnuplot > "$OUTPUTFILE"