/usr/bin/run_mapsembler2_pipeline is in mapsembler2 2.2.4+dfsg-1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 | #!/bin/bash
#######################################################################
#################### HEADER, CHANGE VALUES HERE #######################
#######################################################################
starters_file=""
read_sets="" # FOR instance: "read_set1.fa.gz read_set2.fq.gz"
prefix_index="index" # all files used for indexing will be written will start with this prefix
prefix_results="res" # all final files will be written will start with this prefix
extremities_filename="starter_extremities.fa"
k=31 # size of kmers
c=5 # minimal coverage
d=1 # estimated number of error per read
g=3000000000 # estimated genome size. Used only to control memory usage. e.g. 3 billion (3000000000) uses 4Gb of RAM.
t=0
f=1 #kind of process (search mode 1=Breadth and 2=Depth)
x=40 #node length limit
y=10000 #graph depth limit
PATH_TOOLS="" # path were executables mapsembler and phaser are. Leave blank if they are located in a directory located in the PATH environnement variable
#######################################################################
#################### END HEADER #######################
#######################################################################
function help {
echo "run_mapsembler_pipeline.sh, a pipelining mapsembler2_extremities, mapsembler2_extend and kissread_g"
echo "Usage: ./run_mapsembler_and_phaser.sh -s <starter.fasta> -r <reads.faste> -t [1/2/3/4]<options>"
echo -e "\t \t -s: file containing starters (fasta)"
echo -e "\t \t -r list of reads separated by space, surrounded by the '\"' character. Note that reads may be in fasta or fastq format, gzipped or not. Example: -r \"data_sample/reads_sequence1.fasta data_sample/reads_sequence2.fasta.gz\"."
echo -e "\t \t -t: kind of assembly: 1=unitig (fasta), 2=contig (fasta), 3=unitig (graph), 4=contig(graph)"
echo -e "<options>:"
echo -e "\t\t -p prefix. All out files will start with this prefix. Example: -p my_prefix"
echo -e "\t\t -k value. Set the length of used kmers. Must fit the compiled value. Default=31. Example -k 31"
echo -e "\t\t -c value. Set the minimal coverage. Default=5. Example -c 5"
echo -e "\t\t -d value. Set the number of authorized substitutions used while mapping reads on found SNPs. Default=1. Example: -d 1"
echo -e "\t\t -g value. Estimated genome size. Used only to control memory usage. e.g. 3 billion (3000000000) uses 4Gb of RAM. Default=10 million. Example: -d 10000000"
echo -e "\t\t -f value. Set the process of search in the graph (1=Breadth and 2=Depth). Default=1. Example: -f 1"
echo -e "\t\t -x value. Set the maximal nodes length . Default=40. Example: -x 40"
echo -e "\t\t -y value. Set the maximal graph depth . Default=10000. Example: -y 10000"
echo -e "\t\t -h Prints this message and exist"
echo "Any further question: read the readme file or contact us: pierre.peterlongo@inria.fr"
}
#######################################################################
#################### GET OPTIONS #######################
#######################################################################
while getopts "s:r:t:p:k:c:d:g:f:x:y:h" opt; do
case $opt in
h)
help
exit
;;
s)
echo "use starter file: $OPTARG" >&2
starters_file=$OPTARG
;;
r)
echo "use read set: $OPTARG" >&2
read_sets=$OPTARG
;;
p)
echo "use prefix=$OPTARG" >&2
prefix_index=$OPTARG
prefix_results=$OPTARG
;;
k)
echo "use k=$OPTARG" >&2
k=$OPTARG
;;
t)
echo "use t=$OPTARG" >&2
t=$OPTARG
;;
c)
echo "use c=$OPTARG" >&2
c=$OPTARG
;;
d)
echo "use d=$OPTARG" >&2
d=$OPTARG
;;
g)
echo "use g=$OPTARG" >&2
g=$OPTARG
;;
f)
echo "use f=$OPTARG" >&2
f=$OPTARG
;;
x)
echo "use x=$OPTARG" >&2
x=$OPTARG
;;
y)
echo "use y=$OPTARG" >&2
y=$OPTARG
;;
\?)
echo "Invalid option: -$OPTARG" >&2
exit 1
;;
:)
echo "Option -$OPTARG requires an argument." >&2
exit 1
;;
esac
done
#######################################################################
#################### END GET OPTIONS #######################
#######################################################################
if [ -z "$starters_file" ] && [ -z "$read_sets" ];
then
help
exit
fi
if [ $t -eq 0 ]
then
echo "-t is mandatory, please fix t in value 1, 2, 3, or 4"
exit
fi
######### CHECK THE k PARITY ##########
rest=$(( $k % 2 ))
if [ $rest -eq 0 ]
then
echo "k=$k is even number, to avoid palindromes we set it to $(($k-1))"
k=$(($k-1))
fi
#######################################
#######################################################################
#################### OPTIONS SUMMARY #######################
#######################################################################
MY_PATH="`( cd \"$MY_PATH\" && pwd )`" # absolutized and normalized
if [ -z "$MY_PATH" ] ; then
# error; for some reason, the path is not accessible
# to the script (e.g. permissions re-evaled after suid)
exit 1 # fail
fi
echo -e "\tRunning pipeline Mapsembler2_extremities, Mapsembler2_extend and Kissread_g, in directory "$MY_PATH" with following parameters:"
echo -e "\t\t fasta file containing starters="$starters_file
echo -e "\t\t fasta files containing reads="$read_sets
echo -e "\t\t fasta files generating and containing starter extremities ="$extremities_filename
echo -e "\t\t prefix for index files="$prefix_index
echo -e "\t\t prefix for results files="$prefix_results
echo -e "\t\t k="$k
echo -e "\t\t t="$t
echo -e "\t\t c="$c
echo -e "\t\t d="$d
echo -e "\t\t g="$g
echo -e "\t\t f="$f
echo -e "\t\t x="$x
echo -e "\t\t y="$y
echo -e -n "\t\t starting date="
date
echo
#######################################################################
#################### END OPTIONS SUMMARY #######################
#######################################################################
#######################################################################
#################### mapsembler_extremeities #######################
#######################################################################
echo -e "\n"
echo -e "\033[31m running $PATH_TOOLS\mapsembler2_extremities --k $k --min-solid-subkmer $c --starters $starters_file --reads \"$read_sets\" --output $extremities_filename \033[0m"
$PATH_TOOLS\mapsembler2_extremities --k $k --min-solid-subkmer $c --starters $starters_file --reads "$read_sets" --output $extremities_filename
if [ $? -ne 0 ]
then
echo "there was a problem with mapsembler, command line: $PATH_TOOLS""mapsembler2_extremities --k $k --starters $starters_file --reads $read_sets --output $extremities_filename"
exit
fi
#######################################################################
#################### mapsembler #######################
#######################################################################
if [ -f "$extremities_filename" ]
then
echo -e "\n"
echo -e "\033[31m running $PATH_TOOLS""mapsembler2_extend $extremities_filename $read_sets -t $t -k $k -c $c -g $g -x $x -y $y -f $f -i $prefix_index -o $prefix_results\033[0m"
$PATH_TOOLS\mapsembler2_extend $extremities_filename $read_sets -t $t -k $k -c $c -g $g -x $x -y $y -f $f -i $prefix_index -o $prefix_results
if [ $? -ne 0 ]
then
echo "there was a problem with mapsembler, command line: $PATH_TOOLS""mapsembler_extend $extremities_filename $read_sets -t $t -k $k -c $c -g $g -x $x -y $y -f $f -i $prefix_index -o $prefix_results"
exit
fi
fi
#######################################################################
#################### kissreads_g and kissreads #######################
#######################################################################
if [ $t -gt 2 ]
then
echo -e "\n"
echo -e "\033[31m running $PATH_TOOLS""mapsembler2_kissreads_graph $prefix_results\_k_$k\_c_$c\_t_$t.json $read_sets -k $k -c $c -t m -M -o $prefix_results\_k_$k\_c_$c\_t_$t\_modified.json\033[0m"
$PATH_TOOLS\mapsembler2_kissreads_graph $prefix_results\_k_$k\_c_$c\_t_$t.json $read_sets -k $k -c $c -t m -M -o $prefix_results\_k_$k\_c_$c\_t_$t\_modified.json
if [ $? -ne 0 ]
then
echo "there was a problem with mapsembler2_kissreads_graph"
exit
fi
echo -e "\n"
echo -e "\033[31m $PATH_TOOLS""mapsembler2_kissreads_graph $prefix_results\_k_$k\_c_$c\_t_$t\_modified.json $read_sets -k $k -c $c -t c -M -o $prefix_results\_k_$k\_c_$c\_t_$t\_modified_and_covered.json\033[0m"
$PATH_TOOLS\mapsembler2_kissreads_graph $prefix_results\_k_$k\_c_$c\_t_$t\_modified.json $read_sets -k $k -c $c -t c -M -o $prefix_results\_k_$k\_c_$c\_t_$t\_modified_and_covered.json
if [ $? -ne 0 ]
then
echo "there was a problem with mapsembler2_kissreads_graph"
exit
fi
res_file=$prefix_results\_k_$k\_c_$c\_t_$t\_modified_and_covered.json
else
echo -e "\n"
echo -e "\033[31m running $PATH_TOOLS""mapsembler2_kissreads $prefix_results\_k_$k\_c_$c\_t_$t.fasta $read_sets -f -o $prefix_results\_coherent_k_$k\_c_$c\_t_$t.fasta -u $prefix_results\_uncoherent_k_$k\_c_$c\_t_$t.fasta\033[0m"
$PATH_TOOLS\mapsembler2_kissreads $prefix_results\_k_$k\_c_$c\_t_$t.fasta $read_sets -k $k -c $c -d $d -f -o $prefix_results\_coherent_k_$k\_c_$c\_t_$t.fasta -u $prefix_results\_uncoherent_k_$k\_c_$c\_t_$t.fasta
if [ $? -ne 0 ]
then
echo "there was a problem with mapsembler2_kissreads"
exit
fi
res_file=$prefix_results\_coherent_k_$k\_c_$c\_t_$t.fasta
fi
#/Users/grizk/gassst -d test_k_31_c_4_coherent.fa -i /Users/grizk/kissnp2/kissnp2_tool/snp_list_readsnp -p 100 -w 15 -m 8 -l 0 -r 1 -o temp;
echo "mapsembler done, results are in $res_file"
echo -e -n "\t ending date="
date
echo -e "\t Thanks for using mapsembler - http://http://colibread.inria.fr/mapsembler/"
|