/usr/bin/vcf-shuffle-cols is in vcftools 0.1.15-1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 | #!/usr/bin/env perl
#
# Author: petr.danecek@sanger
#
use strict;
use warnings;
use Carp;
use Vcf;
my $opts = parse_params();
concat($opts);
exit;
#--------------------------------
sub error
{
my (@msg) = @_;
if ( scalar @msg )
{
croak @msg;
}
die
"About: Reorder columns to match the order in the template VCF.\n",
"Usage: vcf-shuffle-cols [OPTIONS] -t template.vcf.gz file.vcf.gz > out.vcf\n",
"Options:\n",
" -t, --template <file> The file with the correct order of the columns.\n",
" -h, -?, --help This help message.\n",
"\n";
}
sub parse_params
{
my $opts = {};
while (my $arg=shift(@ARGV))
{
if ( $arg eq '-t' || $arg eq '--template' ) { $$opts{template}=shift(@ARGV); next; }
if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); }
if ( -e $arg ) { $$opts{file}=$arg; next }
error("Unknown parameter \"$arg\". Run -h for help.\n");
}
if ( !exists($$opts{template}) ) { error("Missing the -t option.\n"); }
return $opts;
}
sub concat
{
my ($opts) = @_;
my $tmpl = Vcf->new(file=>$$opts{template});
$tmpl->parse_header();
$tmpl->close();
my $vcf = $$opts{file} ? Vcf->new(file=>$$opts{file}) : Vcf->new(fh=>\*STDIN);
$vcf->parse_header();
# Check if one-to-one correspondence can be found and create a mapping
my @new_to_old = ();
for my $tcol (@{$$tmpl{columns}})
{
if ( !exists($$vcf{has_column}{$tcol}) ) { error("TODO: the column names do not match\n"); }
}
for my $vcol (@{$$vcf{columns}})
{
if ( !exists($$tmpl{has_column}{$vcol}) ) { error("TODO: the column names do not match\n"); }
my $new = $$tmpl{has_column}{$vcol} - 1;
my $old = $$vcf{has_column}{$vcol} - 1;
$new_to_old[$new] = $old;
}
# Output the header with modified column order
my $ncols = @{$$tmpl{columns}} - 1;
my @cols = @{$$tmpl{columns}}[9..$ncols];
print $vcf->format_header(\@cols);
while (my $x=$vcf->next_data_array())
{
print $$x[0];
for (my $i=1; $i<=$ncols; $i++)
{
my $idx = $new_to_old[$i];
print "\t".$$x[$idx];
}
print "\n";
}
}
|