This file is indexed.

/usr/bin/bp_gccalc is in bioperl 1.7.2-2.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#!/usr/bin/perl 

use strict;
use warnings;

use Bio::SeqIO;
use Bio::Tools::SeqStats;
use Getopt::Long;
my $format = 'fasta';
my $file;
my $help =0;
my $aggregate;
GetOptions(
	    'f|format:s' => \$format,
	    'i|in:s'     => \$file,
	    'h|help|?'    => \$help,
	    'a|aggregate!'=> \$aggregate,
	    );


my $USAGE = "usage: gccalc.pl -f format -i filename\n";
if( $help ) {
    die $USAGE;
}

$file = shift unless $file;
my $seqin;
if( defined $file ) {
    print "Could not open file [$file]\n$USAGE" and exit unless -e $file;
    $seqin = new Bio::SeqIO(-format => $format,
			    -file   => $file);
} else {
    $seqin = new Bio::SeqIO(-format => $format,
			    -fh     => \*STDIN);
}
my ($total_base, $total_gc);
while( my $seq = $seqin->next_seq ) {
    next if( $seq->length == 0 );
    if( $seq->alphabet eq 'protein' ) {
	warn("gccalc does not work on amino acid sequences ...skipping this seq");
	next;
    }

    my $seq_stats  =  Bio::Tools::SeqStats->new('-seq'=>$seq);
    my $hash_ref = $seq_stats->count_monomers();  # for DNA sequence
    print "Seq: ", $seq->display_id, " ";
    print $seq->desc if $seq->desc;
    print " Len:", $seq->length, "\n";
    $total_base += $seq->length;
    $total_gc   += $hash_ref->{'G'} + $hash_ref->{'C'};
    printf "GC content is %.4f\n", ($hash_ref->{'G'} + $hash_ref->{'C'}) /
	$seq->length();

    foreach my $base (sort keys %{$hash_ref}) {
	print "Number of bases of type ", $base, "= ", $hash_ref->{$base},"\n";
    }
    print "--\n";
}
if( $aggregate ) {
  printf "Total GC content is %.4f out of %d bases\n", $total_gc / $total_base, $total_base;
}
# alternatively one could use code submitted by
# cckim@stanford.edu

sub calcgc {
    my $seq = $_[0];
    my @seqarray = split('',$seq);
    my $count = 0;
    foreach my $base (@seqarray) {
	$count++ if $base =~ /[G|C]/i;
    }

    my $len = $#seqarray+1;
    return $count / $len;
}


__END__

=head1 NAME

bp_gccalc - GC content of nucleotide sequences

=head1 SYNOPSIS

  bp_gccalc [-f/--format FORMAT] [-h/--help] filename
  or
  bp_gccalc [-f/--format FORMAT] < filename
  or
  bp_gccalc [-f/--format FORMAT] -i filename

=head1 DESCRIPTION

This scripts prints out the GC content for every nucleotide sequence
from the input file.

=head1 OPTIONS

The default sequence format is fasta.

The sequence input can be provided using any of the three methods:

=over 3

=item unnamed argument

  bp_gccalc filename

=item named argument

  bp_gccalc -i filename

=item standard input

  bp_gccalc < filename

=back

=head1 FEEDBACK

=head2 Mailing Lists

User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to
the Bioperl mailing list.  Your participation is much appreciated.

  bioperl-l@bioperl.org                  - General discussion
  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists

=head2 Reporting Bugs

Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via the
web:

  https://github.com/bioperl/bioperl-live/issues

=head1 AUTHOR - Jason Stajich

Email jason@bioperl.org

=head1 HISTORY

Based on script code (see bottom) submitted by cckim@stanford.edu

Submitted as part of bioperl script project 2001/08/06

=cut