/usr/bin/ngrams is in libtext-ngrams-perl 2.005-1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/usr/bin/perl -w
eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}'
if 0; # not running under some shell
use strict;
use vars qw($VERSION);
#<? read_starfish_conf(); echo "\$VERSION = $ModuleVersion;"; !>
#+
$VERSION = 2.005;
#-
# $Revision: 1.26 $
use Text::Ngrams;
use Getopt::Long;
# Destinations for the command-line switches handled by GetOptions below.
# Each one stays undefined unless its switch is given.
my $help;
my $version;
my $orderby;
my $onlyfirst;
my $limit;
my $spartan;

# Values used when --n and --type are not supplied.
my ($n, $type) = (3, 'character');
# help([$status]) - print the usage summary to STDOUT and terminate.
#
#   $status  Optional exit status. When omitted it is 0 if the user asked
#            for the text explicitly (the file-level $help flag set by
#            --help is true) and 1 otherwise, i.e. when the text is shown
#            because the command line could not be accepted.
#
# Never returns: the sub always ends by calling exit().
sub help {
    my ($status) = @_;

    # An explicit --help is a successful run; every other caller reaches
    # this sub because of a usage error and must report failure.
    $status = $help ? 0 : 1 unless defined $status;

    # The heredoc body stays at column 0 so the printed text is not indented;
    # $0 is interpolated to show the name the script was invoked with.
    print <<EOF;
Usage: $0 [options] [files]
Compute the ngram frequencies and produce tables to the stdout.
Options:
--n=N The default is 3-grams.
--normalize Produce normalized frequencies (divided by the total
number of n-grams of the same size)
--type=T The default is character. Other types include: byte,
word, utf8, or there can be user-defined types.
--limit=N Limit the number of distinct n-grams.
BEWARE: Final tables may be inaccurate if limit is used.
--help Show this help.
--version Show version.
--orderby=ARG ARG can be: frequency or ngram.
--onlyfirst=N Only first N ngrams are printed for each n.
--spartan If specified, only the n-grams of maximal length are
printed.
The options can be shortened to their unique prefixes and
the two dashes to one dash. No files means using STDIN.
NOTE: The documentation of the module Text::Ngrams provides more
information.
EOF
    exit($status);
}
# Set by --normalize; read again when the output parameters are assembled.
my $opt_normalize;

# Getopt::Long specification: option spec => variable receiving the value.
my @option_spec = (
    'n=i'         => \$n,
    'normalize'   => \$opt_normalize,
    'type=s'      => \$type,
    'limit=i'     => \$limit,
    'help'        => \$help,
    'version'     => \$version,
    'orderby=s'   => \$orderby,
    'onlyfirst=i' => \$onlyfirst,
    'spartan'     => \$spartan,
);

# A command line GetOptions rejects, or an n-gram size that is not a
# positive whole number, ends the run with the usage text.
GetOptions(@option_spec) or help();
help() unless $n >= 1 && int($n) == $n;
# version() - print the script version ($VERSION) followed by a newline to
# STDOUT and terminate.
#
# Called when --version is given, so the run is a success and the exit
# status is 0. Never returns.
sub version {
    print $VERSION, "\n";
    exit(0);
}
# Informational switches are answered first; both subs terminate the run.
if ($help)    { help() }
if ($version) { version() }

# Constructor arguments for the n-gram counter. A limit is passed on only
# when a positive value was supplied.
my %ngram_args = (windowsize => $n, type => $type);
$ngram_args{'limit'} = $limit if defined($limit) && $limit > 0;
my $ng = Text::Ngrams->new(%ngram_args);

# Files named on the command line are processed; with none, STDIN is used.
$ng->process_files(@ARGV ? @ARGV : \*STDIN);

# Output arguments: each optional setting is forwarded only when the
# corresponding switch carried a usable value.
my %output_args = ('out' => \*STDOUT);
$output_args{'orderby'}   = $orderby       if defined($orderby) && $orderby;
$output_args{'onlyfirst'} = $onlyfirst     if defined($onlyfirst) && $onlyfirst > 0;
$output_args{'normalize'} = $opt_normalize if $opt_normalize;
$output_args{'spartan'}   = $spartan       if $spartan;

print $ng->to_string(%output_args);
exit(0);
__END__
=head1 NAME
ngrams - Compute the ngram frequencies and produce tables to the stdout.
=head1 SYNOPSIS
ngrams [--version] [--help] [--n=3] [--normalize] [--type=TYPE]
[--limit=N] [--orderby=ORD] [--onlyfirst=N] [--spartan] [input files]
=head1 DESCRIPTION
This script produces n-gram tables of the input files to the standard
output.
Options:
=over 4
=item --normalize
Prints normalized n-gram frequencies; i.e., the n-gram counts divided
by the total number of n-grams of the same size.
=item --onlyfirst=NUMBER
Prints only the first NUMBER n-grams for each n. See Text::Ngrams module.
=item --limit=NUMBER
Limit the total number of distinct n-grams (for efficiency reasons,
the counts may not be correct at the end).
=item --version
Prints version.
=item --help
Prints help.
=item --n=NUMBER
N-gram size, produces 3-grams by default.
=item --orderby=frequency|ngram
The n-gram order. See Text::Ngrams module.
=item --type=character|byte|word|utf8
Type of n-grams produced. See Text::Ngrams module.
=item --spartan
Prints only the n-grams of maximal length.
=back
=head1 PREREQUISITES
Text::Ngrams,
Getopt::Long
=head1 SCRIPT CATEGORIES
Text::Statistics
=head1 README
N-gram analysis for various kinds of n-grams (character, words, bytes,
utf8, and user-defined). Based on Text::Ngrams module.
=head1 SEE ALSO
Text::Ngrams module.
=head1 COPYRIGHT
Copyright 2003-2013 Vlado Keselj F<http://web.cs.dal.ca/~vlado>
This module is provided "as is" without expressed or implied warranty.
This is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.
The latest version can be found at F<http://web.cs.dal.ca/~vlado/srcperl/>.
=cut
|