/usr/bin/groo-ga.pl is in gramadoir 0.7-3.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/usr/bin/perl
# Shell trampoline: if this file is ever interpreted by a shell rather
# than by perl, the shell executes the first line and re-launches the
# script under /usr/bin/perl with the original arguments.  Perl itself
# parses both lines as one statement, "eval '...' if 0", whose condition
# is always false, so the string is never evaluated.
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
if 0; # not running under some shell
=head1 NAME

groo-ga.pl - Check the grammar of Irish language OpenOffice.org documents

=head1 SYNOPSIS

B<groo-ga.pl> I<filetocheck[.sxw]> I<errorfile[.sxw]>

=head1 DESCRIPTION

This script checks the grammar of the Irish language OpenOffice.org
document given as the first argument, and creates a new document
with the errors highlighted and annotated with appropriate messages.

=head1 REQUIRES

Perl 5.8, Lingua::GA::Gramadoir

=head1 SEE ALSO

=over 4

=item *

L<http://borel.slu.edu/gramadoir/>

=item *

L<Lingua::GA::Gramadoir>

=item *

L<perl(1)>

=back

=head1 AUTHOR

Kevin P. Scannell, E<lt>kscanne@gmail.comE<gt>.

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2004 Kevin P. Scannell

This is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.2 or,
at your option, any later version of Perl 5 you may have available.

=cut
use strict;
use warnings;
use utf8;
use Archive::Zip qw( :ERROR_CODES);
use Lingua::GA::Gramadoir;
use Encode qw(decode encode);
# Flip to a true value to get progress messages on STDERR.
my $debug = 0;

# The script takes exactly two arguments: the document to check and the
# document to create.
unless (@ARGV == 2) {
    print "Usage: $0 filetocheck[.sxw] errorfile[.sxw]\n";
    exit 1;
}

# Current local date as YYYY-MM-DD; it is stamped on every annotation.
my @now = localtime time;
my $datestr = sprintf("%04d-%02d-%02d", $now[5] + 1900, $now[4] + 1, $now[3]);

# Automatic style written into the document preamble; it controls how a
# flagged error is rendered.  Exactly one of the variants below is active.
# double-wavy red underline
my $style = '<style:style style:name="gramadoir" style:family="text"><style:properties style:text-underline="double-wave" style:text-underline-color="#800000"/></style:style>';
# wavy green underline
# my $style = '<style:style style:name="gramadoir" style:family="text"><style:properties style:text-underline="wave" style:text-underline-color="#008000"/></style:style>';
# yellow highlighter
# my $style = '<style:style style:name="gramadoir" style:family="text"><style:properties style:text-background-color="#ffff00"/></style:style>';

# Opening and closing markup of the annotation that carries each message.
my $ann = qq{<office:annotation office:author="An Gramadóir" office:create-date="$datestr"><text:p>};
my $closeann = '</text:p></office:annotation>';

# Markup opened in front of each error.  The matching closing tag is
# derived from it: first drop the attributes, then turn "<" into "</".
my $markup = '<text:span text:style-name="gramadoir">';
(my $closemarkup = $markup) =~ s/[ >].*/>/;
$closemarkup =~ s/</<\//;

# Both file names get a ".sxw" suffix unless they already end in one.
my ($filetocheck, $errorfile) = @ARGV[0, 1];
foreach my $name ($filetocheck, $errorfile) {
    $name =~ s/$/.sxw/ unless $name =~ m/\.sxw$/;
}
# ----------------------------------------------------------------------
# Main pipeline:
#   1. read content.xml out of the input document,
#   2. turn XML character entities into backslash-escaped literals,
#   3. run the grammar checker over the result,
#   4. wrap every reported error in $markup/$closemarkup and follow it
#      with an annotation holding the checker's message,
#   5. turn the escaped literals back into entities, register the
#      highlighting style, and write the new document to $errorfile.
# ----------------------------------------------------------------------
my $zip = Archive::Zip->new();
die "error reading $filetocheck" unless $zip->read($filetocheck) == AZ_OK;

my $xml = $zip->contents('content.xml');
die "no content.xml found in $filetocheck"
    unless defined $xml && length $xml;

# Replace the predefined XML entities by backslash-escaped literal
# characters, so that escaped text characters stay distinguishable from
# real markup (which keeps its bare < > ").  &amp; has to be handled
# last: otherwise "&amp;lt;" would first become "\&lt;" and then be
# decoded a second time.
$xml =~ s/&quot;/\\"/g;
$xml =~ s/&lt;/\\</g;
$xml =~ s/&gt;/\\>/g;
$xml =~ s/&apos;/'/g;
$xml =~ s/&amp;/\\&/g;
print STDERR "Unpacked contents.xml, converted char entities...\n" if ($debug);

my $gr = Lingua::GA::Gramadoir->new(
    # fix_spelling => 1,
    interface_language => 'ga',
    input_encoding     => 'utf-8',
);
print STDERR "Gramadoir object created...\n" if ($debug);

# $errs is used below as a reference to an array of '<error ...>' strings.
my $errs = $gr->grammatical_errors($xml);
print STDERR "Grammatical errors found...\n" if ($debug);

# From here on work with characters rather than octets; the x positions
# are applied with substr() to the decoded lines.
# NOTE(review): assumes the checker reports 0-based line and character
# positions with an inclusive "tox" -- confirm against Lingua::GA::Gramadoir.
$xml = decode("utf-8", $xml);

# A negative LIMIT keeps trailing empty fields, so re-joining the lines
# with "\n" reproduces the input exactly, including a final newline.
my @xmllines = split /\n/, $xml, -1;

my $xmlans = '';
my $curr_y = 0;    # index of the line currently being copied
my $curr_x = 0;    # offset in that line of the first char not yet copied

foreach my $err (@$errs) {
    my ($f_y, $f_x, $t_y, $t_x, $errmsg) =
        $err =~ m/^<error fromy="([0-9]+)" fromx="([0-9]+)" toy="([0-9]+)" tox="([0-9]+)" .+msg="([^"]+)"/;

    # A failed match leaves every field undefined; skip the report
    # instead of feeding undef positions to substr().
    unless (defined $errmsg) {
        warn "skipping unrecognised error report: $err\n";
        next;
    }

    # Positions must move forward through the document and stay inside
    # it.  Otherwise substr() would receive a negative length (which it
    # interprets as "leave that many chars off the end") or an undefined
    # line, and text would be duplicated or lost.
    my $starts_behind = $f_y < $curr_y || ($f_y == $curr_y && $f_x < $curr_x);
    my $ends_early    = $t_y < $f_y    || ($t_y == $f_y    && $t_x < $f_x);
    if ($starts_behind || $ends_early || $t_y > $#xmllines) {
        warn "skipping error report with unusable position $f_y:$f_x-$t_y:$t_x\n";
        next;
    }

    # Copy the untouched text between the previous error and this one.
    while ($curr_y < $f_y) {
        $xmlans .= substr($xmllines[$curr_y], $curr_x) . "\n";
        $curr_y++;
        $curr_x = 0;
    }
    $xmlans .= substr($xmllines[$f_y], $curr_x, $f_x - $curr_x);
    $curr_x = $f_x;

    # Collect the flagged text, which may run over several lines.
    my $errorspan = '';
    while ($curr_y < $t_y) {
        $errorspan .= substr($xmllines[$curr_y], $curr_x) . "\n";
        $curr_y++;
        $curr_x = 0;
    }
    $t_x++;    # first char after error
    $errorspan .= substr($xmllines[$t_y], $curr_x, $t_x - $curr_x);
    $curr_x = $t_x;

    # Close the highlight span in front of every run of tags inside the
    # flagged text and reopen it behind the run, so the inserted span
    # never straddles existing elements.
    $errorspan =~ s/((\s*<[^>]+>\s*)+)/$closemarkup$1$markup/g;
    $xmlans .= $markup . $errorspan . $closemarkup;

    # Slashes in the message become escaped double quotes; the final
    # escaping pass below turns them into &quot;.
    # (Presumably the checker uses /.../ to quote words -- verify.)
    $errmsg =~ s/\//\\"/g;
    $xmlans .= $ann . $errmsg . $closeann;
}
print STDERR "All error markup inserted...\n" if ($debug);

# Copy whatever follows the last error: the rest of the current line
# (from $curr_x, which is also valid when no error was reported), then
# every remaining line, each preceded by the newline that split() removed.
if ($curr_y <= $#xmllines) {
    $xmlans .= substr($xmllines[$curr_y], $curr_x);
    foreach my $y ($curr_y + 1 .. $#xmllines) {
        $xmlans .= "\n" . $xmllines[$y];
    }
}
print STDERR "New XML completed...\n" if ($debug);

# Back to octets, then restore the entities.  "\&" goes first, mirroring
# the order used when the entities were removed.  Apostrophes stay
# literal: they are legal in XML text and in double-quoted attributes.
$xmlans = encode("utf-8", $xmlans);
$xmlans =~ s/\\&/&amp;/g;
$xmlans =~ s/\\"/&quot;/g;
$xmlans =~ s/\\</&lt;/g;
$xmlans =~ s/\\>/&gt;/g;
print STDERR "New XML converted to utf-8 octets...\n" if ($debug);

# insert description of gramadoir style in preamble: either directly
# after an opening <office:automatic-styles> tag, or by expanding the
# empty-element form <office:automatic-styles/>.
my $in_open  = ($xmlans =~ s/(?<=<office:automatic-styles>)/$style/);
my $in_empty = ($xmlans =~ s/(?<=<office:automatic-styles)\/>/>$style<\/office:automatic-styles>/);
warn "no <office:automatic-styles> element found; errors will not be highlighted\n"
    unless $in_open || $in_empty;

$zip->contents('content.xml', $xmlans);
print STDERR "New XML replaces old in contents.xml...\n" if ($debug);

die "could not write to $errorfile" unless ($zip->writeToFileNamed( $errorfile ) == AZ_OK);
print STDERR "Zip file written...\n" if ($debug);
exit 0;
|