/usr/bin/latexmlfind is in latexml 0.7.0-1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 | #!/usr/bin/perl -w
eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}'
if 0; # not running under some shell
use strict;
use FindBin;
use lib "$FindBin::RealBin/../lib";
use Getopt::Long qw(:config no_ignore_case);
use Pod::Usage;
use LaTeXML::Common::XML;
use Text::Wrap;
#**********************************************************************
# Parse command line
my $VERSION = '0.0.1';
#my $identity = "latexmlfind (LaTeXML version $LaTeXML::VERSION)";
my $identity = "latexmlfind ($VERSION)";
my($verbosity)=(0);
my ($help,$showversion,$SKELETON)=(0,0,0);
my (@symbols,@unknowns,@posfuncs,@labels,@refnums);
GetOptions("symbol=s" => \@symbols,,
"unknown=s" => \@unknowns,
"possiblefunction=s" => \@posfuncs,
"label=s" => \@labels,
"refnum=s" => \@refnums,
"skeleton" => \$SKELETON,
"quiet" => sub { $verbosity--; },
"verbose" => sub { $verbosity++; },
"VERSION" => \$showversion,
"help" => \$help,
) or pod2usage(-message => $identity, -exitval=>1, -verbose=>0, -output=>\*STDERR);
pod2usage(-message=>$identity, -exitval=>1, -verbose=>2, -output=>\*STDOUT) if $help;
if($showversion){ print STDERR "$identity\n"; exit(1); }
pod2usage(-message=>"$identity\nMissing input TeX file",
-exitval=>1, -verbose=>0,-output=>\*STDERR) unless @ARGV;
my $source = $ARGV[0];
#**********************************************************************
# Do the processing.
print STDERR "$identity\n" unless $verbosity < 0;
binmode(STDOUT,":utf8"); # Make sure output can handle UTF8
my $DOC = LaTeXML::Common::XML::Parser->new()->parseFile($source);
my $XPATH = LaTeXML::Common::XML::XPath->new(ltx=>"http://dlmf.nist.gov/LaTeXML");
$XPATH->registerFunction('match-font',\&LaTeXML::Font::match_font);
# Objects being labelled sections, equations, etc.
our %OBJECTS=();
our $ROOT_OBJECT = {type=>'Document',
subobjects=>[], items=>[]};
foreach my $symbol (@symbols){
collect_matches("Symbols \"$symbol\"",
"//ltx:Math[descendant::ltx:XMTok[\@name='$symbol' or text()='$symbol']"
# BUT which isn't in presentation branch of an XMDual!!
# ie. no ancestor w/preceding sibling has parent = XMDual
. "[not(ancestor-or-self::*[preceding-sibling::*][parent::ltx:XMDual])]"
."]"); }
foreach my $spec (@unknowns){
my $symbol = $spec;
my $font;
$font = $1 if $symbol=~ s/{([\w\s]*)}$//;
collect_matches("Unknown \"$spec\"",
# Find Math containing an XMTok, with role=UNKNOWN
"//ltx:Math[descendant::ltx:XMTok[\@role='UNKNOWN']"
# whose name or content is the requested symbol
. "[\@name='$symbol' or text()='$symbol']"
. ($font ? "[\@font='$font']" : '')
# BUT which isn't in presentation branch of an XMDual!!
# ie. no ancestor w/preceding sibling has parent = XMDual
. "[not(ancestor-or-self::*[preceding-sibling::*][parent::ltx:XMDual])]"
. "]"); }
foreach my $symbol (@posfuncs){
collect_matches("Possible function \"$symbol\"",
"//ltx:Math[descendant::ltx:XMTok[\@possibleFunction='yes']"
. "[\@name='$symbol' or text()='$symbol']"
. "]"); }
show_matches($ROOT_OBJECT);
#**********************************************************************
# This matches fonts when both are converted to strings (toString),
# such as when they are set as attributes.
sub match_font {
my($font1,$font2)=@_;
#print STDERR "Match font \"".($font1 || 'none')."\" to \"".($font2||'none')."\"\n";
return 0 unless $font1 && $font2;
$font1 =~ /^Font\[(.*)\]$/;
my @comp1 = split(',',$1);
$font2 =~ /^Font\[(.*)\]$/;
my @comp2 = split(',',$1);
while(@comp1){
my $c1 = shift @comp1;
my $c2 = shift @comp2;
return 0 if ($c1 ne '*') && ($c2 ne '*') && ($c1 ne $c2); }
return 1; }
#**********************************************************************
sub collect_matches {
my($description,$xpath)=@_;
my @nodes = $XPATH->findnodes($xpath,$DOC);
print "Query $description appears in ".scalar(@nodes)." places\n";
print " [XPath = \"$xpath\"]\n" if $verbosity > 0;
foreach my $node (@nodes){
my $object = id_object($node);
push( @{ $$object{items} }, $node); }}
sub id_object {
my($node)=@_;
my $id;
while(1){
$node = $node->parentNode;
return $ROOT_OBJECT if $node->nodeType != XML_ELEMENT_NODE;
last if $id = $node->getAttribute('xml:id'); }
if(my $object = $OBJECTS{$id}){
return $object; }
else {
my $parent_object = id_object($node);
my $type = $node->localname;
my $labels = $node->getAttribute('labels');
my $refnum = $node->getAttribute('refnum');
my($title) = $XPATH->findnodes("child::ltx:toctitle | child::ltx:title",$node);
my $desc = ($refnum ? ($title ? "$refnum. ".$title->textContent : $refnum)
: ($title ? $title->textContent : ''));
$desc =~ s/\s+/ /g;
$OBJECTS{$id}=$object
= {id=>$id, labels=>$labels, type=>$type, description=>$desc, children=>[], items=>[]};
push(@{$$parent_object{children}}, $object);
$object; }}
sub show_matches {
my($object,$indent)=@_;
$indent = '' unless defined $indent;
print $indent."$$object{type}:"
.($$object{id} ? " ID=$$object{id}":'')
.($$object{labels} ? " Labels=$$object{labels}":'')
.($$object{description}? " \"$$object{description}\"":"")."\n";
if(!$SKELETON){
foreach my $item (@{ $$object{items} }){
show_node($item,$indent.' | '); }}
foreach my $child (@{ $$object{children}}){
show_matches($child, $indent.' | '); }
}
sub show_node {
my($node,$indent)=@_;
if($node->localname eq 'Math'){
my $ptex = $node->getAttribute('tex');
my $ctex = $node->getAttribute('content-tex');
if($verbosity > 1){
print $indent.$node->toString."\n"; }
else {
print wrap($indent,$indent,$ctex || $ptex)."\n";}}
else {
print $indent.$node->toString."\n"; }
}
#**********************************************************************
__END__
=head1 NAME
C<latexmlfind> finds interesting things in LaTeXML generated XML.
=head1 SYNOPSIS
latexmlfind [options] xmlfile
Options:
--symbol=symbol finds equations where the symbol appears.
--unknown=symbol finds equations where the unknown symbol appears (ie role=UNKNOWN).
--possiblefunction=symbol finds equations where symbol is possibly used as a function.
--label=symbol finds objects with the given label.
--refnum=symbol finds objects with the given refnum (reference number).
--quiet suppress messages (can repeat)
--verbose more informative output (can repeat)
--VERSION show version number.
--help shows help message.
=head1 OPTIONS AND ARGUMENTS
latexmlfind is useful for finding objects within an XML file generated by LaTeXML.
=over 4
=item B<--output=>I<outputfile>
Specifies the output file; by default the XML is written to stdout.
=item B<--unknown=>I<symbol>
Finds equations where the unknown symbol appears.
=item B<--possiblefunction=>I<symbol>
Finds equations where symbol is possibly used as a function.
=item B<--label=>I<label>
Finds objects (sections, equations, whatever) labeled by the given label.
=item B<--refnum=>I<refnum>
Finds objects (sections, equations, whatever) with the given reference number.
=item B<--quiet>
Reduces the verbosity of output during processing, used twice is pretty silent.
=item B<--verbose>
Increases the verbosity of output during processing, used twice is pretty chatty.
Can be useful for getting more details when errors occur.
=item B<--VERSION>
Shows the version number of latexmlfind..
=item B<--help>
Shows this help message.
=back
=head1 SEE ALSO
L<latexml>, L<LaTeXML>
=cut
#**********************************************************************
|