/usr/share/perl5/LaTeXML/Post/Split.pm is in latexml 0.7.0-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 | # /=====================================================================\ #
# | LaTeXML::Post::Split | #
# | Split documents into pages | #
# |=====================================================================| #
# | Part of LaTeXML: | #
# | Public domain software, produced as part of work done by the | #
# | United States Government & not subject to copyright in the US. | #
# |---------------------------------------------------------------------| #
# | Bruce Miller <bruce.miller@nist.gov> #_# | #
# | http://dlmf.nist.gov/LaTeXML/ (o o) | #
# \=========================================================ooo==U==ooo=/ #
package LaTeXML::Post::Split;
use strict;
use LaTeXML::Util::Pathname;
use LaTeXML::Common::XML;
use base qw(LaTeXML::Post);
sub new {
my($class,%options)=@_;
my $self = $class->SUPER::new(%options);
$$self{split_xpath} = $options{split_xpath};
$$self{splitnaming} = $options{splitnaming};
$$self{no_navigation} = $options{no_navigation};
$self; }
sub process {
my($self,$doc)=@_;
my $root = $doc->getDocumentElement;
# RISKY and annoying; to split, we really need an id on the root.
# Writer will remove it.
$root->setAttribute('xml:id'=>'TEMPORARY_DOCUMENT_ID') unless $root->hasAttribute('xml:id');
my @docs = ($doc);
my @pages = $self->getPages($doc);
# Weird test: exclude the "whole document" from the list (?)
@pages = grep($_->parentNode->parentNode,@pages); # Strip out the root node.
if(@pages){
$self->Progress($doc,"Splitting into ".scalar(@pages)." pages");
my $tree = {node=>$root,id=>$root->getAttribute('xml:id'),name=>$doc->getDestination,children=>[]};
# Group the pages into a tree, in case they are nested.
my $haschildren={};
foreach my $page (@pages){
presortPages($tree,$haschildren,$page); }
# Work out the destination paths for each page
$self->prenamePages($doc,$tree,$haschildren);
# Now, create remove and create documents for each page.
push(@docs,$self->processPages($doc,@{$$tree{children}}));
# Add navigation to the sequence.
if(!$$self{no_navigation}){
my $rootid = $doc->getDocumentElement->getAttribute('xml:id');
$doc->addNavigation(start=>$rootid) if $rootid;
for(my $i=0; $i<$#docs; $i++){
$docs[$i]->addNavigation(next=>$docs[$i+1]->getDocumentElement->getAttribute('xml:id'));
$docs[$i+1]->addNavigation(previous=>$docs[$i]->getDocumentElement->getAttribute('xml:id')); }}
}
@docs; }
# Get the nodes in the document that will be separate "pages".
# Subclass can override, if needed.
sub getPages {
my($self,$doc)=@_;
$doc->findnodes($$self{split_xpath}); }
# Sort the pages into a tree, in case some pages are children of others
# If a page contains NOTHING BUT child pages (except frontmatter),
# we could just merge that page as a level in it's containing TOC instead of a document.???
sub presortPages {
my($tree,$haschildren,$page)=@_;
my $nextlevel;
if(($nextlevel = $$tree{children}[-1]) && (isDescendant($page, $$nextlevel{node}))){
presortPages($nextlevel,$haschildren,$page); }
else {
$$haschildren{$$tree{node}->localname}=1; # Wrong key for this!?!
push(@{$$tree{children}},
{node=>$page,upid=>$$tree{id}, id=>$page->getAttribute('xml:id'),parent=>$tree,children=>[]}); }}
# Is $node an descendant of $possibleparent?
# Probably even belongs somewhere else??
sub isDescendant {
my($node,$possibleparent)=@_;
do {
$node = $node->parentNode;
return 1 if $node && ($$node == $$possibleparent); }}
# Get destination pathnames for each page.
sub prenamePages {
my($self,$doc,$tree,$haschildren)=@_;
foreach my $entry (@{$$tree{children}}){
$$entry{name}=$self->getPageName($doc,$$entry{node},$$tree{node},$$tree{name},
$$haschildren{$$entry{node}->localname});
$self->prenamePages($doc,$entry,$haschildren); }
}
# Process a sequence of page entries, removing them and generating documents for each.
sub processPages {
my($self,$doc,@entries)=@_;
my $rootid = $doc->getDocumentElement->getAttribute('xml:id');
my @docs=();
while(@entries){
my $parent = $entries[0]->{node}->parentNode;
# Remove $page & ALL following siblings (backwards).
my @removed = ();
while(my $sib = $parent->lastChild){
$parent->removeChild($sib);
unshift(@removed,$sib);
last if $$sib == ${$entries[0]->{node}}; }
# Build toc from adjacent nodes that are being extracted.
my $hit_appendices=0;
my @toc = ();
my @apptoc = ();
# Process a sequence of adjacent pages; these will go into the same TOC.
while(@entries && @removed && ${$entries[0]->{node}} == ${$removed[0]}){
my $entry = shift(@entries);
my $page = $$entry{node};
$doc->removeNodes(shift(@removed));
my $id = $page->getAttribute('xml:id');
my $tocentry =['ltx:tocentry',{},
['ltx:ref',{class=>'toc', idref=>$id, show=>'fulltitle'}]];
$hit_appendices |= $page->localname =~ /^appendix/;
if($hit_appendices){
push(@apptoc,$tocentry); }
else {
push(@toc,$tocentry); }
# Due to the way document building works, we remove & process children pages
# BEFORE processing this page.
my @childdocs = $self->processPages($doc,@{$$entry{children}});
my $subdoc = $doc->newDocument($page,destination=>$$entry{name},
parentDocument=>$doc,parent_id=>$$entry{upid});
$subdoc->addNavigation(start=>$rootid) if $rootid;
$subdoc->addNavigation(up=>$$entry{upid});
push(@docs,$subdoc,@childdocs); }
# Finally, add the toc to reflect the consecutive, removed nodes, and add back the remainder
$doc->addNodes($parent,['ltx:TOC',{},['ltx:toclist',{},@toc]]) if @toc;
$doc->addNodes($parent,['ltx:TOC',{class=>'appendixtoc'},['ltx:toclist',{},@apptoc]])
if @apptoc;
map($parent->addChild($_),@removed); }
@docs; }
our $COUNTER=0;
sub getPageName {
my($self,$doc,$page,$parent,$parentpath,$recursive)=@_;
my $asdir;
my $naming = $$self{splitnaming};
my $attr = ($naming =~ /^id/ ? 'xml:id'
: ($naming =~ /^label/ ? 'labels' : undef));
my $name = $page->getAttribute($attr);
$name =~ s/\s+.*// if $name; # Truncate in case multiple labels.
$name =~ s/^LABEL:// if $name;
if(!$name){
if(($attr eq 'labels') && ($name=$page->getAttribute('xml:id'))){
$self->Warn($doc,$doc->getQName($page)." has no $attr attribute for pathname; using id=$name");
$attr='xml:id'; }
else {
$self->Warn($doc,$doc->getQName($page)." has no $attr attribute for pathname");
$name="FOO".++$COUNTER; }}
if($naming =~ /relative$/){
my $pname = $parent->getAttribute($attr);
if($pname && $name =~ /^\Q$pname\E(\.|_|:)+(.*)$/){
$name = $2; }
$asdir=$recursive; }
$name =~ s/:+/_/g;
if($asdir){
pathname_make(dir=>pathname_directory($parentpath) . "/$name",
name=>'index',
type=>$doc->getDestinationExtension); }
else {
pathname_make(dir=>pathname_directory($parentpath),
name=>$name,
type=>$doc->getDestinationExtension); }}
# ================================================================================
1;
|