This file is indexed.

/usr/bin/tmxclean is in libxml-tmx-perl 0.36-1.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/perl -s

# PODNAME: tmxclean
# ABSTRACT: Simple tool to clean TMX files

use v5.10;
use strict;
use warnings;

use XML::TMX::Reader;

# Command-line switches.  They are package variables because the script is
# run with perl's -s option (see the #! line), which stores every
# -name[=value] switch given before the file name in the variable $name.
our (
     $junk,         # remove if one of the languages just has junk (default: on)
     $output,       # output filename
     $eq,           # remove if seg(l1) = seg(l2)
     $len,          # remove if len(li) > 50 and len(lj) > 2*len(li)
     $v, $verbose   # report progress and a final summary on STDERR
    );

my $cleaned = 0;      # translation units removed so far
my $processed = 0;    # translation units seen so far
my $tmx = shift or help();
my $reader = XML::TMX::Reader->new($tmx);

# Junk removal is on unless explicitly disabled with -junk=0.
$junk //= 1;

# $verbose was declared as a switch but never consulted; make it an alias
# of -v so that -verbose is honoured as well.
$v ||= $verbose;

# Default output name: prefix the file name itself rather than the whole
# path, so that "dir/file.tmx" yields "dir/_cleaned_file.tmx" instead of the
# unusable "_cleaned_dir/file.tmx".  A bare file name gives the same result
# as before ("_cleaned_file.tmx").
unless ($output) {
    ($output = $tmx) =~ s{([^/]*)\z}{_cleaned_$1};
}

print STDERR "loading..." if $v;

# cleaner() is called for every translation unit; the reader writes the
# result to $output.
$reader->for_tu( {output => $output},
                  \&cleaner);

if ($v) {
    # An empty TMX leaves $processed at 0; report 0% instead of dying with
    # "Illegal division by zero".
    my $percent = $processed ? 100 * $cleaned / $processed : 0;
    printf STDERR "\rRemoved %d/%d (%.3f%%).\n",
                  $cleaned, $processed, $percent;
}

# cleaner($langs)
#
# Callback passed to for_tu; invoked once per translation unit.
#
# $langs is a hash reference.  Keys starting with '-' are skipped; every
# other key is treated as a language whose segment text is stored under
# '-seg'.
#
# The unit is flagged for removal when any of the following holds:
#   * -eq   is set and two languages carry exactly the same segment;
#   * -junk is set and a segment consists only of digits, whitespace and
#     the characters "-", "." and ",", or contains no word characters;
#   * -len  is set, the shortest segment is longer than 50 characters and
#     the longest one is more than twice as long as the shortest.
#
# Updates the file-level counters $processed and $cleaned and, with -v,
# prints a progress line on STDERR every 1000 units.
#
# Returns undef for a unit to be removed, otherwise $langs unchanged
# (presumably for_tu drops units for which undef is returned -- confirm
# against the XML::TMX::Reader documentation).
sub cleaner {
    my $langs = shift;
    $processed++;

    my $remove = 0;
    my %seen;                     # segment text => times seen in this unit
    my ($shortest, $longest);     # segment lengths, undef until one is seen

    for my $k (keys %$langs) {
        next if $k =~ /^-/;

        # A missing segment is handled as the empty string.  The outcome is
        # the same as before (an empty segment counts as junk), but without
        # the "uninitialized value" warnings an undef used to trigger.
        my $seg = $langs->{$k}{-seg} // '';

        $remove = 1 if $eq   && $seen{$seg}++;
        $remove = 1 if $junk && $seg =~ /^[-.,0-9\s]+$/;
        $remove = 1 if $junk && $seg =~ /^\W*$/;

        # Track the extremes directly; only the minimum and the maximum
        # are needed, so there is no reason to collect and sort.
        my $size = length $seg;
        $shortest = $size if !defined $shortest || $size < $shortest;
        $longest  = $size if !defined $longest  || $size > $longest;
    }

    # $shortest stays undef when the unit had no language entries at all.
    $remove = 1
      if $len && defined $shortest
              && $shortest > 50 && $shortest * 2 < $longest;

    $cleaned++ if $remove;
    printf STDERR "\rRemoved %d/%d (%.3f%%)...", $cleaned, $processed,
      100*$cleaned/$processed if $v && $processed%1000==0;

    # Explicit undef (not a bare return) so the callback always yields
    # exactly one value, as the original did.
    return $remove ? undef : $langs;
}

# help()
#
# Prints a usage summary and terminates the program with exit status 1.
# Called when no input file is given on the command line.  The summary
# lists every switch the script reads; because of perl's -s option they
# must be given before the file name.
sub help {
    print "   tmxclean [-junk=0] [-eq] [-len] [-v] [-output=<out.tmx>] <file.tmx>\n",
          "\n",
          "     -junk=0        keep units where a segment has no letters\n",
          "     -eq            remove units where two segments are identical\n",
          "     -len           remove units whose segment lengths differ too much\n",
          "     -v             report progress on STDERR\n",
          "     -output=<file> output file name\n";
    exit 1;
}

__END__

=pod

=encoding UTF-8

=head1 NAME

tmxclean - Simple tool to clean TMX files

=head1 VERSION

version 0.36

=head1 SYNOPSIS

  $ tmxclean file.tmx

=head1 DESCRIPTION

Removes the translation units that

 1. have no letters...  (unless -junk=0)
 2. seg(L1) = seg(L2)   (if -eq)
 3. len(Li) > 50 and len(Lj) > 2*len(Li)   (if -len)

=head1 SEE ALSO

XML::TMX

=head1 AUTHORS

=over 4

=item *

Alberto Simões <ambs@cpan.org>

=item *

José João Almeida <jj@di.uminho.pt>

=back

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2010-2017 by Projeto Natura <natura@di.uminho.pt>.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut