This file is indexed.

/usr/share/perl5/Gscan2pdf/Ocropus.pm is in gscan2pdf 1.3.9-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
package Gscan2pdf::Ocropus;

use 5.008005;
use strict;
use warnings;
use Carp;
use File::Temp;    # To create temporary files
use File::Basename;
use HTML::Entities;
use Encode;
use English qw( -no_match_vars );    # for $PROCESS_ID

our $VERSION = '1.3.9';

# Module-wide state shared by setup() and hocr():
#   $exe       - command prefix ("ocroscript <script>") built by setup()
#   $installed - set to 1 by setup() once a recognition script is found
#   $setup     - set to 1 after setup() has probed, so the probe runs once
#   $logger    - logger object passed to setup(); used for info/warn calls
my ( $exe, $installed, $setup, $logger );

# Probe the system once for a usable ocroscript installation.
#
# Called as a class method with the logger to use. The logger is stored on
# every call; the probe itself only runs the first time. The scripts
# directory comes from $ENV{OCROSCRIPTS} or, failing that, from
# share/ocropus/scripts under /usr or /usr/local (the later prefix wins
# when both exist). recognize.lua is preferred over rec-tess.lua.
#
# Returns the module's "installed" flag: 1 when a recognition script was
# found, otherwise whatever the flag held before (undef initially).
sub setup {
    ( my $class, $logger ) = @_;

    if ( not $setup ) {
        my $have_ocroscript =
          system('which ocroscript > /dev/null 2> /dev/null') == 0;

        if ($have_ocroscript) {
            my $script_dir = $ENV{OCROSCRIPTS};

            # No explicit location given: look in the usual prefixes.
            if ( not defined $script_dir ) {
                my @candidates =
                  grep { -d }
                  map  { "$_/share/ocropus/scripts" } qw(/usr /usr/local);
                if (@candidates) { $script_dir = $candidates[-1] }
            }

            # First recognition script that exists in the directory, if any.
            my $script;
            if ( defined $script_dir ) {
                ($script) =
                  grep { -f "$script_dir/$_.lua" } qw(recognize rec-tess);
            }

            if ( not defined $script_dir ) {
                $logger->warn(
                    'Found ocroscript, but not its scripts. Disabling.');
            }
            elsif ( not defined $script ) {
                $logger->warn(
                    'Found ocroscript, but no recognition scripts. Disabling.');
            }
            else {
                $exe       = "ocroscript $script";
                $installed = 1;
                $logger->info("Using ocroscript with $script.");
            }
        }

        # Remember that the probe has been done, whatever its outcome.
        $setup = 1;
    }

    return $installed;
}

# Run ocroscript on an image and return its output.
#
# Called as a class method with named options:
#   file      - path of the image to recognise
#   threshold - optional percentage; when true the image is reduced to
#               black and white at that level before recognition
#   language  - optional value passed to the command as tesslanguage=...
#   logger    - logger handed to setup() if setup() has not run yet
#   pidfile   - optional file to which the current process ID is written
#               before the command runs
#
# Images that are not .png/.jpg/.pnm, or that need thresholding, are first
# converted to a temporary PNG with Image::Magick.
#
# Returns the command's output with HTML entities decoded, or an empty
# string when setup() found no usable ocroscript installation.
sub hocr {
    my ( $class, %options ) = @_;
    my ( $png, $cmd );
    if ( not $setup ) { Gscan2pdf::Ocropus->setup( $options{logger} ) }

    # Without a usable installation $exe is undefined, and the command built
    # below would ask the shell to execute the image file itself.
    if ( not $installed ) {
        if ( defined $logger ) {
            $logger->warn('ocroscript is not available. Skipping OCR.');
        }
        return q{};
    }

    if (   ( $options{file} !~ /[.](?:png|jpg|pnm)$/xsm )
        or $options{threshold} )
    {

        # PerlMagick methods report problems through their return value,
        # which stringifies to the error text (empty on success). Log every
        # one of them instead of only the last, and tolerate undef.
        my $report = sub {
            my ($status) = @_;
            if ( defined $status and "$status" ) { $logger->warn($status) }
            return;
        };

        # Temporary filename for new file
        $png = File::Temp->new( SUFFIX => '.png' );
        my $image = Image::Magick->new;
        $report->( scalar $image->Read( $options{file} ) );

        if ( $options{threshold} ) {
            $logger->info("thresholding at $options{threshold} to $png");
            $report->(
                scalar $image->BlackThreshold(
                    threshold => "$options{threshold}%"
                )
            );
            $report->(
                scalar $image->WhiteThreshold(
                    threshold => "$options{threshold}%"
                )
            );
            $report->( scalar $image->Quantize( colors => 2 ) );
            $report->(
                scalar $image->Write( depth => 1, filename => $png ) );
        }
        else {
            $logger->info("writing temporary image $png");
            $report->( scalar $image->Write( filename => $png ) );
        }
    }
    else {
        $png = $options{file};
    }

    # NOTE(review): the command is passed on as a single string, so a file
    # name containing whitespace or shell metacharacters would presumably
    # need quoting - confirm how open_three executes it.
    if ( $options{language} ) {
        $cmd = "tesslanguage=$options{language} $exe $png";
    }
    else {
        $cmd = "$exe $png";
    }
    $logger->info($cmd);

    # decode html->utf8
    my $output;
    if ( defined $options{pidfile} ) {
        ( $output, undef ) =
          Gscan2pdf::Document::open_three(
            "echo $PROCESS_ID > $options{pidfile};$cmd");
    }
    else {
        ( $output, undef ) = Gscan2pdf::Document::open_three($cmd);
    }
    my $decoded = decode_entities($output);

    # Unfortunately, there seems to be a case (tested in t/31_ocropus_utf8.t)
    # where decode_entities doesn't work cleanly, so encode/decode to finally
    # get good UTF-8
    return decode_utf8( encode_utf8($decoded) );
}

1;

__END__