# /usr/share/namazu/pl/gfilter.pl is in namazu2-index-tools 2.0.21-10.
# This file is owned by root:root, with mode 0o644.
# The actual contents of the file can be viewed below.
#
# -*- Perl -*-
# $Id: gfilter.pl,v 1.1.2.6 2006-04-16 12:48:49 opengl2772 Exp $
# Copyright (C) 1999 Satoru Takabayashi ,
# 2000-2006 Namazu Project All rights reserved.
# This is free software with ABSOLUTELY NO WARRANTY.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either versions 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA
#
# This file must be encoded in EUC-JP encoding
#
package gfilter;
use English;
# Dump the intermediate results of a filter through util::dprint.
#
#   $contref      - reference to the content string
#   $weighted_str - reference to the weighted-keyword string
#   $fields       - hash reference; only its 'title' entry is read here
#   $headings     - reference to the headings string
#
# The title section is skipped when no title is defined.  The sub evaluates
# to whatever the final util::dprint call yields.
sub show_filter_debug_info ($$$$) {
    my ($contref, $weighted_str, $fields, $headings) = @_;

    my $title = $fields->{'title'};
    if (defined $title) {
        util::dprint("-- title --\n" . $title . "\n");
    }

    util::dprint("-- content --\n" . ${$contref} . "\n");
    util::dprint("-- weighted_str: --\n" . ${$weighted_str} . "\n");
    return util::dprint("-- headings --\n" . ${$headings} . "\n");
}
# Normalize white space, in place, in the string referenced by $text.
#
#   - CRLF and lone CR become LF.
#   - Control characters (0x00-0x1f other than TAB and LF) become spaces.
#   - Runs of spaces/tabs collapse to a single space.
#   - Leading and trailing spaces are removed from every line.
#   - Runs of LF collapse to a single LF.
#
# Control characters are blanked *before* space runs are collapsed, and
# newline runs are squeezed *after* the lines are trimmed.  Otherwise the
# spaces produced from control characters are never collapsed or trimmed,
# and a line holding only blanks survives as an empty line.
#
# Returns undef when $$text is undefined; otherwise the number of control
# characters (not counting TAB, CR and LF) that were replaced by a space.
sub white_space_adjust_filter ($) {
    my ($text) = @_;
    return undef unless defined($$text);

    # Unify line endings first so CR is never counted as a control character.
    $$text =~ s/\r\n?/\n/g;

    # TAB (0x09) and LF (0x0a) are left for the steps below.
    my $replaced = ($$text =~ tr/\x00-\x08\x0b-\x1f/ /);

    $$text =~ s/[ \t]+/ /g;
    $$text =~ s/^ +//gm;
    $$text =~ s/ +$//gm;
    $$text =~ s/\n+/\n/g;

    return $replaced;
}
# Derive a title from a file name.
#
#   $cfile        - file name or path
#   $weighted_str - reference to the weighted-keyword string; a title-weighted
#                   entry built from the file name is appended to it
#
# Returns the part of $cfile after the last '/', or $cfile unchanged when the
# pattern finds no '/'.
sub filename_to_title ($$) {
    my ($cfile, $weighted_str) = @_;

    # File names on MSWin32 (and os2) use Shift_JIS, so they are passed
    # through the codeconv Shift_JIS -> EUC-JP helpers first.
    my $uses_shiftjis = ($English::OSNAME eq "MSWin32")
                     || ($English::OSNAME eq "os2");
    if ($uses_shiftjis) {
        $cfile = codeconv::shiftjis_to_eucjp($cfile);
        codeconv::eucjp_han2zen_kana(\$cfile);
    }
    codeconv::normalize_eucjp(\$cfile);

    # Keep only what follows the last slash; fall back to the whole name.
    my ($filename) = ($cfile =~ m!^.*/([^/]*)$!);
    $filename = $cfile unless defined $filename;

    # Keywords: the file name with separator characters turned into spaces.
    (my $keywords = $filename) =~ tr|/\\_\.-| |;

    # The file name is weighted like an HTML title.
    my $weight = $conf::Weight{'html'}->{'title'};
    $$weighted_str .= join('',
        "\x7f", $weight, "\x7f", $keywords, "\x7f/", $weight, "\x7f\n");

    return $filename;
}
# Tidy the string referenced by $text line by line, in place:
#
#   - remove spaces and the quote/markup characters '>', '|', '#', ':' at
#     the beginning of each line, and spaces at its end (tabs are not
#     touched here);
#   - join lines when a line ends with a byte in 0xa1-0xfe (a Japanese
#     character in EUC-JP) and is 40 or more bytes long, counting its
#     newline;
#   - keep the line break after an EUC-JP ideographic full stop (0xa1a3)
#     or ideographic comma (0xa1a2);
#   - join English words hyphenated across a line break.
#
# The two punctuation marks are written as byte escapes rather than literal
# characters so the pattern does not depend on the encoding in which this
# source file happens to be stored.
#
# Every line is re-terminated with "\n"; trailing empty lines are dropped by
# split.  Returns undef when $$text is undefined, otherwise the new text.
#
# Original of this code was contributed by <furukawa@tcp-ip.or.jp>.
# [1997-09-15]
#
sub line_adjust_filter ($) {
    my ($text) = @_;
    return undef unless defined($$text);

    my @lines = split(/\n/, $$text);
    for my $line (@lines) {
        $line .= "\n";
        $line =~ s/^[ \>\|\#\:]+//;
        $line =~ s/ +$//;

        # Long line ending in a Japanese character: drop the line break.
        $line =~ s/\n// if (($line =~ /[\xa1-\xfe]\n*$/) &&
                            (length($line) >= 40));

        # ...but a sentence or clause end keeps (or regains) its line break.
        $line =~ s/(\xa1\xa3|\xa1\xa2)$/$1\n/;

        $line =~ s/([a-z])-\n/$1/; # for hyphenation.
    }
    $$text = join('', @lines);
}
# Placeholder: not implemented yet.  Takes no arguments, does nothing and
# returns nothing (undef in scalar context, an empty list in list context).
sub analize_rcs_stamp() {
    return;
}
1;
|