This file is indexed.

/usr/bin/tv_remove_some_overlapping is in xmltv-util 0.5.70-1.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
#!/usr/bin/perl -w

=pod

=head1 NAME

tv_remove_some_overlapping - Remove some overlapping programmes from XMLTV data.

=head1 SYNOPSIS

tv_remove_some_overlapping [--help] [--output FILE] [FILE...]

=head1 DESCRIPTION

Read one or more XMLTV files and write a file to standard output
containing the same data, except that some 'magazine' programmes which
seem to contain two or more other programmes are removed.

For example, if 'Schools TV' runs from 10:00 to 12:00, and there are
two programmes 'History' from 10:00 to 11:00 and 'Geography' from
11:00 to 12:00 on the same channel, then 'Schools TV' could be
removed.  A programme is removed only if there are two or more other
programmes which partition its timeslot, which implies that it and
these other programmes must have stop times specified.

To avoid throwing away any real programmes, no programme will be
discarded if it has content data other than title and URL.

Filtering this tool won't remove all overlapping programmes but it
will deal with the 'big magazine programme containing smaller
programmes' data commonly seen from listings sources.

B<--output FILE> write to FILE rather than standard output

=head1 SEE ALSO

L<xmltv(5)>.

=head1 AUTHOR

Ed Avis, ed@membled.com

=cut

use strict;
use XMLTV::Version '$Id: tv_remove_some_overlapping,v 1.7 2015/06/23 20:05:25 knowledgejunkie Exp $ ';
use XMLTV::Date;
use Getopt::Long;
use Date::Manip;

BEGIN {
    if (int(Date::Manip::DateManipVersion) >= 6) {
	Date::Manip::Date_Init("SetDate=now,UTC");
    } else {
	Date::Manip::Date_Init("TZ=UTC");
    }
}

# Use Log::TraceMessages if installed.
BEGIN {
    eval { require Log::TraceMessages };
    if ($@) {
	*t = sub {};
	*d = sub { '' };
    }
    else {
	*t = \&Log::TraceMessages::t;
	*d = \&Log::TraceMessages::d;
	Log::TraceMessages::check_argv();
    }
}

use XMLTV;
use XMLTV::Usage <<END
$0: remove some programmes which seem to be mere containers for others
usage: $0 [--help] [--output FILE] [FILE...]
END
;

# Memoize some subroutines if possible
eval { require Memoize };
unless ($@) {
    foreach (qw/Date_Cmp pd interesting/) {
	Memoize::memoize($_) or die "cannot memoize $_: $!";
    }
}

sub pd( $ );
sub exists_partition( $$$$$ );
sub interesting( $ );
sub should_write( $ );

# Keys of a programme hash which don't indicate any data we're
# especially concerned to preserve.  Poke around inside XMLTV.pm to
# find the list of attributes.
#
my %boring_programme_key = (title => 1);
$boring_programme_key{$_} = 1
  foreach map { $_->[0] } @XMLTV::Programme_Attributes;
$boring_programme_key{url} = 1; # common in some sources, and boring

my ($opt_help, $opt_output);
GetOptions('help' => \$opt_help, 'output=s' => \$opt_output) or usage(0);
usage(1) if $opt_help;
@ARGV = ('-') if not @ARGV;

my %w_args = ();
if (defined $opt_output) {
    my $fh = new IO::File ">$opt_output";
    die "cannot write to $opt_output\n" if not $fh;
    %w_args = (OUTPUT => $fh);
}

# Unfortunately we need to load the whole file before processing.  I
# don't want to require the input to be sorted, since tv_sort adds
# guessed stop times which could cause this program to remove too many
# programmes.  This is another reason to eventually change tv_sort not
# to add stop times (move into tv_guess_stop_times or whatever).
#
my ($encoding, $credits, $ch, $progs) = @{XMLTV::parsefiles(@ARGV)};
my $w = new XMLTV::Writer(%w_args, encoding => $encoding);
$w->start($credits);
$w->write_channels($ch);

# Since zero-length and unknown-length programmes are always written
# unchanged, we could write them immediately and discard them.  But
# it's a bit nicer to write the output in the same order as the input.
# However, we don't bother to index these programmes, they cannot /
# should not be used in looking for partitionings.
#
my %by_channel_and_start;
foreach (@$progs) {
    push @{$by_channel_and_start{$_->{channel}}{pd $_->{start}}}, $_
      if interesting $_;
}
$w->write_programme($_) foreach grep { should_write($_) } @$progs;
$w->end();
exit();

# Given that %by_channel_and_start and %boring_programme_key have been
# set up, should a programme (with start and stop time) be written?
#
sub should_write( $ ) {
    my $p = shift;

    # Always write zero length and unknown-length programmes.
    return 1 if not interesting $p;

    # If this programme cannot be partitioned by at least two others,
    # definitely write it.
    #
    return 1
      unless exists_partition(pd $p->{start}, pd $p->{stop},
			      $p->{channel},
			      2, { $p => 1 });

    foreach (keys %$p) {
	if (not $boring_programme_key{$_}) {
	    warn <<END
not filtering programme at $p->{start} on $p->{channel} because it has $_
END
  ;
	    return 1;
	}
    }

    return 0;
}

# We process only programmes with stop time and nonzero length.
sub interesting( $ ) {
    my $p = shift;
    my $stop = $p->{stop};
    return 0 if not defined $stop;
    my $cmp = Date_Cmp(pd $p->{start}, pd $stop);
    if ($cmp < 0) {
	# start < stop, okay.
	return 1;
    }
    elsif ($cmp == 0) {
	# Zero length, won't consider.
	return 0;
    }
    elsif ($cmp > 0) {
	warn "programme on $p->{channel} "
	  . "with start time ($p->{start}) "
	    . "before stop time ($stop)\n";
	return 0;
    }
    else { die }
}

# Does there exist a sequence of programmes hopping from $start to
# $stop, where none of the programmes is in $used and the sequence is
# at least $min_length long?
#
# $start and $stop are Date::Manip objects, $used a hash whose keys
# are used programmes and whose values are true.
#
sub exists_partition( $$$$$ ) {
    my ($start, $stop, $channel, $min_length, $used) = @_;
#    local $Log::TraceMessages::On = 1;
    t "seeking at least $min_length $start to $stop on $channel";
    t '... not including: ' . d $used;
    my $cmp = Date_Cmp($start, $stop);
    if ($cmp < 0) {
	t 'start before stop, okay';
	my @poss = grep { not $used->{$_} }
	  @{$by_channel_and_start{$channel}{$start}};
	t 'possible first steps of path: ' . d \@poss;
	--$min_length if $min_length;
	foreach my $p (@poss) {
	    return 1
	      if exists_partition(pd $p->{stop}, $stop, $channel,
				  $min_length, { %$used, $p => 1 });
	}
	t 'no paths found';
	return 0;
    }
    elsif ($cmp == 0) {
	t 'zero length, so path of length zero';
	return not $min_length;
    }
    elsif ($cmp > 0) {
	t 'stop < start, so no path';
	return 0;
    }
    else { die }
}

# Lift parse_date() to handle undef.
sub pd( $ ) {
    for ($_[0]) {
	return undef if not defined;
	return parse_date($_);
    }
}

exit 1;