This file is indexed.

/usr/lib/python3/dist-packages/guessit/transfo/guess_filetype.py is in python3-guessit 0.11.0-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

import mimetypes
import os.path
import re

from guessit.guess import Guess
from guessit.patterns.extension import subtitle_exts, info_exts, video_exts
from guessit.transfo import TransformerException
from guessit.plugins.transformers import Transformer, get_transformer
from guessit.matcher import log_found_guess, found_guess


class GuessFiletype(Transformer):
    def __init__(self):
        Transformer.__init__(self, 200)

    # List of well known movies and series, hardcoded because they cannot be
    # guessed appropriately otherwise
    MOVIES = ['OSS 117']
    SERIES = ['Band of Brothers']

    MOVIES = [m.lower() for m in MOVIES]
    SERIES = [s.lower() for s in SERIES]

    def guess_filetype(self, mtree, options=None):
        options = options or {}

        # put the filetype inside a dummy container to be able to have the
        # following functions work correctly as closures
        # this is a workaround for python 2 which doesn't have the
        # 'nonlocal' keyword which we could use here in the upgrade_* functions
        # (python 3 does have it)
        filetype_container = [mtree.guess.get('type')]
        other = {}
        filename = mtree.string

        def upgrade_episode():
            if filetype_container[0] == 'subtitle':
                filetype_container[0] = 'episodesubtitle'
            elif filetype_container[0] == 'info':
                filetype_container[0] = 'episodeinfo'
            elif (not filetype_container[0] or
                  filetype_container[0] == 'video'):
                filetype_container[0] = 'episode'

        def upgrade_movie():
            if filetype_container[0] == 'subtitle':
                filetype_container[0] = 'moviesubtitle'
            elif filetype_container[0] == 'info':
                filetype_container[0] = 'movieinfo'
            elif (not filetype_container[0] or
                  filetype_container[0] == 'video'):
                filetype_container[0] = 'movie'

        def upgrade_subtitle():
            if filetype_container[0] == 'movie':
                filetype_container[0] = 'moviesubtitle'
            elif filetype_container[0] == 'episode':
                filetype_container[0] = 'episodesubtitle'
            elif not filetype_container[0]:
                filetype_container[0] = 'subtitle'

        def upgrade_info():
            if filetype_container[0] == 'movie':
                filetype_container[0] = 'movieinfo'
            elif filetype_container[0] == 'episode':
                filetype_container[0] = 'episodeinfo'
            elif not filetype_container[0]:
                filetype_container[0] = 'info'

        # look at the extension first
        fileext = os.path.splitext(filename)[1][1:].lower()
        if fileext in subtitle_exts:
            upgrade_subtitle()
            other = {'container': fileext}
        elif fileext in info_exts:
            upgrade_info()
            other = {'container': fileext}
        elif fileext in video_exts:
            other = {'container': fileext}
        else:
            if fileext and not options.get('name_only'):
                other = {'extension': fileext}
                list(mtree.unidentified_leaves())[-1].guess = Guess(other)

        # check whether we are in a 'Movies', 'Tv Shows', ... folder
        folder_rexps = [(r'Movies?', upgrade_movie),
                        (r'Films?', upgrade_movie),
                        (r'Tv[ _-]?Shows?', upgrade_episode),
                        (r'Series?', upgrade_episode),
                        (r'Episodes?', upgrade_episode)]
        for frexp, upgrade_func in folder_rexps:
            frexp = re.compile(frexp, re.IGNORECASE)
            for pathgroup in mtree.children:
                if frexp.match(pathgroup.value):
                    upgrade_func()
                    return filetype_container[0], other

        # check for a few specific cases which will unintentionally make the
        # following heuristics confused (eg: OSS 117 will look like an episode,
        # season 1, epnum 17, when it is in fact a movie)
        fname = mtree.clean_string(filename).lower()
        for m in self.MOVIES:
            if m in fname:
                self.log.debug('Found in exception list of movies -> type = movie')
                upgrade_movie()
                return filetype_container[0], other
        for s in self.SERIES:
            if s in fname:
                self.log.debug('Found in exception list of series -> type = episode')
                upgrade_episode()
                return filetype_container[0], other

        # if we have an episode_rexp (eg: s02e13), it is an episode
        episode_transformer = get_transformer('guess_episodes_rexps')
        if episode_transformer:
            filename_parts = list(x.value for x in mtree.unidentified_leaves())
            filename_parts.append(filename)
            for filename_part in filename_parts:
                guess = episode_transformer.guess_episodes_rexps(filename_part)
                if guess:
                    self.log.debug('Found guess_episodes_rexps: %s -> type = episode', guess)
                    upgrade_episode()
                    return filetype_container[0], other

        properties_transformer = get_transformer('guess_properties')
        if properties_transformer:
            # if we have certain properties characteristic of episodes, it is an ep
            found = properties_transformer.container.find_properties(filename, mtree, options, 'episodeFormat')
            guess = properties_transformer.container.as_guess(found, filename)
            if guess:
                self.log.debug('Found characteristic property of episodes: %s"', guess)
                upgrade_episode()
                return filetype_container[0], other

            weak_episode_transformer = get_transformer('guess_weak_episodes_rexps')
            if weak_episode_transformer:
                found = weak_episode_transformer.container.find_properties(filename, mtree, options, 'episodeNumber')
                guess = weak_episode_transformer.container.as_guess(found, filename)
                if guess and (guess.raw('episodeNumber')[0] == '0' or guess['episodeNumber'] >= 10):
                    self.log.debug('Found characteristic property of episodes: %s"', guess)
                    upgrade_episode()
                    return filetype_container[0], other

                found = properties_transformer.container.find_properties(filename, mtree, options, 'crc32')
                guess = properties_transformer.container.as_guess(found, filename)
                if guess:
                    found = weak_episode_transformer.container.find_properties(filename, mtree, options)
                    guess = weak_episode_transformer.container.as_guess(found, filename)
                    if guess:
                        self.log.debug('Found characteristic property of episodes: %s"', guess)
                        upgrade_episode()
                        return filetype_container[0], other

            found = properties_transformer.container.find_properties(filename, mtree, options, 'format')
            guess = properties_transformer.container.as_guess(found, filename)
            if guess and guess['format'] in ('HDTV', 'WEBRip', 'WEB-DL', 'DVB'):
                # Use weak episodes only if TV or WEB source
                weak_episode_transformer = get_transformer('guess_weak_episodes_rexps')
                if weak_episode_transformer:
                    guess = weak_episode_transformer.guess_weak_episodes_rexps(filename)
                    if guess:
                        self.log.debug('Found guess_weak_episodes_rexps: %s -> type = episode', guess)
                        upgrade_episode()
                        return filetype_container[0], other

        website_transformer = get_transformer('guess_website')
        if website_transformer:
            found = website_transformer.container.find_properties(filename, mtree, options, 'website')
            guess = website_transformer.container.as_guess(found, filename)
            if guess:
                for namepart in ('tv', 'serie', 'episode'):
                    if namepart in guess['website']:
                        # origin-specific type
                        self.log.debug('Found characteristic property of episodes: %s', guess)
                        upgrade_episode()
                        return filetype_container[0], other

        if filetype_container[0] in ('subtitle', 'info') or (not filetype_container[0] and fileext in video_exts):
            # if no episode info found, assume it's a movie
            self.log.debug('Nothing characteristic found, assuming type = movie')
            upgrade_movie()

        if not filetype_container[0]:
            self.log.debug('Nothing characteristic found, assuming type = unknown')
            filetype_container[0] = 'unknown'

        return filetype_container[0], other

    def process(self, mtree, options=None):
        """guess the file type now (will be useful later)
        """
        filetype, other = self.guess_filetype(mtree, options)

        mtree.guess.set('type', filetype, confidence=1.0)
        log_found_guess(mtree.guess)

        filetype_info = Guess(other, confidence=1.0)
        # guess the mimetype of the filename
        # TODO: handle other mimetypes not found on the default type_maps
        # mimetypes.types_map['.srt']='text/subtitle'
        mime, _ = mimetypes.guess_type(mtree.string, strict=False)
        if mime is not None:
            filetype_info.update({'mimetype': mime}, confidence=1.0)

        # Retrieve the last node of category path (extension node)
        node_ext = list(filter(lambda x: x.category == 'path', mtree.nodes()))[-1]
        found_guess(node_ext, filetype_info)

        if mtree.guess.get('type') in [None, 'unknown']:
            if options.get('name_only'):
                mtree.guess.set('type', 'movie', confidence=0.6)
            else:
                raise TransformerException(__name__, 'Unknown file type')

    def second_pass_options(self, mtree, options=None):
        if 'type' not in options or not options['type']:
            if mtree.info.get('type') != 'episode':
                # now look whether there are some specific hints for episode vs movie
                # If we have a date and no year, this is a TV Show.
                if 'date' in mtree.info and 'year' not in mtree.info:
                    return {'type': 'episode'}

            if mtree.info.get('type') != 'movie':
                # If we have a year, no season but raw episodeNumber is a number not starting with '0', this is a movie.
                if 'year' in mtree.info and 'episodeNumber' in mtree.info and not 'season' in mtree.info:
                    try:
                        int(mtree.raw['episodeNumber'])
                        return {'type': 'movie'}
                    except ValueError:
                        pass