
/usr/share/pyshared/hachoir_subfile/search.py is in python-hachoir-subfile 0.5.3-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

from hachoir_core.error import HACHOIR_ERRORS, error
from hachoir_core.stream import InputSubStream
from hachoir_core.tools import humanFilesize, humanDuration
from hachoir_core.memory import limitedMemory
from hachoir_subfile.data_rate import DataRate
from hachoir_subfile.output import Output
from hachoir_subfile.pattern import HachoirPatternMatching as PatternMatching
from sys import stderr
from time import time

def skipSubfile(parser):
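    # A parser tagged "subfile": "skip" asks the search tool not to look
    # for other subfiles inside the file that parser recognized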
    subfile = parser.getParserTags().get("subfile", "")
    return (subfile == "skip")

FILE_MAX_SIZE = 100 * 1024 * 1024   # Max. file size in bytes (100 MB)
SLICE_SIZE = 64*1024                # Slice size in bytes (64 KB)
MEMORY_LIMIT = 50*1024*1024
PROGRESS_UPDATE = 1.5   # Minimum number of seconds between two progress messages

class SearchSubfile:
    """
    Tool to find file start and file size in any binary stream.

    To use it:
    - instantiate the class: subfile = SearchSubfile(stream)
    - (optional) choose parsers with: subfile.loadParsers(categories, parser_ids)
    - run the search: subfile.main()
    """

    def __init__(self, stream, offset=0, size=None):
        """
        Setup search tool, parameter:
         - filename: Input filename in locale charset
         - directory: Directory filename in locale charset where
           output files will be written
         - offset: Offset (in bytes) of the beginning of the search
         - size: Limit size (in bytes) of input file (None: no limit)
         - debug: Debug mode flag (display debug information)
        """

        # Size
        self.stream = stream
        if size is not None:
            self.size = min(self.stream.size, (offset+size)*8)
        else:
            self.size = self.stream.size

        # Offset
        self.start_offset = offset*8
        self.current_offset = self.start_offset
        self.slice_size = SLICE_SIZE*8   # slice size in bits (64 KB)

        # Statistics
        self.datarate = DataRate(self.start_offset)
        self.main_start = time()

        # Other flags and attributes
        self.patterns = None
        self.verbose = True
        self.debug = False
        self.output = None
        self.filter = None

    def setOutput(self, directory):
        self.output = Output(directory)

    def loadParsers(self, categories=None, parser_ids=None):
        before = time()
        self.patterns = PatternMatching(categories, parser_ids)
        if self.debug:
            print "Regex compilation: %.1f ms" % ((time() - before)*1000)
            print "Use regex: %s" % self.patterns

    def main(self):
        """
        Run the search.
        Return True if ok, False otherwise.
        """

        # Initialize
        self.mainHeader()

        # Prepare search
        main_error = False
        try:
            # Run search
            limitedMemory(MEMORY_LIMIT, self.searchSubfiles)
        except KeyboardInterrupt:
            print >>stderr, "[!] Program interrupted (CTRL+C)"
            main_error = True
        except MemoryError:
            main_error = True
            print >>stderr, "[!] Memory error!"
        self.mainFooter()
        return not(main_error)

    def mainHeader(self):
        # Load parsers if none have been chosen
        if not self.patterns:
            self.loadParsers()

        # Fix slice size if needed
        self.slice_size = max(self.slice_size, self.patterns.max_length * 8)

        bytes = (self.size-self.start_offset)//8
        print >>stderr, "[+] Start search on %s bytes (%s)" % (
            bytes, humanFilesize(bytes))
        print >>stderr
        self.stats = {}
        self.current_offset = self.start_offset
        self.main_start = time()

    def mainFooter(self):
        print >>stderr
        print >>stderr, "[+] End of search -- offset=%s (%s)" % (
            self.current_offset//8, humanFilesize(self.current_offset//8))
        size = (self.current_offset - self.start_offset) // 8
        duration = time() - self.main_start
        if 0.1 <= duration:
            print >>stderr, "Total time: %s -- global rate: %s/sec" % (
                humanDuration(duration*1000), humanFilesize(size // duration))

    def searchSubfiles(self):
        """
        Search all subfiles in the stream, call processParser() for each parser.
        """
        self.next_offset = None
        self.next_progress = time() + PROGRESS_UPDATE
        while self.current_offset < self.size:
            self.datarate.update(self.current_offset)
            if self.verbose and self.next_progress <= time():
                self.displayProgress()
            for offset, parser in self.findMagic(self.current_offset):
                self.processParser(offset, parser)
            self.current_offset += self.slice_size
            if self.next_offset:
                self.current_offset = max(self.current_offset, self.next_offset)
            self.current_offset = min(self.current_offset, self.size)

    def processParser(self, offset, parser):
        """
        Process a valid parser.
        """
        text = "[+] File at %s" % (offset//8)
        if parser.content_size is not None:
            text += " size=%s (%s)" % (parser.content_size//8, humanFilesize(parser.content_size//8))
        if not(parser.content_size) or parser.content_size//8 < FILE_MAX_SIZE:
            text += ": " + parser.description
        else:
            text += ": " + parser.__class__.__name__

        if self.output and parser.content_size:
            if (offset == 0 and parser.content_size == self.size):
                text += " (don't copy whole file)"
            elif parser.content_size//8 >= FILE_MAX_SIZE:
                text += " (don't copy file, too big)"
            elif not self.filter or self.filter(parser):
                filename = self.output.createFilename(parser.filename_suffix)
                filename = self.output.writeFile(filename, self.stream, offset, parser.content_size)
                text += " => %s" % filename
        print text
        self.next_progress = time() + PROGRESS_UPDATE

    def findMagic(self, offset):
        """
        Find all magic strings in the stream in the interval
        offset..(offset+self.slice_size).

        Generate (offset, parser) values, where offset is the beginning of
        the file (relative to the start of the stream), not the position of
        the magic string.
        """
        start = offset
        end = start + self.slice_size
        end = min(end, self.size)
        data = self.stream.readBytes(start, (end-start)//8)
        for parser_cls, offset in self.patterns.search(data):
            offset += start
            # Skip invalid offset
            if offset < 0:
                continue
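            # Skip offsets inside a subfile tagged "skip"; in Python 2,
            # "offset < None" is always False, so this test is safe before
            # the first skip-tagged match sets next_offset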
            if offset < self.next_offset:
                continue

            # Create parser at found offset
            parser = self.guess(offset, parser_cls)

            # Update statistics
            if parser_cls not in self.stats:
                self.stats[parser_cls] = [0, 0]
            self.stats[parser_cls][0] += 1
            if not parser:
                continue

            # Parser is valid, yield it with the offset
            self.stats[parser_cls][1] += 1

            if self.debug:
                print >>stderr, "Found %s at offset %s" % (
                    parser.__class__.__name__, offset//8)
            yield (offset, parser)

            # Set next offset
            if parser.content_size is not None and skipSubfile(parser):
                self.next_offset = offset + parser.content_size
                if end <= self.next_offset:
                    break

    def guess(self, offset, parser_cls):
        """
        Try the specified parser at stream offset 'offset'.

        Return the parser object, or None on failure.
        """
        substream = InputSubStream(self.stream, offset)
        try:
            return parser_cls(substream, validate=True)
        except HACHOIR_ERRORS:
            return None

    def displayProgress(self):
        """
        Display progress (to stdout) of the whole process.
        Compute data rate (in byte per sec) and time estimation.
        """
        # Program next update
        self.next_progress = time() + PROGRESS_UPDATE

        # Progress offset
        percent = float(self.current_offset - self.start_offset) * 100 / (self.size - self.start_offset)
        offset = self.current_offset // 8
        message = "Search: %.2f%% -- offset=%u (%s)" % (
            percent, offset, humanFilesize(offset))

        # Compute data rate (average is in bits/sec; displayed in bytes/sec)
        average = self.datarate.average
        if average:
            message += " -- %s/sec " % humanFilesize(average // 8)
            eta = float(self.size - self.current_offset) / average
            message += " -- ETA: %s" % humanDuration(eta * 1000)

        # Display message
        print >>stderr, message
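
As a usage illustration, here is a minimal driver script following the class
docstring above. It is a sketch, not part of the package: the input filename
and output directory are placeholders, and FileInputStream is the standard
hachoir_core helper for opening a file as a stream.

# Hypothetical example (Python 2, matching the code above)
from hachoir_core.stream import FileInputStream
from hachoir_subfile.search import SearchSubfile

stream = FileInputStream(u"disk.img")   # placeholder input file
subfile = SearchSubfile(stream)         # search the whole stream
subfile.setOutput("/tmp/subfiles")      # optional: write found files here
subfile.loadParsers()                   # optional: restrict categories/parsers
ok = subfile.main()                     # run; returns True on success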