/usr/lib/python3/dist-packages/pysrt/srtfile.py is in python3-pysrt 1.0.1-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 | # -*- coding: utf-8 -*-
import os
import sys
import codecs
try:
from collections import UserList
except ImportError:
from UserList import UserList
from itertools import chain
from copy import copy
from pysrt.srtexc import Error
from pysrt.srtitem import SubRipItem
from pysrt.compat import str
BOMS = ((codecs.BOM_UTF32_LE, 'utf_32_le'),
(codecs.BOM_UTF32_BE, 'utf_32_be'),
(codecs.BOM_UTF16_LE, 'utf_16_le'),
(codecs.BOM_UTF16_BE, 'utf_16_be'),
(codecs.BOM_UTF8, 'utf_8'))
CODECS_BOMS = dict((codec, str(bom, codec)) for bom, codec in BOMS)
BIGGER_BOM = max(len(bom) for bom, encoding in BOMS)
class SubRipFile(UserList, object):
"""
SubRip file descriptor.
Provide a pure Python mapping on all metadata.
SubRipFile(items, eol, path, encoding)
items -> list of SubRipItem. Default to [].
eol -> str: end of line character. Default to linesep used in opened file
if any else to os.linesep.
path -> str: path where file will be saved. To open an existant file see
SubRipFile.open.
encoding -> str: encoding used at file save. Default to utf-8.
"""
ERROR_PASS = 0
ERROR_LOG = 1
ERROR_RAISE = 2
DEFAULT_ENCODING = 'utf_8'
def __init__(self, items=None, eol=None, path=None, encoding='utf-8'):
UserList.__init__(self, items or [])
self._eol = eol
self.path = path
self.encoding = encoding
def _get_eol(self):
return self._eol or os.linesep
def _set_eol(self, eol):
self._eol = self._eol or eol
eol = property(_get_eol, _set_eol)
def slice(self, starts_before=None, starts_after=None, ends_before=None,
ends_after=None):
"""
slice([starts_before][, starts_after][, ends_before][, ends_after]) \
-> SubRipFile clone
All arguments are optional, and should be coercible to SubRipTime
object.
It reduce the set of subtitles to those that match match given time
constraints.
The returned set is a clone, but still contains references to original
subtitles. So if you shift this returned set, subs contained in the
original SubRipFile instance will be altered too.
Example:
>>> subs.slice(ends_after={'seconds': 20}).shift(seconds=2)
"""
clone = copy(self)
if starts_before:
clone.data = (i for i in clone.data if i.start < starts_before)
if starts_after:
clone.data = (i for i in clone.data if i.start > starts_after)
if ends_before:
clone.data = (i for i in clone.data if i.end < ends_before)
if ends_after:
clone.data = (i for i in clone.data if i.end > ends_after)
clone.data = list(clone.data)
return clone
def at(self, timestamp=None, **kwargs):
"""
at(timestamp) -> SubRipFile clone
timestamp argument should be coercible to SubRipFile object.
A specialization of slice. Return all subtiles visible at the
timestamp mark.
Example:
>>> subs.at((0, 0, 20, 0)).shift(seconds=2)
>>> subs.at(seconds=20).shift(seconds=2)
"""
time = timestamp or kwargs
return self.slice(starts_before=time, ends_after=time)
def shift(self, *args, **kwargs):
"""shift(hours, minutes, seconds, milliseconds, ratio)
Shift `start` and `end` attributes of each items of file either by
applying a ratio or by adding an offset.
`ratio` should be either an int or a float.
Example to convert subtitles from 23.9 fps to 25 fps:
>>> subs.shift(ratio=25/23.9)
All "time" arguments are optional and have a default value of 0.
Example to delay all subs from 2 seconds and half
>>> subs.shift(seconds=2, milliseconds=500)
"""
for item in self:
item.shift(*args, **kwargs)
def clean_indexes(self):
"""
clean_indexes()
Sort subs and reset their index attribute. Should be called after
destructive operations like split or such.
"""
self.sort()
for index, item in enumerate(self):
item.index = index + 1
@property
def text(self):
return '\n'.join(i.text for i in self)
@classmethod
def open(cls, path='', encoding=None, error_handling=ERROR_PASS):
"""
open([path, [encoding]])
If you do not provide any encoding, it can be detected if the file
contain a bit order mark, unless it is set to utf-8 as default.
"""
new_file = cls(path=path, encoding=encoding)
source_file = cls._open_unicode_file(path, claimed_encoding=encoding)
new_file.read(source_file, error_handling=error_handling)
source_file.close()
return new_file
@classmethod
def from_string(cls, source, **kwargs):
"""
from_string(source, **kwargs) -> SubRipFile
`source` -> a unicode instance or at least a str instance encoded with
`sys.getdefaultencoding()`
"""
error_handling = kwargs.pop('error_handling', None)
new_file = cls(**kwargs)
new_file.read(source.splitlines(True), error_handling=error_handling)
return new_file
def read(self, source_file, error_handling=ERROR_PASS):
"""
read(source_file, [error_handling])
This method parse subtitles contained in `source_file` and append them
to the current instance.
`source_file` -> Any iterable that yield unicode strings, like a file
opened with `codecs.open()` or an array of unicode.
"""
self.eol = self._guess_eol(source_file)
self.extend(self.stream(source_file, error_handling=error_handling))
return self
@classmethod
def stream(cls, source_file, error_handling=ERROR_PASS):
"""
stream(source_file, [error_handling])
This method yield SubRipItem instances a soon as they have been parsed
without storing them. It is a kind of SAX parser for .srt files.
`source_file` -> Any iterable that yield unicode strings, like a file
opened with `codecs.open()` or an array of unicode.
Example:
>>> import pysrt
>>> import codecs
>>> file = codecs.open('movie.srt', encoding='utf-8')
>>> for sub in pysrt.stream(file):
... sub.text += "\nHello !"
... print unicode(sub)
"""
string_buffer = []
for index, line in enumerate(chain(source_file, '\n')):
if line.strip():
string_buffer.append(line)
else:
source = string_buffer
string_buffer = []
if source and all(source):
try:
yield SubRipItem.from_lines(source)
except Error as error:
error.args += (''.join(source), )
cls._handle_error(error, error_handling, index)
def save(self, path=None, encoding=None, eol=None):
"""
save([path][, encoding][, eol])
Use initial path if no other provided.
Use initial encoding if no other provided.
Use initial eol if no other provided.
"""
path = path or self.path
encoding = encoding or self.encoding
save_file = codecs.open(path, 'w+', encoding=encoding)
self.write_into(save_file, eol=eol)
save_file.close()
def write_into(self, output_file, eol=None):
"""
write_into(output_file [, eol])
Serialize current state into `output_file`.
`output_file` -> Any instance that respond to `write()`, typically a
file object
"""
output_eol = eol or self.eol
for item in self:
string_repr = str(item)
if output_eol != '\n':
string_repr = string_repr.replace('\n', output_eol)
output_file.write(string_repr)
# Only add trailing eol if it's not already present.
# It was kept in the SubRipItem's text before but it really
# belongs here. Existing applications might give us subtitles
# which already contain a trailing eol though.
if not string_repr.endswith(2 * output_eol):
output_file.write(output_eol)
@classmethod
def _guess_eol(cls, string_iterable):
first_line = cls._get_first_line(string_iterable)
for eol in ('\r\n', '\r', '\n'):
if first_line.endswith(eol):
return eol
return os.linesep
@classmethod
def _get_first_line(cls, string_iterable):
if hasattr(string_iterable, 'tell'):
previous_position = string_iterable.tell()
try:
first_line = next(iter(string_iterable))
except StopIteration:
return ''
if hasattr(string_iterable, 'seek'):
string_iterable.seek(previous_position)
return first_line
@classmethod
def _detect_encoding(cls, path):
file_descriptor = open(path, 'rb')
first_chars = file_descriptor.read(BIGGER_BOM)
file_descriptor.close()
for bom, encoding in BOMS:
if first_chars.startswith(bom):
return encoding
# TODO: maybe a chardet integration
return cls.DEFAULT_ENCODING
@classmethod
def _open_unicode_file(cls, path, claimed_encoding=None):
encoding = claimed_encoding or cls._detect_encoding(path)
source_file = codecs.open(path, 'rU', encoding=encoding)
# get rid of BOM if any
possible_bom = CODECS_BOMS.get(encoding, None)
if possible_bom:
file_bom = source_file.read(len(possible_bom))
if not file_bom == possible_bom:
source_file.seek(0) # if not rewind
return source_file
@classmethod
def _handle_error(cls, error, error_handling, index):
if error_handling == cls.ERROR_RAISE:
error.args = (index, ) + error.args
raise error
if error_handling == cls.ERROR_LOG:
name = type(error).__name__
sys.stderr.write('PySRT-%s(line %s): \n' % (name, index))
sys.stderr.write(error.args[0].encode('ascii', 'replace'))
sys.stderr.write('\n')
|