/usr/share/subdownloader/FileManagement/Subtitle.py is in subdownloader 2.0.14-1.1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
#!/usr/bin/env python
# Copyright (c) 2010 SubDownloader Developers - See COPYING - GPLv3
import re, sys, os, logging
import modules.videofile as videofile
import modules.subtitlefile as subtitlefile
from FileManagement import FileScan,get_extension, clear_string, without_extension
from languages import Languages, autodetect_lang
# Logger shared by the functions of this module.
log = logging.getLogger("subdownloader.FileManagement.Subtitle")
def AutoDetectSubtitle(pathvideofile, sub_list=None):
    """Try to guess the subtitle that belongs to the given video file.

    Four strategies are tried in order and the first hit wins:

    1. a subtitle named exactly like the video (extension aside);
    2. a subtitle whose cleaned name contains the cleaned video name, or
       the other way around;
    3. only when sub_list is given: the best candidate according to
       score_subtitles(), provided its score is above zero;
    4. the only subtitle that was found, when there is exactly one.

    @pathvideofile: path of an existing video file
    @sub_list: optional collection of candidate subtitle names. When it is
        given and non-empty the candidates come from it instead of from
        the folder of the video.
    returns an entry of sub_list when sub_list is used, otherwise the full
        path of the subtitle found next to the video; "" when nothing
        matches or pathvideofile is not an existing file
    """
    log.debug("----------------")
    log.debug("AutoDetectSubtitle started with: %r, %r"% (pathvideofile, sub_list))

    if not os.path.isfile(pathvideofile):
        log.debug("AutoDetectSubtitle argument must be a complete video path")
        return ""
    videofolder = os.path.dirname(pathvideofile)
    filename1_noextension = without_extension(os.path.basename(pathvideofile))

    def found(name):
        # Entries of sub_list are handed back untouched; names discovered
        # on disk are turned into full paths.
        if sub_list:
            return name
        return os.path.join(videofolder, name)

    #1st METHOD: SAME FILENAME AS THE VIDEO, EXTENSION ASIDE
    log.debug("1st method starting...")
    for ext in subtitlefile.SUBTITLES_EXT:
        possiblefilenamesrt = filename1_noextension + "." + ext
        if sub_list:
            # A membership test works for lists and dicts alike, whereas
            # .index() only exists on lists.
            if possiblefilenamesrt in sub_list:
                return possiblefilenamesrt
            log.debug("%r is not in the subtitle list", possiblefilenamesrt)
        elif os.path.exists(os.path.join(videofolder, possiblefilenamesrt)):
            return os.path.join(videofolder, possiblefilenamesrt)

    #2nd METHOD: FIND THE VIDEO NAME MERGED INTO THE SUB NAME (OR VICE VERSA)
    log.debug("2nd method starting...")
    cleaned_file = clear_string(filename1_noextension.lower())
    filesfound = []
    if sub_list:
        search_list = sub_list
    else:
        search_list = os.listdir(videofolder)
    for filename in search_list:
        for ext in subtitlefile.SUBTITLES_EXT:
            try:
                if not filename.lower().endswith("." + ext):
                    continue
                filesfound.append(filename)  # To be used in the 4th method
                cleaned_found = clear_string(without_extension(filename.lower()))
                video_in_sub = ("srt" in subtitlefile.SUBTITLES_EXT
                                and cleaned_found.find(cleaned_file) != -1)
                if video_in_sub or cleaned_file.find(cleaned_found) != -1:
                    return found(filename)
            except AttributeError as e:
                # Raised by entries that do not behave like strings.
                log.error(e)

    #3rd METHOD: SCORE EVERY SUBTITLE (this needs the sub_list)
    if sub_list:
        log.debug("3rd method starting...")
        sub_scores = score_subtitles(pathvideofile, sub_list)
        if sub_scores:
            # max() keeps the first of several equally scored candidates.
            best_scored_sub = max(sub_scores, key=sub_scores.get)
            if sub_scores[best_scored_sub] > 0:
                return best_scored_sub
    else:
        log.debug("3rd was skipped")

    #4th METHOD: WE TAKE THE SUB IF THERE IS ONLY ONE
    log.debug("4th method starting...")
    if len(filesfound) == 1:
        return found(filesfound[0])

    return ""
def score_subtitles(video, subtitle_list):
    """Do a pseudo scoring of the subtitles against the video file name.

    A subtitle file name is split on the first non-word character it
    contains, and one point is given for every non-empty piece that also
    occurs in the base name of the video. A name without any non-word
    character scores 0.

    @video: video file name
    @subtitle_list: list of subtitle file names, or a dictionary whose
        values provide getFileName()
    returns dictionary like {'subtitle_file_name': score}; its keys are the
        list entries, or the keys of the dictionary that was passed in
    """
    log.debug("Subtitle scoring started with: %r, %r"% (video, subtitle_list))
    video_name = os.path.basename(video)

    # Pair every result key with the file name that gets scored for it.
    if isinstance(subtitle_list, dict):
        named_subs = [(key, sub.getFileName()) for key, sub in subtitle_list.items()]
    else:
        # Entries of a plain list are the file names themselves.
        named_subs = [(name, name) for name in (subtitle_list or ())]

    sub_dict = {}
    for sub, sub_name in named_subs:
        score = 0
        # fetch the separating character
        separator = re.search(r"\W", sub_name)
        if separator:
            # look for each word of the subtitle name in the video file name
            for word in sub_name.split(separator.group(0)):
                # "" is contained in every string, so empty pieces must
                # not earn a point.
                if word and word in video_name:
                    score += 1
        sub_dict[sub] = score
        log.debug("scoring for %s is %i"% (sub_name, score))
    # return scored subtitles
    return sub_dict
def GetLangFromFilename(filepath):
    """Extract the language part of a file name with at least two dots.

    @filepath: path or name of the file
    returns get_extension() of the base name once its extension has been
        removed (presumably the "en" of "movie.en.srt" -- depends on the
        helpers), or "" when the base name holds fewer than two dots
    """
    name = os.path.basename(filepath)
    # With fewer than two dots there is no room for a language part.
    if name.count('.') < 2:
        return ""
    return get_extension(without_extension(name))
#FIXME: when the language is 'Brazilian' the wrong value is returned ('Bra' instead of 'pob')
def AutoDetectLang(filepath):
    """Guess the language of a subtitle file from its content.

    @filepath: path of the file to inspect
    returns the language reported by the n-gram classifier, cut at its last
        "-" (for example belarus-windows1251.lm gives "belarus"), or ""
        when filepath does not have a subtitle extension
    """
    if not isSubtitle(filepath):
        return ""

    # Read inside a context manager so the handle is closed right away,
    # also when read() fails.
    with open(filepath, 'rb') as subtitle_file:
        subtitle_content = subtitle_file.read()
    # NOTE(review): the return value is ignored, so the classifier below
    # still sees the content as it was read -- confirm whether the cleaned
    # text was meant to be used instead.
    Languages.CleanTagsFile(subtitle_content)

    #Initializing the Language Detector
    # NOTE(review): n is never used afterwards; the call is kept in case
    # constructing _NGram has side effects -- TODO confirm and remove.
    n = autodetect_lang._NGram()
    detector = autodetect_lang.NGram()
    percentage, lang = detector.classify(subtitle_content)

    # for example belarus-windows1251.lm: we only need belarus
    pos = lang.rfind("-")
    if pos != -1:
        return lang[:pos]
    return lang
def subtitle_name_gen(video_filename, extension=".srt"):
    """Generate a subtitle file name given the video file name.

    @video_filename: file name of the video as a string, or an object
        that provides getFileName()
    @extension: suffix appended to the video name once its own extension
        has been removed
    returns the subtitle file name, or "" when video_filename is of an
        unsupported kind or its extension is not in videofile.VIDEOS_EXT
    """
    if isinstance(video_filename, str):
        filename = video_filename
    elif hasattr(video_filename, "getFileName"):
        # videofile is a module, so it cannot be handed to isinstance()
        # (that raises TypeError); rely on the one method that is needed.
        filename = video_filename.getFileName()
    else:
        filename = None

    video_name = ""
    if filename is not None and get_extension(filename) in videofile.VIDEOS_EXT:
        video_name = without_extension(filename)

    if video_name:
        return video_name + extension
    log.debug("No video name to generate subtitle file name")
    return ""
def isSubtitle(filepath):
    """Tell whether filepath carries one of the known subtitle extensions.

    The extension is lower-cased before it is looked up in
    subtitlefile.SUBTITLES_EXT.
    """
    extension = get_extension(filepath).lower()
    return extension in subtitlefile.SUBTITLES_EXT
|