This file is indexed.

/usr/share/parley/plugins/leo-dict.py is in parley-data 4:15.12.3-0ubuntu1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
#!/usr/bin/env kf5kross
# -*- coding: utf-8 -*-

import socket
import urllib2
import urllib
from sgmllib import SGMLParser
import Parley
import re

#german to french (Glas) [links off]
#http://dict.leo.org/frde?lp=frde&lang=de&searchLoc=1&cmpType=relaxed&sectHdr=on&spellToler=on&chinese=both&pinyin=diacritic&search=Glas&relink=off

#german to spanish (Glas)
#http://dict.leo.org/esde?lp=esde&lang=de&searchLoc=1&cmpType=relaxed&sectHdr=on&spellToler=std&chinese=both&pinyin=diacritic&search=Glas&relink=on

#spanish to german (tarro)
#http://dict.leo.org/esde?lp=esde&lang=de&searchLoc=-1&cmpType=relaxed&sectHdr=on&spellToler=std&chinese=both&pinyin=diacritic&search=tarro&relink=on

#spanish to german (tarro) [links off]
#http://dict.leo.org/esde?lp=esde&lang=de&searchLoc=-1&cmpType=relaxed&sectHdr=on&spellToler=std&chinese=both&pinyin=diacritic&search=tarro&relink=off

# Timeout for the dictionary lookup, in seconds. It is kept short so that a
# slow connection does not freeze Parley while it waits for a result.
timeout = 1.0
# Installs the value as the default timeout of every socket created from now on.
socket.setdefaulttimeout(timeout)

def languageString(from_lang,to_lang):
  """Return the LEO dictionary name for a language pair.

  The result (e.g. 'esde') is the same for both directions of a pair and is
  used by fetchData as the URL path and as the "lp" query parameter.
  Returns None when the pair is not in the table.
  """
  combined = from_lang + "-" + to_lang
  # NOTE(review): Chinese is keyed as 'ch' here while locale() produces
  # 'zh-..' codes, so those entries may never match - confirm.
  A = { 'es-de': 'esde', 'de-es':'esde', 'fr-de':'frde', 'de-fr':'frde', 'en-de':'ende', 'de-en':'ende', 'it-de':'itde', 'de-it':'itde', 'ch-de':'chde', 'de-ch':'chde' }
  # dict.get() yields None for unknown pairs; has_key() is deprecated.
  return A.get(combined)

def searchLoc(from_lang,to_lang):
  """Return the value of the "searchLoc" query parameter.

  1 when the source language is "de", -1 for every other source language.
  to_lang is accepted but not used.
  """
  return 1 if from_lang == "de" else -1

# fetches the html document for the given word and language pair
def fetchData(word,from_lang,to_lang):
  """Download the LEO result page for a word.

  Returns the page content as a string, None when the language pair is not
  supported by languageString, or "" when the request fails (so that no
  incomplete result is handed to the parser).
  """
  # named "pair" so that the module-level locale() function is not shadowed
  pair = languageString(from_lang,to_lang)
  if pair is None:
    return None #not supported languages

  # urllib.urlencode() calls str() on every value, which raises for unicode
  # text containing non-ASCII characters (e.g. umlauts).
  # NOTE(review): UTF-8 is assumed to be what the server expects - confirm.
  if isinstance(word, unicode):
    word = word.encode("utf-8")

  #esde?lp=esde&lang=de&searchLoc=-1&cmpType=relaxed&sectHdr=on&spellToler=std&chinese=both&pinyin=diacritic&search=tarro&relink=off
  params = [("lp",pair),("lang","de"),("searchLoc",searchLoc(from_lang,to_lang)),("cmpType","relaxed"),("sectHdr","on"),("spellToler","std"),("chinese","both"),("pinyin","diacritic"),("search",word),("relink","off")]
  request_url = "http://dict.leo.org/" + pair + "?" + urllib.urlencode(params)

  try:
    results = urllib2.urlopen(request_url)
    try:
      return results.read()
    finally:
      # always release the connection, also when read() times out
      results.close()
  except Exception:
    # Best effort: any network/HTTP failure means "no result". Unlike a bare
    # except this lets KeyboardInterrupt and SystemExit through.
    return ""

#parses data and returns the parser object
def parseData(data,word,from_lang,to_lang):
  """Run a myParser over a downloaded result page.

  word, from_lang and to_lang are stored on the parser, which reads them
  while handling the table cells. data may be None or "" (fetchData returns
  those for unsupported language pairs and failed requests); in that case
  nothing is fed to the parser. The parser object is returned.
  """
  p = myParser()
  p.word = word
  p.from_lang = from_lang
  p.to_lang = to_lang
  # feeding None would raise inside the parser, and "" has nothing to parse
  if data:
    p.feed(data)
  p.close()
  return p

#maps the locale names of Parley to the language codes used for the lookup
def locale(lang):
  """Return the language code for a Parley locale name.

  Both English locales become "en", the three listed Chinese locales get a
  dash instead of the underscore, and any other value is returned unchanged.
  """
  known = (
    ("en_US", "en"),
    ("en_GB", "en"),
    ("zh_TW", "zh-TW"),
    ("zh_HK", "zh-HK"),
    ("zh_CN", "zh-CN"),
  )
  for parley_name, code in known:
    if lang == parley_name:
      return code
  return lang

# called by Parley to translate the word
def translateWord(word,from_lang,to_lang):
  print "dict-leo.py - Translating:",word, "from", from_lang, "to", to_lang
  data = fetchData(word,locale(from_lang),locale(to_lang))
  if data != None:
    print "Data Fetched for:",word
    #print data
  #print data
  parser = parseData(data,word,from_lang,to_lang)
  #return parser.words

# called by Parley to retrieve the language pairs provided by this script
# should return: [("en","fr"),("en","de")] for translation from english to french and english to german
def getLanguagePairs():
  """Return the list of (from, to) language pairs this script supports.

  The previous implementation could not work: it fetched the unsupported
  pair en->fr, called parseData with a single argument and read a
  "langpairs" attribute that the parser never defines. The pairs are a fixed
  set (German to or from each partner language), so they are listed directly.
  """
  # Keep in sync with the table in languageString.
  # NOTE(review): 'ch' is the code languageString uses for Chinese; check
  # whether Parley ever passes that code.
  partners = ("es", "fr", "en", "it", "ch")
  pairs = []
  for lang in partners:
    pairs.append((lang, "de"))
    pairs.append(("de", lang))
  return pairs

# turns a language pair string such as "en|fr" into the tuple ("en","fr")
def split_langpair(s):
  """Split s at the first "|" and return both parts as a tuple.

  Only the first "|" separates the parts; a string without any "|" makes
  the unpacking fail with ValueError.
  """
  source, target = s.split("|", 1)
  return (source, target)

# ------------ HTML Parser ----------- #

class myParser(SGMLParser):
  """Extracts the translations of one word from a LEO result page.

  Before feed() is called the attributes word, from_lang and to_lang must be
  set on the instance (parseData does this); end_td reads them. Every
  translation found is reported through Parley.addTranslation.

  The parser compares the cleaned text of each <td> with the searched word.
  On a match the translation is taken from the cell two positions earlier
  (from_lang == "de") or from the second <td> that starts afterwards (any
  other from_lang).
  """

  # Patterns used by clearWord, compiled once instead of on every call.
  _RE_PLACEHOLDERS = re.compile(r'(jmdn\.|etw\.)')
  # Non-greedy, so that "(a) word (b)" loses both groups but keeps "word".
  _RE_PARENTHESES = re.compile(r'(\(.*?\))')
  _RE_BRACKETS = re.compile(r'(\[.*?\])')
  _RE_NON_WORD = re.compile(r'(\W)', re.UNICODE)
  # Whole runs of whitespace: the removals above can leave more than two.
  _RE_WHITESPACE = re.compile(r'(\s+)')

  def reset(self):
    """Initialise the parsing state (SGMLParser calls this on creation)."""
    SGMLParser.reset(self)
    self.td_data_stack = []         # cleaned text of the last two finished <td>'s
    self.in_results_table = False   # set by start_table, not read in this class
    self.td_data = ""               # text collected for the current <td>
    self.td_after = 0               # <td>'s started since the keyword was found
    self.keyword_found = False
    self.in_translation_td = False  # current <td> is the second one after the keyword
    self.in_small = False

  def start_table(self,attrs):
    """Mark the results table (the table with id="results")."""
    if ("id","results") in attrs:
      self.in_results_table = True

  def start_small(self,attrs):
    """Start ignoring text: the content of <small> tags is not wanted.

    No end_small handler is defined, so text stays ignored until end_td
    clears the flag at the end of the cell.
    """
    self.in_small = True

  def end_td(self):
    """Check a finished <td> for the keyword or for its translation."""
    self.td_data = self.clearWord(self.td_data)

    #matching on the second column (german) and getting the translation from 2 column's on the left
    if self.td_data.lower() == self.word.lower(): #found word
      self.keyword_found = True
      # The stack is empty when the keyword sits in the very first cell;
      # there is no earlier cell to take a translation from then.
      if self.from_lang == "de" and self.td_data_stack:
        Parley.addTranslation(self.word,self.from_lang,self.to_lang,self.td_data_stack[0].strip())

    #matching on the first column (not german) and getting the translation from 2 column's on the right
    if self.in_translation_td: #found translation
      if self.from_lang != "de":
        Parley.addTranslation(self.word,self.from_lang,self.to_lang,self.td_data.strip())
      self.in_translation_td = False
      self.td_after = 0
      self.keyword_found = False

    #keep only the last 2 td_data (works since we append only one item at a time)
    self.td_data_stack.append(self.td_data)
    if len(self.td_data_stack) > 2:
      self.td_data_stack.pop(0)

    self.td_data = ""
    self.in_small = False

  def start_td(self,attrs):
    """Count the <td>'s after the keyword; the second one holds the translation."""
    if self.keyword_found:
      self.td_after += 1
      if self.td_after == 2:
        self.in_translation_td = True

  def handle_data(self,data):
    """Collect the text of the current cell, skipping text after <small>."""
    if self.in_small:
      return
    self.td_data += data

  def clearWord(self,word):
    """Return word without annotations and punctuation.

    Removes "jmdn." / "etw.", everything in parentheses or square brackets,
    turns every non-word character into a space and collapses whitespace,
    e.g. "b[lue] socks (and) red shoes" -> "b socks red shoes".
    """
    word = self._RE_PLACEHOLDERS.sub('', word)
    word = self._RE_PARENTHESES.sub('', word)
    word = self._RE_BRACKETS.sub('', word)
    word = self._RE_NON_WORD.sub(' ', word)
    #collapse the whitespace runs produced by the previous substitutions
    word = self._RE_WHITESPACE.sub(' ', word)
    return word.strip()