This file is indexed.

/usr/share/parley/plugins/google_dictionary.py is in parley-data 4:15.12.3-0ubuntu1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/usr/bin/env kf5kross

import socket
import urllib2
import urllib
from sgmllib import SGMLParser
import Parley
import re

# Timeout, in seconds, for the dictionary lookup. It is kept short so that a slow
# connection does not freeze Parley while it waits for a result.
timeout = 1.0
# Note: this sets the default timeout for every socket created after this point,
# not only for the requests made by this script.
socket.setdefaulttimeout(timeout)

# fetches the html document for the given word and language pair
def fetchData(word,from_lang,to_lang):
  """Fetch the dictionary HTML page for the given word and language pair.

  word      -- the word to look up; sent as the "q" query parameter
  from_lang -- source language code; first half of the "langpair" parameter
  to_lang   -- target language code; second half of the "langpair" parameter

  Returns the body of the response, or "" when the request fails, so that
  callers never receive an incomplete page.
  """
  url = "http://translate.google.com/translate_dict"
  query = urllib.urlencode([("q", word), ("langpair", from_lang + "|" + to_lang)])
  request_url = url + "?" + query
  try:
    results = urllib2.urlopen(request_url)
    try:
      return results.read()
    finally:
      # release the connection even when reading fails or times out
      results.close()
  except Exception:
    # Deliberate best effort: any failure yields an empty page instead of
    # incomplete results. Catching Exception rather than using a bare except
    # lets KeyboardInterrupt and SystemExit propagate.
    return ""

#parses data and returns the parser object (that contains the translations/langpairs found)
def parseData(data,word,from_lang,to_lang):
  """Run the HTML parser over a fetched page and return the parser object.

  The word and both languages are stored on the parser before the page is fed
  to it. The returned parser holds the translations and language pairs found.
  """
  parser = myParser()
  parser.word, parser.from_lang, parser.to_lang = word, from_lang, to_lang
  parser.feed(data)
  parser.close()
  return parser

#corrects the difference between the locale names of Parley and the google dictionary
def locale(lang):
  """Translate a Parley locale name into the spelling the Google dictionary uses.

  Names that have no special mapping are returned unchanged.
  """
  google_names = {
    "en_US": "en",
    "zh_TW": "zh-TW",
    "zh_HK": "zh-HK",
    "zh_CN": "zh-CN",
  }
  return google_names.get(lang, lang)

# called by Parley to translate the word
def translateWord(word,from_lang,to_lang):
  print "google_dictionary.py - Translating",word,from_lang,to_lang
  data = fetchData(word,locale(from_lang),locale(to_lang))
  parser = parseData(data,word,from_lang,to_lang)
  #return parser.words

# called by Parley to retrieve the language pairs provided by this script
# should return: [("en","fr"),("en","de")] for translation from english to french and english to german
def getLanguagePairs():
  """Entry point called by Parley to list the language pairs this script provides.

  Returns a list of (from, to) tuples, e.g. [("en","fr"),("en","de")] for
  translation from English to French and from English to German. The pairs are
  read from the <option> values of a dictionary page fetched with a dummy query.
  """
  # any query will do: only the option values of the returned page are used
  probe = ("ignorethis", "en", "fr")
  data = fetchData(*probe)
  # parseData needs the word and both languages in addition to the page
  parser = parseData(data, *probe)
  # skip option values that are not "from|to" pairs; they cannot be split
  return [split_langpair(value) for value in parser.langpairs if "|" in value]

# function to split a language pair string into a tuple
def split_langpair(s):
  """Split a "from|to" language pair string at its first "|" into a (from, to) tuple."""
  source, target = s.split("|", 1)
  return source, target

# ------------ HTML Parser ----------- #

class myParser(SGMLParser):
  #for every start_tagname function you add you have to make sure the tag is added to the self.tags_stack

  def reset(self):
    SGMLParser.reset(self)
    self.words = []         #translated words found in html
    self.langpairs = []     #language pairs found in html file
    self.tags_stack = []
    self.stop = False

  def unknown_starttag(self,tag,attrs):
    self.tags_stack.append(tag)
    #print "unknown : ", tag, " ", len(self.tags_stack)

  def start_span(self, attrs):
    #print "known : ", "span", " ", len(self.tags_stack)
    if ("class","definition") in attrs:
        self.tags_stack.append("<!translation!>") #marks tag with <!translation!> to get its data in handle_data
    else:
        self.tags_stack.append("span")

  def start_option(self, attrs):
    for name,value in attrs:
      if name == "value":
        self.langpairs.append(value)
    self.tags_stack.append("option")

  def handle_data(self,data):
    if data == "Web definitions": self.stop = True #to make it stop after the web definitions
    if len(self.tags_stack) > 0 and self.tags_stack[len(self.tags_stack)-1] == "<!translation!>":
        #print "data: ", data
        self.words.append(data.strip())
        #print self.word, self.from_lang, self.to_lang
        if self.stop == False:
          w = self.clearWord(data)
          Parley.addTranslation(self.word,self.from_lang,self.to_lang,w)
  
  def unknown_endtag(self,tag):
    myParser.remove_not_closed_tags(self,tag)
    if len(self.tags_stack) > 0 and self.tags_stack[len(self.tags_stack)-1] == tag:
        #print "end_tag : ", tag, " ", len(self.tags_stack)
        self.tags_stack.pop()

  #removes all the tags from the stack that have no closed tags (don't modify)
  def remove_not_closed_tags(self,tag):
    while len(self.tags_stack) > 0 and self.tags_stack[len(self.tags_stack)-1] != tag:
      self.tags_stack.pop()

  #cleans up the given word from parentheses etc
  def clearWord(self,word):
    #word = "b[lue] socks (and) red shoes"
    p = re.compile( '(jmdn\.|etw\.)')
    word = p.sub( '', word)
    p = re.compile( '(\(.*\))')
    word = p.sub( '', word)
    p = re.compile( '(\[.*\])')
    word = p.sub( '', word)
    p = re.compile( '(\W)',re.UNICODE)
    word = p.sub( ' ', word)
    #replace double spaces produced from the previous ones
    p = re.compile( '(\s\s)')
    word = p.sub( ' ', word)
    return word.strip()