/usr/share/pyshared/hyphen/dictools.py is in python-pyhyphen 1.0~beta1-2build1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# PyHyphen - hyphenation for Python
# module: dictools
'''
This module contains convenience functions to handle hyphenation dictionaries.
'''
import os, urllib2, csv, pickle, config, hyphen
from StringIO import StringIO
from zipfile import ZipFile
__all__ = ['install', 'is_installed', 'uninstall', 'list_installed', 'install_dict_info']
def list_installed(directory = config.default_dict_path):
    '''List the dictionaries found in 'directory'.

    Every entry of 'directory' whose name starts with 'hyph_' and ends with
    '.dic' counts as an installed dictionary. The returned list holds what is
    left of each such name once that prefix and suffix are stripped, i.e. the
    language and country code.

    directory: the directory to scan (default as declared in config.py).

    Example: file name = 'hyph_en_US.dic'. Return value: ['en_US']
    '''
    prefix, suffix = 'hyph_', '.dic'
    codes = []
    for entry in os.listdir(directory):
        if not entry.startswith(prefix):
            continue
        if not entry.endswith(suffix):
            continue
        # Keep only the part between the fixed prefix and the extension.
        codes.append(entry[len(prefix):-len(suffix)])
    return codes
def is_installed(language, directory = config.default_dict_path):
    '''Tell whether a dictionary for 'language' is present in 'directory'.

    language: by convention a string of the form 'll_CC',
        e.g. 'en_US' for US English.
    directory: the directory to look in (default as declared in config.py).

    Returns True if 'language' is among the codes reported by
    list_installed(directory), False otherwise.
    '''
    installed_codes = list_installed(directory)
    return language in installed_codes
def install(language, directory = config.default_dict_path,
            repos = config.default_repository):
    '''
    Download a dictionary archive and install the '.dic' file it contains.

    language: a string of the form 'll_CC'. Example: 'en_US' for English, USA
    directory: the installation directory. Defaults to the
        value given in config.py.
    repos: the url prefix of the dictionary repository; the archive file name
        is appended to it verbatim. (Default: as declared in config.py.)

    If hyphen.dict_info has an entry for 'language', its 'file_name' field
    names the archive to download and its 'name' field (plus '.dic') names the
    dictionary inside the archive. Without such an entry both names fall back
    to 'hyph_<language>.dic'.

    Raises IOError if the downloaded ZIP archive fails its integrity test.
    Errors raised by urllib2, zipfile or the file system are not caught here.
    '''
    fallback_name = 'hyph_' + language + '.dic'
    if hyphen.dict_info and language in hyphen.dict_info:
        info = hyphen.dict_info[language]
        archive_name = info['file_name']
        dic_filename = ''.join((info['name'], '.dic'))
    else:
        # No metadata for this language: derive both names from the language
        # code instead of indexing hyphen.dict_info, which may be empty or
        # lack the key.
        # NOTE(review): the fallback archive name ends in '.dic' although the
        # payload is opened as a ZIP archive below -- confirm this matches the
        # repository layout.
        archive_name = fallback_name
        dic_filename = fallback_name

    response = urllib2.urlopen(''.join((repos, archive_name)))
    try:
        payload = response.read()
    finally:
        response.close()

    archive = ZipFile(StringIO(payload))
    try:
        # testzip() returns the name of the first corrupt member, or None.
        if archive.testzip():
            raise IOError('The ZIP archive containing the dictionary is corrupt.')
        dic_str = archive.read(dic_filename)
    finally:
        archive.close()

    # Binary mode keeps the dictionary bytes exactly as stored in the archive
    # (text mode would rewrite line endings on some platforms).
    with open(os.path.join(directory, dic_filename), 'wb') as dest:
        dest.write(dic_str)
def uninstall(language, directory = config.default_dict_path):
    '''
    Uninstall the dictionary of the specified language.

    language: by convention a string of the form 'll_CC' whereby ll is the
        language code and CC the country code.
    directory: the directory holding the dictionary file
        (default: config.default_dict_path).

    The file name is taken from the 'name' field of hyphen.dict_info[language]
    when such an entry exists; otherwise it falls back to
    'hyph_<language>.dic', mirroring the lookup done by install().

    Errors from os.remove (e.g. the file does not exist) are not caught.
    '''
    # Check for the language itself, not just for non-empty metadata, so that
    # a language missing from dict_info uses the conventional file name
    # instead of failing with a KeyError.
    if hyphen.dict_info and language in hyphen.dict_info:
        base_name = hyphen.dict_info[language]['name']
    else:
        base_name = 'hyph_' + language
    os.remove(os.path.join(directory, base_name + '.dic'))
def install_dict_info(save = True, directory = config.default_dict_path):
    '''Download metadata on the available dictionaries and optionally store it.

    The listing is fetched from a fixed URL and parsed as CSV with the columns
    'language_code', 'country_code', 'name', 'long_descr' and 'file_name'.

    save: if true, pickle the resulting dictionary to
        '<directory>/dict_info.pickle'.
    directory: where the pickle file is written
        (default as declared in config.py).

    Returns a dict mapping '<language_code>_<country_code>' to the parsed row
    (itself a dict keyed by the column names above).
    '''
    response = urllib2.urlopen('http://ftp.osuosl.org/pub/openoffice/contrib/dictionaries/hyphavail.lst')
    try:
        # Read the listing in one piece; re-joining readlines() output with
        # '\n' would double every line break.
        listing = response.read()
    finally:
        response.close()

    rows = csv.DictReader(StringIO(listing),
                          fieldnames = ['language_code', 'country_code',
                                        'name', 'long_descr', 'file_name'])
    avail_dict = {}
    for row in rows:
        key = '_'.join((row['language_code'], row['country_code']))
        avail_dict[key] = row

    if save:
        # Pickle data belongs in a binary-mode file so that no line-ending
        # translation can alter it.
        with open(os.path.join(directory, 'dict_info.pickle'), 'wb') as f:
            pickle.dump(avail_dict, f)
    return avail_dict
|