/usr/share/pyshared/spyderlib/utils/encoding.py is in python-spyderlib 2.1.9-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# -*- coding: utf-8 -*-
#
# Copyright © 2009-2010 Pierre Raybaut
# Licensed under the terms of the MIT License
# (see spyderlib/__init__.py for details)
"""
Text encoding utilities, text file I/O
Functions 'get_coding', 'decode', 'encode' and 'to_unicode' come from Eric4
source code (Utilities/__init___.py) Copyright © 2003-2009 Detlev Offenbach
"""
from __future__ import with_statement

import locale
import os
import re
from codecs import (BOM_UTF8, BOM_UTF16, BOM_UTF16_BE, BOM_UTF16_LE,
                    BOM_UTF32, BOM_UTF32_BE, BOM_UTF32_LE)
# Encoding name returned by locale.getpreferredencoding() when this module is
# imported; used as the default for the encoding arguments of transcode().
PREFERRED_ENCODING = locale.getpreferredencoding()
def transcode(text, input=PREFERRED_ENCODING, output=PREFERRED_ENCODING):
    """
    Transcode a text string.

    The text is always decoded as cp437. The result is re-encoded as cp1252
    or, if that fails, as 'output'. When neither conversion succeeds the
    text is returned unchanged.

    @param text text to transcode (string)
    @param input accepted for interface compatibility; not used here
    @param output fallback target encoding (string)
    @return transcoded text, or the original text if transcoding failed
    """
    # Target codecs in order of preference
    for target in ("cp1252", output):
        try:
            return text.decode("cp437").encode(target)
        except UnicodeError:
            continue
    return text
# Matches a "coding:" / "coding=" declaration and captures the codec name
# that follows it (word characters, '-', '_' and '.'); used by get_coding().
CODING_RE = re.compile(r"coding[:=]\s*([-\w_.]+)")
# Codecs tried, in this order, by to_unicode() until one of them succeeds.
CODECS = ['utf-8', 'iso8859-1', 'iso8859-15', 'koi8-r', 'koi8-u',
'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5',
'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9',
'iso8859-10', 'iso8859-13', 'iso8859-14', 'latin-1',
'utf-16']
def get_coding(text):
    """
    Look for a coding declaration in the first two lines of a text.

    @param text text to inspect (string)
    @return coding string, or None when no declaration is found
    """
    header_lines = text.splitlines()[:2]
    for header_line in header_lines:
        match = CODING_RE.search(header_line)
        if match is None:
            continue
        # First declaration found wins
        return match.group(1)
    return None
def decode(text):
    """
    Function to decode a text.

    The encoding is looked for in this order: a byte order mark (UTF-8,
    then UTF-32, then UTF-16, either byte order), a coding declaration
    found by get_coding(), plain UTF-8, and finally Latin-1.

    @param text text to decode (string)
    @return decoded text and encoding
    """
    try:
        if text.startswith(BOM_UTF8):
            # UTF-8 with BOM
            return unicode(text[len(BOM_UTF8):], 'utf-8'), 'utf-8-bom'
        # UTF-32 must be tested before UTF-16: the little-endian UTF-32 BOM
        # (FF FE 00 00) starts with the little-endian UTF-16 BOM (FF FE), so
        # the opposite order would never reach the UTF-32 branch.
        elif text.startswith((BOM_UTF32_LE, BOM_UTF32_BE)):
            # UTF-32 with BOM. The BOM is left in place so that the codec
            # reads the byte order from it (the codec also strips it).
            return unicode(text, 'utf-32'), 'utf-32'
        elif text.startswith((BOM_UTF16_LE, BOM_UTF16_BE)):
            # UTF-16 with BOM, handled the same way as UTF-32 above
            return unicode(text, 'utf-16'), 'utf-16'
        coding = get_coding(text)
        if coding:
            return unicode(text, coding), coding
    except (UnicodeError, LookupError):
        # Wrong or unknown encoding: fall back to guessing below
        pass
    # Assume UTF-8
    try:
        return unicode(text, 'utf-8'), 'utf-8-guessed'
    except (UnicodeError, LookupError):
        pass
    # Assume Latin-1 (behaviour before 3.7.1)
    return unicode(text, "latin-1"), 'latin-1-guessed'
def encode(text, orig_coding):
    """
    Function to encode a text.

    The encoding is chosen in this order: UTF-8 with BOM when the text was
    read that way, the coding declared inside the text, the original
    '-default' encoding, UTF-8 for text guessed as UTF-8, ASCII, and
    finally UTF-8 without BOM.

    @param text text to encode (string)
    @param orig_coding type of the original coding (string)
    @return encoded text and encoding
    @exception RuntimeError the coding declared in the text cannot be used
    """
    # Text read with a UTF-8 BOM is written back with one
    if orig_coding == 'utf-8-bom':
        payload = text.encode("utf-8")
        return BOM_UTF8 + payload, 'utf-8-bom'

    # A coding declaration in the text is binding: failing to honour it is
    # reported instead of silently switching to another encoding
    declared = get_coding(text)
    if declared:
        try:
            encoded = text.encode(declared)
        except (UnicodeError, LookupError):
            raise RuntimeError("Incorrect encoding (%s)" % declared)
        return encoded, declared

    # Original encoding tagged as a default: try it, but do not insist
    if orig_coding and orig_coding.endswith('-default'):
        fallback = orig_coding.replace("-default", "")
        try:
            return text.encode(fallback), fallback
        except (UnicodeError, LookupError):
            pass

    # Unless the text was guessed to be UTF-8, try saving as ASCII first
    if orig_coding != 'utf-8-guessed':
        try:
            return text.encode('ascii'), 'ascii'
        except UnicodeError:
            pass

    # Save as UTF-8 without BOM
    return text.encode('utf-8'), 'utf-8'
def to_unicode(string):
    """
    Convert a string to unicode.

    Each codec of CODECS is tried in turn and the first successful
    conversion is returned. The argument is returned unchanged when it is
    already unicode, when it cannot be decoded at all (TypeError), or when
    no codec succeeds.
    """
    if isinstance(string, unicode):
        return string
    for candidate in CODECS:
        try:
            converted = unicode(string, candidate)
        except UnicodeError:
            # Wrong codec for this data: try the next one
            continue
        except TypeError:
            # Not something that can be decoded: give up
            break
        return converted
    return string
def write(text, filename, encoding='utf-8', mode='wb'):
    """
    Encode 'text' with encode(), using 'encoding' as the original coding,
    and write the result to file ('filename') opened with 'mode'
    Return the encoding that encode() reports (may differ from 'encoding')
    """
    data, used_encoding = encode(text, encoding)
    with open(filename, mode) as stream:
        stream.write(data)
    return used_encoding
def writelines(lines, filename, encoding='utf-8', mode='wb'):
    """
    Join 'lines' with os.linesep and write them to file ('filename')
    through write(), assuming 'encoding'
    Return the encoding returned by write()
    """
    joined = os.linesep.join(lines)
    return write(joined, filename, encoding, mode)
def read(filename, encoding='utf-8'):
    """
    Read text from file ('filename')
    Return text and encoding

    The file is read in binary mode and decoded by decode(). The
    'encoding' argument is accepted but not used: the encoding returned is
    the one detected by decode().
    """
    # Use a context manager so the file is closed as soon as it has been
    # read, even if reading raises
    with open(filename, 'rb') as textfile:
        raw = textfile.read()
    return decode(raw)
def readlines(filename, encoding='utf-8'):
    """
    Read file ('filename') through read() and split its text on os.linesep
    Return the list of lines and the encoding returned by read()
    """
    content, detected = read(filename, encoding)
    line_list = content.split(os.linesep)
    return line_list, detected
|