/usr/lib/python3.2/encodings/utf_8_sig.py is in python3.2-minimal 3.2.3-0ubuntu1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 | """ Python 'utf-8-sig' Codec
This work similar to UTF-8 with the following changes:
* On encoding/writing a UTF-8 encoded BOM will be prepended/written as the
first three bytes.
* On decoding/reading if the first three bytes are a UTF-8 encoded BOM, these
bytes will be skipped.
"""
import codecs
### Codec APIs
def encode(input, errors='strict'):
return (codecs.BOM_UTF8 + codecs.utf_8_encode(input, errors)[0],
len(input))
def decode(input, errors='strict'):
prefix = 0
if input[:3] == codecs.BOM_UTF8:
input = input[3:]
prefix = 3
(output, consumed) = codecs.utf_8_decode(input, errors, True)
return (output, consumed+prefix)
class IncrementalEncoder(codecs.IncrementalEncoder):
def __init__(self, errors='strict'):
codecs.IncrementalEncoder.__init__(self, errors)
self.first = 1
def encode(self, input, final=False):
if self.first:
self.first = 0
return codecs.BOM_UTF8 + \
codecs.utf_8_encode(input, self.errors)[0]
else:
return codecs.utf_8_encode(input, self.errors)[0]
def reset(self):
codecs.IncrementalEncoder.reset(self)
self.first = 1
def getstate(self):
return self.first
def setstate(self, state):
self.first = state
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
def __init__(self, errors='strict'):
codecs.BufferedIncrementalDecoder.__init__(self, errors)
self.first = 1
def _buffer_decode(self, input, errors, final):
if self.first:
if len(input) < 3:
if codecs.BOM_UTF8.startswith(input):
# not enough data to decide if this really is a BOM
# => try again on the next call
return ("", 0)
else:
self.first = 0
else:
self.first = 0
if input[:3] == codecs.BOM_UTF8:
(output, consumed) = \
codecs.utf_8_decode(input[3:], errors, final)
return (output, consumed+3)
return codecs.utf_8_decode(input, errors, final)
def reset(self):
codecs.BufferedIncrementalDecoder.reset(self)
self.first = 1
def getstate(self):
state = codecs.BufferedIncrementalDecoder.getstate(self)
# state[1] must be 0 here, as it isn't passed along to the caller
return (state[0], self.first)
def setstate(self, state):
# state[1] will be ignored by BufferedIncrementalDecoder.setstate()
codecs.BufferedIncrementalDecoder.setstate(self, state)
self.first = state[1]
class StreamWriter(codecs.StreamWriter):
def reset(self):
codecs.StreamWriter.reset(self)
try:
del self.encode
except AttributeError:
pass
def encode(self, input, errors='strict'):
self.encode = codecs.utf_8_encode
return encode(input, errors)
class StreamReader(codecs.StreamReader):
def reset(self):
codecs.StreamReader.reset(self)
try:
del self.decode
except AttributeError:
pass
def decode(self, input, errors='strict'):
if len(input) < 3:
if codecs.BOM_UTF8.startswith(input):
# not enough data to decide if this is a BOM
# => try again on the next call
return ("", 0)
elif input[:3] == codecs.BOM_UTF8:
self.decode = codecs.utf_8_decode
(output, consumed) = codecs.utf_8_decode(input[3:],errors)
return (output, consumed+3)
# (else) no BOM present
self.decode = codecs.utf_8_decode
return codecs.utf_8_decode(input, errors)
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='utf-8-sig',
encode=encode,
decode=decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
|