/usr/share/pyshared/twisted/web2/fileupload.py is in python-twisted-web2 8.1.0-3.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 | from __future__ import generators
import re
from zope.interface import implements
import urllib
import tempfile
from twisted.internet import defer
from twisted.web2.stream import IStream, FileStream, BufferedStream, readStream
from twisted.web2.stream import generatorToStream, readAndDiscard
from twisted.web2 import http_headers
from cStringIO import StringIO
###################################
##### Multipart MIME Reader #####
###################################
class MimeFormatError(Exception):
    """Raised when a request body is malformed or exceeds a configured limit."""
# Browsers do not apply sensible quoting rules to Content-Disposition
# parameters (an unescaped double quote may appear inside the filename),
# so the header is matched with a deliberately crude pattern: the filename
# group is greedy and runs up to the last double quote on the line.
cd_regexp = re.compile(
    ' *form-data; *name="([^"]*)"(?:; *filename="(.*)")?$',
    re.IGNORECASE)


def parseContentDispositionFormData(value):
    """Split a form-data Content-Disposition value into name and filename.

    value is the raw header value. Returns a (name, filename) tuple in
    which filename is None when the header has no filename parameter.
    Raises ValueError if value does not match the expected format.
    """
    found = cd_regexp.match(value)
    if found is None:
        # Error parsing.
        raise ValueError("Unknown content-disposition format.")
    return found.group(1), found.group(2)
#@defer.deferredGenerator
def _readHeaders(stream):
    """Read the MIME headers of one part of a multipart body.

    Assumes we've just finished reading in the boundary string.

    stream must offer readline(size=...), returning either a line or a
    Deferred that fires with one.

    The result (delivered through deferredGenerator) is the tuple
    (fieldname, filename, ctype).  When the part carries no Content-Type
    header, ctype defaults to application/octet-stream.

    Raises MimeFormatError if the stream ends before the blank line that
    terminates the headers, if a line is not terminated by CRLF within
    1024 bytes, if a header line has no colon, or if no Content-Disposition
    header supplied a field name.  A ValueError raised by
    parseContentDispositionFormData is not caught here.
    """
    ctype = fieldname = filename = None

    # Now read headers
    while 1:
        line = stream.readline(size=1024)
        if isinstance(line, defer.Deferred):
            line = defer.waitForDeferred(line)
            yield line
            line = line.getResult()
        if not line.endswith('\r\n'):
            if line == "":
                raise MimeFormatError("Unexpected end of stream.")
            else:
                raise MimeFormatError("Header line too long")

        line = line[:-2] # strip \r\n
        if line == "":
            break # End of headers

        parts = line.split(':', 1)
        if len(parts) != 2:
            raise MimeFormatError("Header did not have a :")
        name, value = parts
        name = name.lower()

        if name == "content-type":
            ctype = http_headers.parseContentType(http_headers.tokenize((value,), foldCase=False))
        elif name == "content-disposition":
            fieldname, filename = parseContentDispositionFormData(value)

    if ctype is None:
        # Was a no-op comparison ("==") before; this must be an assignment
        # for the default content type to take effect.
        ctype = http_headers.MimeType('application', 'octet-stream')
    if fieldname is None:
        raise MimeFormatError('Content-disposition invalid or omitted.')

    # End of headers, return (field name, filename, content-type)
    yield fieldname, filename, ctype
    return
_readHeaders = defer.deferredGenerator(_readHeaders)
class _BoundaryWatchingStream(object):
    """Stream wrapper that hands out data up to the next boundary string.

    Chunks read from the wrapped stream are passed through until the
    boundary is found.  Whatever follows the boundary is pushed back onto
    the wrapped stream, and later reads return None.  self.deferred is
    fired with None on the first read after the boundary was seen, or
    errbacked when an asynchronous read of the wrapped stream fails.
    """

    length = None # unknown

    def __init__(self, stream, boundary):
        self.stream = stream
        self.boundary = boundary
        self.data = ''
        self.deferred = defer.Deferred()

    def read(self):
        """Return the next chunk (or a Deferred for it); None once done."""
        if self.stream is not None:
            chunk = self.stream.read()
            if isinstance(chunk, defer.Deferred):
                return chunk.addCallbacks(self._gotRead, self._gotError)
            return self._gotRead(chunk)

        # The boundary has already been consumed: fire the completion
        # deferred (at most once) and report end of data.
        pending, self.deferred = self.deferred, None
        if pending is not None:
            pending.callback(None)
        return None

    def _gotRead(self, newdata):
        """Process one chunk, returning the part of it that is payload."""
        if not newdata:
            raise MimeFormatError("Unexpected EOF")
        # BLECH, converting buffer back into string.
        self.data += str(newdata)
        buffered = self.data
        marker = self.boundary
        found = buffered.find(marker)

        if found != -1:
            # Full boundary seen: give the remainder back to the wrapped
            # stream and detach from it.
            self.stream.pushback(buffered[found + len(marker):])
            self.stream = None
            return buffered[:found]

        # No full boundary, check the tail for its first character
        tailStart = max(0, len(buffered) - len(marker))
        partial = buffered.rfind(marker[0], tailStart)
        if partial == -1:
            self.data = ''
            return buffered
        # We could have a partial boundary, store it for next time
        self.data = buffered[partial:]
        return buffered[:partial]

    def _gotError(self, err):
        """Forward a read failure to self.deferred and keep propagating it."""
        # Propagate error back to MultipartMimeStream also
        pending, self.deferred = self.deferred, None
        if pending is not None:
            pending.errback(err)
        return err

    def close(self):
        """Drain the remaining data, ignoring any error while doing so."""
        # Assume error will be raised again and handled by MMS?
        readAndDiscard(self).addErrback(lambda _: None)
class MultipartMimeStream(object):
    """Stream whose items are the successive parts of a multipart body.

    The wrapped stream is buffered so that it can be read line by line,
    and the boundary is stored with its leading "--" already prepended.
    """
    implements(IStream)

    def __init__(self, stream, boundary):
        self.stream = BufferedStream(stream)
        self.boundary = "--" + boundary
        self.first = True

    def read(self):
        """
        Return a deferred which will fire with a tuple of:
        (fieldname, filename, ctype, dataStream)
        or with None when all parts have been read.

        Format errors will be sent to the errback.

        IMPORTANT: you *must* exhaust dataStream returned by this call
        before calling .read() again!
        """
        if not self.first:
            d = self._readBoundaryLine()
        else:
            self.first = False
            d = self._readFirstBoundary()
        d.addCallback(self._doReadHeaders)
        d.addCallback(self._gotHeaders)
        return d

    def _readFirstBoundary(self):
        """Consume the opening boundary line; the result is True.

        Afterwards self.boundary gains a leading CRLF.
        """
        line = self.stream.readline(size=1024)
        if isinstance(line, defer.Deferred):
            line = defer.waitForDeferred(line)
            yield line
            line = line.getResult()
        expected = self.boundary + '\r\n'
        if line != expected:
            raise MimeFormatError("Extra data before first boundary: %r looking for: %r" % (line, expected))

        self.boundary = "\r\n" + self.boundary
        yield True
        return
    _readFirstBoundary = defer.deferredGenerator(_readFirstBoundary)

    def _readBoundaryLine(self):
        """Consume the rest of a boundary line.

        The result is True when another part follows (the line was just
        CRLF) and False when the closing "--" marker was found.
        """
        line = self.stream.readline(size=1024)
        if isinstance(line, defer.Deferred):
            line = defer.waitForDeferred(line)
            yield line
            line = line.getResult()
        if line == "--\r\n":
            # THE END!
            yield False
            return
        if line != "\r\n":
            raise MimeFormatError("Unexpected data on same line as boundary: %r" % (line,))
        yield True
        return
    _readBoundaryLine = defer.deferredGenerator(_readBoundaryLine)

    def _doReadHeaders(self, morefields):
        """Read the next part's headers, or return None if there is no part."""
        if morefields:
            return _readHeaders(self.stream)
        return None

    def _gotHeaders(self, headers):
        """Append a data stream for the part's body to the header tuple."""
        if headers is None:
            return None
        bws = _BoundaryWatchingStream(self.stream, self.boundary)
        self.deferred = bws.deferred
        return tuple(headers) + (bws,)
def readIntoFile(stream, outFile, maxlen):
    """Copy the stream into a file object, refusing more than maxlen bytes.

    Each chunk read from stream is written to outFile.  As soon as the
    running total of chunk lengths exceeds maxlen, MimeFormatError is
    raised from the write callback instead of writing the chunk.

    Returns Deferred which will be triggered on finish.
    """
    # A one-element list lets the nested function update the running total.
    total = [0]

    def passthrough(result):
        return result

    def sink(chunk):
        total[0] = total[0] + len(chunk)
        if total[0] > maxlen:
            raise MimeFormatError("Maximum length of %d bytes exceeded." %
                                  maxlen)
        outFile.write(chunk)

    finished = readStream(stream, sink)
    return finished.addBoth(passthrough)
#@defer.deferredGenerator
def parseMultipartFormData(stream, boundary,
                           maxMem=100*1024, maxFields=1024, maxSize=10*1024*1024):
    """Parse a multipart/form-data body into form fields and file uploads.

    The result (delivered through deferredGenerator) is the tuple
    (args, files).  args maps each field name to a list of string values.
    files maps each field name to a list of (filename, ctype, outfile)
    tuples, where outfile is a NamedTemporaryFile rewound to its start.

    maxMem bounds the data kept in memory for plain fields and maxSize
    bounds the overall amount of data read; both budgets shrink as parts
    are consumed.  MimeFormatError is raised when a budget is exceeded or
    when the number of parts read reaches maxFields.
    """
    # If the stream length is known to be too large upfront, abort immediately
    if stream.length is not None and stream.length > maxSize:
        raise MimeFormatError("Maximum length of %d bytes exceeded." %
                              maxSize)

    parts = MultipartMimeStream(stream, boundary)
    fieldCount = 0
    args, files = {}, {}

    while True:
        part = parts.read()
        if isinstance(part, defer.Deferred):
            part = defer.waitForDeferred(part)
            yield part
            part = part.getResult()
        if part is None:
            break

        fieldCount += 1
        if fieldCount == maxFields:
            raise MimeFormatError("Maximum number of fields %d exceeded"%maxFields)

        # Parse data
        fieldname, filename, ctype, body = part
        isUpload = filename is not None
        if isUpload:
            outfile = tempfile.NamedTemporaryFile()
            limit = maxSize
        else:
            # Plain fields are buffered in memory, so both budgets apply.
            outfile = StringIO()
            limit = min(maxSize, maxMem)

        copied = readIntoFile(body, outfile, limit)
        if isinstance(copied, defer.Deferred):
            copied = defer.waitForDeferred(copied)
            yield copied
            copied = copied.getResult()

        if isUpload:
            # Is a file upload
            maxSize -= outfile.tell()
            outfile.seek(0)
            files.setdefault(fieldname, []).append((filename, ctype, outfile))
        else:
            # Is a normal form field
            outfile.seek(0)
            value = outfile.read()
            args.setdefault(fieldname, []).append(value)
            maxMem -= len(value)
            maxSize -= len(value)

    yield args, files
    return
parseMultipartFormData = defer.deferredGenerator(parseMultipartFormData)
###################################
##### x-www-urlencoded reader #####
###################################
def parse_urlencoded_stream(input, maxMem=100*1024,
                     keep_blank_values=False, strict_parsing=False):
    """Generator that turns url-encoded form data into (name, value) pairs.

    It is wrapped with generatorToStream below; chunks are obtained by
    yielding input.wait and then calling input.next().

    Chunks are split on '&'.  The last, possibly incomplete, piece of each
    chunk is held back and prepended to the next chunk, so a pair may span
    chunk boundaries.  '+' is turned into a space and %-escapes are
    decoded in both names and values.

    maxMem caps the total number of bytes accepted.  Pairs with an empty
    value are dropped unless keep_blank_values is true.  Pieces without
    '=' are skipped unless strict_parsing is true.

    Raises MimeFormatError when more than maxMem bytes are received, or,
    with strict_parsing, when a piece contains no '='.
    """
    # maxMem is decremented as data arrives; keep the configured value so
    # that the error message reports the real limit.
    memLimit = maxMem
    lastdata = ''
    still_going = 1

    while still_going:
        try:
            yield input.wait
            data = input.next()
        except StopIteration:
            # End of input: whatever was held back is the final piece.
            pairs = [lastdata]
            still_going = 0
        else:
            maxMem -= len(data)
            if maxMem < 0:
                raise MimeFormatError("Maximum length of %d bytes exceeded." %
                                      memLimit)
            pairs = str(data).split('&')
            pairs[0] = lastdata + pairs[0]
            lastdata = pairs.pop()

        for name_value in pairs:
            nv = name_value.split('=', 1)
            if len(nv) != 2:
                if strict_parsing:
                    # Format the message before constructing the exception;
                    # applying % to the exception instance is a TypeError.
                    raise MimeFormatError(
                        "bad query field: %s" % (repr(name_value),))
                continue
            if len(nv[1]) or keep_blank_values:
                name = urllib.unquote(nv[0].replace('+', ' '))
                value = urllib.unquote(nv[1].replace('+', ' '))
                yield name, value
parse_urlencoded_stream = generatorToStream(parse_urlencoded_stream)
def parse_urlencoded(stream, maxMem=100*1024, maxFields=1024,
                     keep_blank_values=False, strict_parsing=False):
    """Collect url-encoded form data into a dict of value lists.

    The pairs produced by parse_urlencoded_stream are gathered into a dict
    mapping each name to the list of its values, in order of arrival; that
    dict is the result delivered through deferredGenerator.

    maxMem, keep_blank_values and strict_parsing are passed on to
    parse_urlencoded_stream.  MimeFormatError is raised when the number of
    pairs read reaches maxFields.
    """
    fields = {}
    seen = 0
    pairStream = parse_urlencoded_stream(stream, maxMem, keep_blank_values, strict_parsing)

    while True:
        item = pairStream.read()
        if isinstance(item, defer.Deferred):
            item = defer.waitForDeferred(item)
            yield item
            item = item.getResult()
        if item is None:
            break
        name, value = item

        seen += 1
        if seen == maxFields:
            raise MimeFormatError("Maximum number of fields %d exceeded"%maxFields)

        fields.setdefault(name, []).append(value)
    yield fields
    return
parse_urlencoded = defer.deferredGenerator(parse_urlencoded)
if __name__ == '__main__':
    # Manual check: parse ./upload.txt with a fixed boundary, log any
    # failure and print whatever the deferred ends up holding.
    from twisted.python import log

    def pr(s):
        print(s)

    d = parseMultipartFormData(
        FileStream(open("upload.txt")), "----------0xKhTmLbOuNdArY")
    d.addErrback(log.err)
    d.addCallback(pr)
# Public names exported by a star-import of this module.
__all__ = ['parseMultipartFormData', 'parse_urlencoded', 'parse_urlencoded_stream', 'MultipartMimeStream', 'MimeFormatError']
|