def parse(self): """ Parse the POST data and break it into a FILES MultiValueDict and a POST MultiValueDict. Returns a tuple containing the POST and FILES dictionary, respectively. """ # We have to import QueryDict down here to avoid a circular import. from london.http import QueryDict encoding = self._encoding handlers = self._upload_handlers # HTTP spec says that Content-Length >= 0 is valid # handling content-length == 0 before continuing if self._content_length == 0: return QueryDict(MultiValueDict(), encoding=self._encoding), MultiValueDict() # See if the handler will want to take care of the parsing. # This allows overriding everything if somebody wants it. for handler in handlers: result = handler.handle_raw_input(self._input_data, self._meta, self._content_length, self._boundary, encoding) if result is not None: return result[0], result[1] # Create the data structures to be used later. self._post = QueryDict('', mutable=True) self._files = MultiValueDict() # Instantiate the parser and stream: stream = LazyStream(ChunkIter(self._input_data, self._chunk_size)) # Whether or not to signal a file-completion at the beginning of the loop. old_field_name = None counters = [0] * len(handlers) try: for item_type, meta_data, field_stream in Parser(stream, self._boundary): if old_field_name: # We run this at the beginning of the next loop # since we cannot be sure a file is complete until # we hit the next boundary/part of the multipart content. self.handle_file_complete(old_field_name, counters) old_field_name = None try: disposition = meta_data['content-disposition'][1] field_name = disposition['name'].strip() except (KeyError, IndexError, AttributeError): continue transfer_encoding = meta_data.get('content-transfer-encoding') if transfer_encoding is not None: transfer_encoding = transfer_encoding[0].strip() field_name = force_unicode(field_name, encoding, errors='replace') if item_type == FIELD: # This is a post field, we can just set it in the post if transfer_encoding == 'base64': raw_data = field_stream.read() try: data = str(raw_data).decode('base64') except: data = raw_data else: data = field_stream.read() self._post.appendlist(field_name, force_unicode(data, encoding, errors='replace')) elif item_type == FILE: # This is a file, use the handler... file_name = disposition.get('filename') if not file_name: continue file_name = force_unicode(file_name, encoding, errors='replace') file_name = self.IE_sanitize(unescape_entities(file_name)) content_type = meta_data.get('content-type', ('',))[0].strip() try: charset = meta_data.get('content-type', (0,{}))[1].get('charset', None) except: charset = None try: content_length = int(meta_data.get('content-length')[0]) except (IndexError, TypeError, ValueError): content_length = None counters = [0] * len(handlers) try: for handler in handlers: try: handler.new_file(field_name, file_name, content_type, content_length, charset) except StopFutureHandlers: break for chunk in field_stream: if transfer_encoding == 'base64': # We only special-case base64 transfer encoding try: chunk = str(chunk).decode('base64') except BaseException as e: # Since this is only a chunk, any error is an unfixable error. raise MultiPartParserError("Could not decode base64 data: %r" % e) for i, handler in enumerate(handlers): chunk_length = len(chunk) chunk = handler.receive_data_chunk(chunk, counters[i]) counters[i] += chunk_length if chunk is None: # If the chunk received by the handler is None, then don't continue. break except SkipFile as e: # Just use up the rest of this file... exhaust(field_stream) else: # Handle file upload completions on next iteration. old_field_name = field_name else: # If this is neither a FIELD or a FILE, just exhaust the stream. exhaust(stream) except StopUpload as e: if not e.connection_reset: exhaust(self._input_data) else: # Make sure that the request data is all fed exhaust(self._input_data) # Signal that the upload has completed. for handler in handlers: retval = handler.upload_complete() if retval: break return self._post, self._files
class MultiPartParser(object): """ A rfc2388 multipart/form-data parser. ``MultiValueDict.parse()`` reads the input stream in ``chunk_size`` chunks and returns a tuple of ``(MultiValueDict(POST), MultiValueDict(FILES))``. """ def __init__(self, META, input_data, upload_handlers, encoding=None): """ Initialize the MultiPartParser object. :META: The standard ``META`` dictionary in Django request objects. :input_data: The raw post data, as a file-like object. :upload_handler: An UploadHandler instance that performs operations on the uploaded data. :encoding: The encoding with which to treat the incoming data. """ # # Content-Type should containt multipart and the boundary information. # content_type = META.get('HTTP_CONTENT_TYPE', META.get('CONTENT_TYPE', '')) if not content_type.startswith('multipart/'): raise MultiPartParserError('Invalid Content-Type: %s' % content_type) # Parse the header to get the boundary to split the parts. ctypes, opts = parse_header(content_type) boundary = opts.get('boundary') if not boundary or not cgi.valid_boundary(boundary): raise MultiPartParserError('Invalid boundary in multipart: %s' % boundary) # Content-Length should contain the length of the body we are about # to receive. try: content_length = int(META.get('HTTP_CONTENT_LENGTH', META.get('CONTENT_LENGTH',0))) except (ValueError, TypeError): content_length = 0 if content_length < 0: # This means we shouldn't continue...raise an error. raise MultiPartParserError("Invalid content length: %r" % content_length) self._boundary = boundary self._input_data = input_data # For compatibility with low-level network APIs (with 32-bit integers), # the chunk size should be < 2^31, but still divisible by 4. possible_sizes = [x.chunk_size for x in upload_handlers if x.chunk_size] self._chunk_size = min([2**31-4] + possible_sizes) self._meta = META self._encoding = encoding or settings.DEFAULT_CHARSET self._content_length = content_length self._upload_handlers = upload_handlers def parse(self): """ Parse the POST data and break it into a FILES MultiValueDict and a POST MultiValueDict. Returns a tuple containing the POST and FILES dictionary, respectively. """ # We have to import QueryDict down here to avoid a circular import. from london.http import QueryDict encoding = self._encoding handlers = self._upload_handlers # HTTP spec says that Content-Length >= 0 is valid # handling content-length == 0 before continuing if self._content_length == 0: return QueryDict(MultiValueDict(), encoding=self._encoding), MultiValueDict() # See if the handler will want to take care of the parsing. # This allows overriding everything if somebody wants it. for handler in handlers: result = handler.handle_raw_input(self._input_data, self._meta, self._content_length, self._boundary, encoding) if result is not None: return result[0], result[1] # Create the data structures to be used later. self._post = QueryDict('', mutable=True) self._files = MultiValueDict() # Instantiate the parser and stream: stream = LazyStream(ChunkIter(self._input_data, self._chunk_size)) # Whether or not to signal a file-completion at the beginning of the loop. old_field_name = None counters = [0] * len(handlers) try: for item_type, meta_data, field_stream in Parser(stream, self._boundary): if old_field_name: # We run this at the beginning of the next loop # since we cannot be sure a file is complete until # we hit the next boundary/part of the multipart content. self.handle_file_complete(old_field_name, counters) old_field_name = None try: disposition = meta_data['content-disposition'][1] field_name = disposition['name'].strip() except (KeyError, IndexError, AttributeError): continue transfer_encoding = meta_data.get('content-transfer-encoding') if transfer_encoding is not None: transfer_encoding = transfer_encoding[0].strip() field_name = force_unicode(field_name, encoding, errors='replace') if item_type == FIELD: # This is a post field, we can just set it in the post if transfer_encoding == 'base64': raw_data = field_stream.read() try: data = str(raw_data).decode('base64') except: data = raw_data else: data = field_stream.read() self._post.appendlist(field_name, force_unicode(data, encoding, errors='replace')) elif item_type == FILE: # This is a file, use the handler... file_name = disposition.get('filename') if not file_name: continue file_name = force_unicode(file_name, encoding, errors='replace') file_name = self.IE_sanitize(unescape_entities(file_name)) content_type = meta_data.get('content-type', ('',))[0].strip() try: charset = meta_data.get('content-type', (0,{}))[1].get('charset', None) except: charset = None try: content_length = int(meta_data.get('content-length')[0]) except (IndexError, TypeError, ValueError): content_length = None counters = [0] * len(handlers) try: for handler in handlers: try: handler.new_file(field_name, file_name, content_type, content_length, charset) except StopFutureHandlers: break for chunk in field_stream: if transfer_encoding == 'base64': # We only special-case base64 transfer encoding try: chunk = str(chunk).decode('base64') except BaseException as e: # Since this is only a chunk, any error is an unfixable error. raise MultiPartParserError("Could not decode base64 data: %r" % e) for i, handler in enumerate(handlers): chunk_length = len(chunk) chunk = handler.receive_data_chunk(chunk, counters[i]) counters[i] += chunk_length if chunk is None: # If the chunk received by the handler is None, then don't continue. break except SkipFile as e: # Just use up the rest of this file... exhaust(field_stream) else: # Handle file upload completions on next iteration. old_field_name = field_name else: # If this is neither a FIELD or a FILE, just exhaust the stream. exhaust(stream) except StopUpload as e: if not e.connection_reset: exhaust(self._input_data) else: # Make sure that the request data is all fed exhaust(self._input_data) # Signal that the upload has completed. for handler in handlers: retval = handler.upload_complete() if retval: break return self._post, self._files def handle_file_complete(self, old_field_name, counters): """ Handle all the signalling that takes place when a file is complete. """ for i, handler in enumerate(self._upload_handlers): file_obj = handler.file_complete(counters[i]) if file_obj: # If it returns a file object, then set the files dict. self._files.appendlist(force_unicode(old_field_name, self._encoding, errors='replace'), file_obj) break def IE_sanitize(self, filename): """Cleanup filename from Internet Explorer full paths.""" return filename and filename[filename.rfind("\\")+1:].strip()