Exemplo n.º 1
0
        def patched_start_response(status, headers, exc_info=None):
            """Forward to ``start_response``, appending an ``Expires`` header if one applies.

            The header is only considered when ``self.debug`` is falsy and the
            response does not already carry ``Expires``. The delay is selected
            from ``self.expire_seconds`` by the response's mime type, falling
            back to the ``"*"`` wildcard entry when the mime type is not listed.
            """
            parsed = Headers(headers)

            # Debug mode or an existing Expires header: pass through untouched.
            if self.debug or "Expires" in parsed:
                return start_response(status, headers, exc_info)

            # Keep only the mime type, dropping parameters such as charset.
            mime = parsed.get("Content-Type", "*").split(";")[0]

            # An explicit entry for the mime type wins over the wildcard.
            if mime in self.expire_seconds:
                rule = mime
            elif "*" in self.expire_seconds:
                rule = "*"
            else:
                rule = None

            expire_time = None if rule is None else self.make_expire_time_for(rule)
            if expire_time is not None:
                log.debug("Adding expires header value: " + expire_time)
                headers.append(("Expires", expire_time))

            return start_response(status, headers, exc_info)
Exemplo n.º 2
0
    def testMappingInterface(self):
        """Exercise the mapping-style API of ``Headers``.

        Covers sizing, the keys/values/items views, case-insensitive lookup
        through every accessor, replacement on assignment, and the
        ``get``/``setdefault`` defaults.
        """
        pairs = [("x", "y")]

        # Sizing and views of freshly built instances.
        self.assertEqual(len(Headers([])), 0)
        self.assertEqual(len(Headers(pairs[:])), 1)
        self.assertEqual(Headers(pairs[:]).keys(), ["x"])
        self.assertEqual(Headers(pairs[:]).values(), ["y"])
        self.assertEqual(Headers(pairs[:]).items(), pairs)
        self.assertIsNot(Headers(pairs).items(), pairs)  # must be copy!

        hdrs = Headers([])
        del hdrs["foo"]  # should not raise an error

        # Every accessor must find the header regardless of the key's case.
        hdrs["Foo"] = "bar"
        accessors = (
            hdrs.has_key,
            hdrs.__contains__,
            hdrs.get,
            hdrs.get_all,
            hdrs.__getitem__,
        )
        for accessor in accessors:
            for spelling in ("foo", "Foo", "FOO"):
                self.assertTrue(accessor(spelling))
            self.assertFalse(accessor("bar"))

        # Assigning through a different spelling replaces the value.
        self.assertEqual(hdrs["foo"], "bar")
        hdrs["foo"] = "baz"
        self.assertEqual(hdrs["FOO"], "baz")
        self.assertEqual(hdrs.get_all("foo"), ["baz"])

        # get() leaves the mapping alone; setdefault() inserts missing keys.
        self.assertEqual(hdrs.get("foo", "whee"), "baz")
        self.assertEqual(hdrs.get("zoo", "whee"), "whee")
        self.assertEqual(hdrs.setdefault("foo", "whee"), "baz")
        self.assertEqual(hdrs.setdefault("zoo", "whee"), "whee")
        self.assertEqual(hdrs["foo"], "baz")
        self.assertEqual(hdrs["zoo"], "whee")
    def testMappingInterface(self):
        test = [('x', 'y')]
        self.assertEqual(len(Headers()), 0)
        self.assertEqual(len(Headers([])), 0)
        self.assertEqual(len(Headers(test[:])), 1)
        self.assertEqual(Headers(test[:]).keys(), ['x'])
        self.assertEqual(Headers(test[:]).values(), ['y'])
        self.assertEqual(Headers(test[:]).items(), test)
        self.assertIsNot(Headers(test).items(), test)  # must be copy!

        h = Headers()
        del h['foo']  # should not raise an error

        h['Foo'] = 'bar'
        for m in h.__contains__, h.get, h.get_all, h.__getitem__:
            self.assertTrue(m('foo'))
            self.assertTrue(m('Foo'))
            self.assertTrue(m('FOO'))
            self.assertFalse(m('bar'))

        self.assertEqual(h['foo'], 'bar')
        h['foo'] = 'baz'
        self.assertEqual(h['FOO'], 'baz')
        self.assertEqual(h.get_all('foo'), ['baz'])

        self.assertEqual(h.get("foo", "whee"), "baz")
        self.assertEqual(h.get("zoo", "whee"), "whee")
        self.assertEqual(h.setdefault("foo", "whee"), "baz")
        self.assertEqual(h.setdefault("zoo", "whee"), "whee")
        self.assertEqual(h["foo"], "baz")
        self.assertEqual(h["zoo"], "whee")
Exemplo n.º 4
0
        def patched_start_response(status, headers, exc_info=None):
            """Forward to ``start_response``, adding ``Expires`` when applicable.

            Each decision is traced through ``log.debug``. The header is only
            considered when ``self.debug`` is falsy and the response has no
            ``Expires`` header yet; the delay comes from ``self.expire_seconds``
            keyed by mime type, with ``'*'`` as the catch-all entry.
            """
            parsed = Headers(headers)

            if self.debug:
                log.debug('Skipping expired headers')
            else:
                log.debug('Calculating expires headers')

            # Debug mode or an existing Expires header disables the whole step.
            if not (self.debug or 'Expires' in parsed):
                # Keep only the mime type, dropping parameters such as charset.
                mime = parsed.get('Content-Type', '*').split(';')[0]
                log.debug('See mime type ' + mime)

                # An explicit entry for the mime type beats the wildcard.
                if mime in self.expire_seconds:
                    log.debug('Matched mimetype exactly.')
                    lookup_key = mime
                elif '*' in self.expire_seconds:
                    log.debug('Matched mimetype with universal.')
                    lookup_key = '*'
                else:
                    log.debug('No mimetype match.')
                    lookup_key = None

                expire_time = (None if lookup_key is None
                               else self.make_expire_time_for(lookup_key))
                if expire_time is not None:
                    log.debug('Adding expires header value: ' + expire_time)
                    headers.append(('Expires', expire_time))

            log.debug('-' * 60)
            return start_response(status, headers, exc_info)
Exemplo n.º 5
0
    def testMappingInterface(self):
        test = [('x','y')]
        self.assertEqual(len(Headers([])),0)
        self.assertEqual(len(Headers(test[:])),1)
        self.assertEqual(Headers(test[:]).keys(), ['x'])
        self.assertEqual(Headers(test[:]).values(), ['y'])
        self.assertEqual(Headers(test[:]).items(), test)
        self.assertFalse(Headers(test).items() is test)  # must be copy!

        h=Headers([])
        del h['foo']   # should not raise an error

        h['Foo'] = 'bar'
        for m in h.__contains__, h.get, h.get_all, h.__getitem__:
            self.assertTrue(m('foo'))
            self.assertTrue(m('Foo'))
            self.assertTrue(m('FOO'))
            self.assertFalse(m('bar'))

        self.assertEqual(h['foo'],'bar')
        h['foo'] = 'baz'
        self.assertEqual(h['FOO'],'baz')
        self.assertEqual(h.get_all('foo'),['baz'])

        self.assertEqual(h.get("foo","whee"), "baz")
        self.assertEqual(h.get("zoo","whee"), "whee")
        self.assertEqual(h.setdefault("foo","whee"), "baz")
        self.assertEqual(h.setdefault("zoo","whee"), "whee")
        self.assertEqual(h["foo"],"baz")
        self.assertEqual(h["zoo"],"whee")
Exemplo n.º 6
0
        def patched_start_response(status, headers, exc_info=None):
            """Call ``start_response`` after optionally appending ``Expires``.

            No header is added in debug mode or when the response already has
            one. Otherwise ``self.expire_seconds`` is consulted first for the
            response's mime type and then for the ``'*'`` wildcard.
            """
            current = Headers(headers)

            # Debug mode or an existing Expires header: forward unchanged.
            if self.debug or 'Expires' in current:
                return start_response(status, headers, exc_info)

            # Keep only the mime type, dropping parameters such as charset.
            mime = current.get('Content-Type', '*').split(';')[0]

            # First configured key wins: the exact mime type, then the wildcard.
            matched_key = next(
                (key for key in (mime, '*') if key in self.expire_seconds),
                None,
            )

            if matched_key is not None:
                expire_time = self.make_expire_time_for(matched_key)
                if expire_time is not None:
                    log.debug('Adding expires header value: ' + expire_time)
                    headers.append(('Expires', expire_time))

            return start_response(status, headers, exc_info)
Exemplo n.º 7
0
class Request:
    """Attribute-style view of a single WSGI request.

    Built from a WSGI ``environ`` dict: commonly used request variables are
    copied onto attributes, recognised headers are collected into
    ``self.headers`` and the request body is read from ``wsgi.input``.
    """

    id: str  # random 30-character identifier generated per instance
    environ: dict  # the WSGI environ the request was built from
    wsgi_input: BufferedReader  # environ['wsgi.input'] (may be None if absent)
    path: str
    method: str
    port: str
    host: str
    protocol: str
    server_name: str
    query: dict  # result of parse_qs on QUERY_STRING
    headers: Headers
    body: Dict  # starts empty; not populated in this class
    raw_body: bytes  # bytes read from wsgi.input
    params: Dict[str, Any]  # starts empty; not populated in this class

    def __init__(self, environ: dict):
        """Populate the request from ``environ`` and read its body.

        Args:
            environ: WSGI environment dictionary for the request.
        """
        self.id = ''.join(random.choice(chars) for _ in range(30))
        self.body = dict()
        self.environ = environ
        self.wsgi_input: BufferedReader = cast(BufferedReader,
                                               environ.get('wsgi.input'))
        self.path = environ.get('PATH_INFO', '')
        self.method = environ.get('REQUEST_METHOD', '')
        # NOTE(review): PEP 3333 names these SERVER_PORT / SERVER_PROTOCOL;
        # confirm the hosting server really provides PORT / HTTP_PROTOCOL.
        self.port = environ.get('PORT', '')
        self.host = environ.get('HTTP_HOST', '')
        self.protocol = environ.get('HTTP_PROTOCOL', '')
        self.server_name = environ.get('SERVER_NAME', '')
        self.query = parse_qs(environ.get('QUERY_STRING', ''))
        self.params = dict()
        self.headers = Headers()
        self._parse_http_headers()
        self._read_request_body()

    def _parse_http_headers(self: 'Request'):
        """Copy recognised header variables from ``environ`` into ``self.headers``.

        A leading ``HTTP_`` is removed from the key; if the remainder is listed
        in ``HTTP_HEADERS`` it is converted from ``UPPER_SNAKE`` to
        ``Title-Case-With-Dashes`` and added together with its value.
        """
        prefix = 'HTTP_'
        for key, value in self.environ.items():
            # Strip the prefix only: str.replace would also delete "HTTP_"
            # from the middle of a name (HTTP_X_HTTP_METHOD -> X_METHOD).
            bare_key: str = key[len(prefix):] if key.startswith(prefix) else key

            if bare_key in HTTP_HEADERS:
                final_key: str = capwords(bare_key, '_').replace('_', '-')
                self.headers.add_header(final_key, value)

    def _read_request_body(self: 'Request'):
        """Read the number of bytes announced by Content-Length into ``raw_body``.

        A missing or non-numeric length counts as zero. Negative lengths are
        clamped to zero because ``read(-1)`` would read until EOF, and a
        missing ``wsgi.input`` yields an empty body instead of an
        ``AttributeError``.
        """
        try:
            request_body_size = int(self.headers.get(CONTENT_LENGTH, '0'))
        except ValueError:
            request_body_size = 0
        request_body_size = max(request_body_size, 0)

        if self.wsgi_input is None:
            self.raw_body = b''
        else:
            self.raw_body = self.wsgi_input.read(request_body_size)
Exemplo n.º 8
0
    def __call__(self, environ, start_response):
        """WSGI entry point: run the wrapped app, optionally under the profiler.

        Profiling is switched on by ``self.toggle_key`` carrying
        ``self.enable_value`` in either the query string or a cookie, and
        switched off by the query key being present with an empty value.
        When profiling ran and the response is a ``200`` ``text/html`` page,
        the rendered profile is inserted into the body and ``Content-Length``
        is updated.

        Args:
            environ: WSGI environment dictionary.
            start_response: WSGI ``start_response`` callable from the server.

        Returns:
            The response body iterable (replaced by a one-element list when the
            profile was inserted).
        """
        # Morsel for the toggle cookie, or None when the cookie is absent.
        key_morsel = Cookie(environ.get("HTTP_COOKIE", "")).get(self.toggle_key)
        # useful vars
        query = query_str2dict(environ.get("QUERY_STRING"))
        enable_by_cookie = key_morsel.value == self.enable_value if key_morsel else False
        enable_by_query = query.get(self.toggle_key) == self.enable_value
        # pop toggle_key from query dic to avoid case: '?_profile=on&_profile='
        disable = query.pop(self.toggle_key, None) == ""  # only can be disabled by query
        enable = not disable and (enable_by_query or enable_by_cookie)

        # presumably run_app invokes the wrapped app while collecting body
        # chunks in resp_body and the start_response arguments in
        # saved_ss_args -- confirm against _intercept_call.
        run_app, resp_body, saved_ss_args = self._intercept_call()

        # processing cookies and queries
        so = query.pop(self.SIMPLE_OUTPUT_TOGGLE_KEY, None)
        if so is not None:
            # Stored on the instance, so the setting persists across requests.
            self.simple_output = so == "True"
        cookie_to_set = None
        if enable_by_query and not enable_by_cookie:
            # Enabled via query only: persist the choice in a cookie.
            cookie_to_set = "%s=%s; Path=/; HttpOnly" % (self.toggle_key, self.enable_value)
        elif disable:
            # Disabled via query: blank the cookie and give it a 1s lifetime.
            cookie_to_set = "%s=; Path=/; Max-Age=1; HttpOnly" % self.toggle_key

        if enable:
            start = time.time()
            profile = Profile()
            profile.runcall(run_app, environ)  # here we call the WSGI app
            elapsed = time.time() - start
        else:
            profile = elapsed = None  # for annoying IDE
            run_app(environ)

        status, headers = saved_ss_args[:2]
        # NOTE(review): `headers` (not headers_dic) is what gets passed to
        # start_response below, so the edits made through headers_dic must
        # mutate that same list in place -- confirm the imported Headers wraps
        # the list rather than copying it.
        headers_dic = Headers(headers)
        if cookie_to_set:
            headers_dic.add_header("Set-Cookie", cookie_to_set)

        # insert result into response
        content_type = headers_dic.get("Content-Type", "")
        if enable and status.startswith("200") and content_type.startswith("text/html"):
            # Write back the query string without the toggle keys popped above.
            environ["QUERY_STRING"] = dict2query_str(query)

            # Encode with the charset matched in Content-Type, else ASCII;
            # unencodable characters are replaced rather than raising.
            matched = _find_charset.match(content_type)
            encoding = matched.group(1) if matched else "ascii"
            rendered = self.render_result(profile, elapsed, environ).encode(encoding, "replace")
            resp_body = [insert_into_body(rendered, b"".join(resp_body))]
            headers_dic["Content-Length"] = str(len(resp_body[0]))
        # Forward exc_info only when the app supplied a third argument.
        start_response(status, headers, saved_ss_args[2] if len(saved_ss_args) == 3 else None)
        return resp_body
    def testBytes(self):
        """Check that ``Headers`` accepts bytes names/values and exposes them as text."""
        hdrs = Headers(
            [
                (b"Content-Type", b"text/plain; charset=utf-8"),
            ]
        )
        self.assertEqual("text/plain; charset=utf-8", hdrs.get("Content-Type"))

        # Item assignment with bytes; readable through str and bytes keys.
        hdrs[b"Foo"] = bytes(b"bar")
        for key in ("Foo", b"Foo"):
            self.assertEqual("bar", hdrs.get(key))

        # setdefault with bytes behaves the same way.
        hdrs.setdefault(b"Bar", b"foo")
        for key in ("Bar", b"Bar"):
            self.assertEqual("foo", hdrs.get(key))

        # add_header with bytes parameters is rendered as text.
        hdrs.add_header(
            b"content-disposition",
            b"attachment",
            filename=b"bud.gif",
        )
        self.assertEqual(
            'attachment; filename="bud.gif"',
            hdrs.get("content-disposition"),
        )

        # Deleting by str key also removes it for bytes membership tests.
        del hdrs["content-disposition"]
        self.assertTrue(b"content-disposition" not in hdrs)
Exemplo n.º 10
0
    def testBytes(self):
        """Verify bytes names and values round-trip through ``Headers`` as text."""
        initial = [(b"Content-Type", b"text/plain; charset=utf-8")]
        hdrs = Headers(initial)
        self.assertEqual("text/plain; charset=utf-8", hdrs.get("Content-Type"))

        # Item assignment with bytes; lookups work with str and bytes keys.
        hdrs[b"Foo"] = bytes(b"bar")
        self.assertEqual("bar", hdrs.get("Foo"))
        self.assertEqual("bar", hdrs.get(b"Foo"))

        # setdefault with bytes behaves the same way.
        hdrs.setdefault(b"Bar", b"foo")
        self.assertEqual("foo", hdrs.get("Bar"))
        self.assertEqual("foo", hdrs.get(b"Bar"))

        # add_header with bytes parameters is rendered as text.
        hdrs.add_header(b'content-disposition', b'attachment', filename=b'bud.gif')
        expected = 'attachment; filename="bud.gif"'
        self.assertEqual(expected, hdrs.get("content-disposition"))

        # Deleting by str key also removes it for bytes membership tests.
        del hdrs['content-disposition']
        self.assertTrue(b'content-disposition' not in hdrs)
Exemplo n.º 11
0
class AzureFunctionsWsgi:
    """Convert between Azure Functions API and the WSGI protocol.

    ``main`` takes an Azure Functions HTTP request, builds a WSGI environ
    from it, runs the wrapped WSGI application and converts the result into
    an Azure Functions HTTP response.

    NOTE(review): per-request state lives on the instance while the environ
    advertises ``wsgi.multithread: True``; confirm one instance never serves
    overlapping requests.
    """

    def __init__(self, app, include_os_environ=True):
        """Store the WSGI application and initialise per-request state.

        Args:
            app: WSGI application callable.
            include_os_environ: when true, merge ``read_environ()`` into the
                WSGI environ of every request.
        """
        self._app = app
        self._include_os_environ = include_os_environ
        self._req = None
        self._context = None

        self._status = None
        self._wsgi_headers = []
        self._azure_headers = None
        self._body = None  # request body decoded to text
        self._raw_body = b''  # request body exactly as received
        self._written = []  # chunks sent through the legacy write() callable
        self._errors = StringIO()
        self._environ = {}

    def main(self,
             req: func.HttpRequest,
             context: func.Context = None) -> func.HttpResponse:
        """Serve one request through the wrapped WSGI application.

        Args:
            req: incoming Azure Functions HTTP request.
            context: optional invocation context; when given, its function
                directory, function name and invocation id are exposed in
                the environ.

        Returns:
            The HTTP response built from the application's status, headers
            and body.

        Raises:
            RuntimeError: if the application never called ``start_response``.
        """
        self._req = req
        self._context = context

        # Start every request clean so a status, headers or error output left
        # over from the previous call cannot leak into this one.
        self._status = None
        self._wsgi_headers = []
        self._written = []
        self._errors = StringIO()

        self._get_body()
        self._setup_environ()

        result = self._app(self._environ, self._start_response)
        try:
            buffer = list(result)
        finally:
            # PEP 3333: the server must call close() on the returned iterable
            # when it has one, even if iteration failed.
            close = getattr(result, 'close', None)
            if close is not None:
                close()

        # Log whole lines written to wsgi.errors. The previous code iterated
        # over readline(), i.e. over the characters of the first line only.
        for line in self._errors.getvalue().splitlines():
            logging.error(line)

        if self._status is None:
            raise RuntimeError(
                'WSGI application did not call start_response')

        response_values = self._response_values()

        return func.HttpResponse(b''.join(self._written + buffer),
                                 headers=self._azure_headers,
                                 **response_values)

    def _get_body(self):
        """Fetch the request body as raw bytes and as decoded text.

        The charset is taken from the ``charset=`` parameter of the request's
        Content-Type, defaulting to UTF-8. If the body cannot be decoded with
        it (binary payload, unknown codec) the text copy falls back to UTF-8
        with replacement characters; the raw bytes are kept either way.
        """
        body_encoding = 'utf-8'  # default

        body_content_type = self._req.headers.get('Content-Type')
        if body_content_type and 'charset=' in body_content_type.lower():
            # The first segment is the media type and has no '='; unpacking
            # its split used to raise ValueError for every such header.
            for part in body_content_type.split(';')[1:]:
                directive, sep, value = part.partition('=')
                if sep and directive.strip().lower() == 'charset':
                    body_encoding = value.strip().strip('"\'') or body_encoding
                    break

        self._raw_body = self._req.get_body()
        try:
            self._body = self._raw_body.decode(body_encoding)
        except (LookupError, UnicodeDecodeError):
            self._body = self._raw_body.decode('utf-8', errors='replace')

    def _setup_environ(self):
        """Build the WSGI environ for the current request into ``self._environ``."""
        req_url = urlparse(self._req.url)
        port = req_url.port
        if not port:
            # No explicit port in the URL: use the scheme's default.
            port = 443 if req_url.scheme == 'https' else 80

        environ = {
            'REQUEST_METHOD': self._req.method,
            'SCRIPT_NAME': '',  # SCRIPT_NAME is always the root
            'PATH_INFO': req_url.path,
            'SERVER_NAME': req_url.hostname,
            'SERVER_PORT': str(port),
            'SERVER_PROTOCOL': 'HTTP/1.1',  # TODO
            'SERVER_SOFTWARE': 'azure-functions',
            'wsgi.version': (1, 0),
            'wsgi.url_scheme': req_url.scheme,
            # Hand the application the bytes exactly as received, so they
            # still match the charset announced in Content-Type.
            'wsgi.input': BytesIO(self._raw_body),
            'wsgi.errors': self._errors,
            'wsgi.multithread': True,
            'wsgi.multiprocess': False,
            'wsgi.run_once': False,
            'azure_functions.url': self._req.url,
        }

        if self._context:
            environ.update({
                'azure_functions.function_directory':
                self._context.function_directory,
                'azure_functions.function_name':
                self._context.function_name,
                'azure_functions.invocation_id':
                self._context.invocation_id,
            })

        if req_url.query:
            environ['QUERY_STRING'] = req_url.query

        # CONTENT_TYPE / CONTENT_LENGTH are stored without the HTTP_ prefix.
        passthru_headers = ['Content-Type', 'Content-Length']
        for header_name in passthru_headers:
            if header_name in self._req.headers:
                environ[self._header_name(
                    header_name)] = self._req.headers[header_name]

        for header, value in self._req.headers.items():
            environ['HTTP_' + self._header_name(header)] = str(value)

        if self._include_os_environ:
            # NOTE(review): applied after the request variables, so a process
            # environment variable with the same name overrides them.
            environ.update(read_environ())

        # Workaround Content-Length bug when 'application/json'
        if environ.get('CONTENT_TYPE') == 'application/json':
            # Content-Length counts bytes, not decoded characters.
            environ['CONTENT_LENGTH'] = str(len(self._raw_body))

        self._environ = environ

    def _header_name(self, header_name):
        """Return ``header_name`` in CGI form: dashes to underscores, upper-cased."""
        return header_name.replace('-', '_').upper()

    def _start_response(self, status, headers, exc_info=None):
        """WSGI ``start_response`` callable handed to the application.

        Args:
            status: status line such as ``"200 OK"``.
            headers: list of ``(name, value)`` response header tuples.
            exc_info: optional ``sys.exc_info()`` tuple supplied by
                applications replacing an earlier response. The response is
                fully buffered, so the new status and headers simply replace
                the old ones.

        Returns:
            The legacy ``write(body_bytes)`` callable required by PEP 3333;
            written chunks are emitted before the iterable's output.
        """
        self._status = status
        self._wsgi_headers = headers
        return self._written.append

    def _response_values(self):
        """Return the keyword arguments for ``func.HttpResponse``.

        Also stores the response headers, wrapped in ``Headers``, in
        ``self._azure_headers``.
        """
        self._azure_headers = Headers(self._wsgi_headers)

        return {
            'status_code': int(self._status.split(' ')[0]),
            'mimetype': self._azure_headers.get('Content-Type', 'text/plain'),
            'charset': 'utf-8',
        }
Exemplo n.º 12
0
class MultipartPart(object):
    """One part of a multipart body, fed line by line.

    Header lines are collected until a blank line is seen; every later line
    is written to ``self.file``, which starts as an in-memory ``BytesIO`` and
    is swapped for a temporary file once ``memfile_limit`` is exceeded.
    """

    def __init__(self, buffer_size=2 ** 16, memfile_limit=2 ** 18, charset="latin1"):
        """Create an empty part.

        Args:
            buffer_size: chunk size passed to ``copy_file`` when spilling.
            memfile_limit: body size above which the body moves from memory
                to a temporary file.
            charset: charset for header lines, and for the body unless the
                part's Content-Type names another one.
        """
        self.headerlist = []
        self.headers = None
        self.file = False  # falsy until the header section is finished
        self.size = 0
        self._buf = tob("")  # line terminator held back until the next line
        self.disposition, self.name, self.filename = None, None, None
        self.content_type, self.charset = None, charset
        self.memfile_limit = memfile_limit
        self.buffer_size = buffer_size

    def feed(self, line, nl=""):
        """Route ``line`` to the header or body writer depending on state."""
        if self.file:
            return self.write_body(line, nl)
        return self.write_header(line, nl)

    def write_header(self, line, nl):
        """Consume one header line.

        A blank line ends the header section; a line starting with a space or
        tab continues the previous header.

        Raises:
            MultipartError: if the line has no terminator or no colon.
        """
        line = line.decode(self.charset or "latin1")
        if not nl:
            raise MultipartError("Unexpected end of line in header.")
        if not line.strip():  # blank line -> end of header segment
            self.finish_header()
        elif line[0] in " \t" and self.headerlist:
            name, value = self.headerlist.pop()
            self.headerlist.append((name, value + line.strip()))
        else:
            if ":" not in line:
                raise MultipartError("Syntax error in header: No colon.")
            name, value = line.split(":", 1)
            self.headerlist.append((name.strip(), value.strip()))

    def write_body(self, line, nl):
        """Append one body line to the part's file.

        The terminator of each line is held in ``self._buf`` and written in
        front of the following line, so the terminator of the last line fed
        is never written.

        Raises:
            MultipartError: if the body grows beyond a positive Content-Length.
        """
        if not line and not nl:
            return  # This does not even flush the buffer
        self.size += len(line) + len(self._buf)
        self.file.write(self._buf + line)
        self._buf = nl
        if self.content_length > 0 and self.size > self.content_length:
            raise MultipartError("Size of body exceeds Content-Length header.")
        if self.size > self.memfile_limit and isinstance(self.file, BytesIO):
            # TODO: What about non-file uploads that exceed the memfile_limit?
            self.file, old = TemporaryFile(mode="w+b"), self.file
            old.seek(0)
            copy_file(old, self.file, self.size, self.buffer_size)

    def finish_header(self):
        """Parse the collected headers and switch the part to body mode.

        Raises:
            MultipartError: if there is no Content-Disposition header.
        """
        self.file = BytesIO()
        self.headers = Headers(self.headerlist)
        cdis = self.headers.get("Content-Disposition", "")
        ctype = self.headers.get("Content-Type", "")
        if not cdis:
            raise MultipartError("Content-Disposition header is missing.")
        self.disposition, self.options = parse_options_header(cdis)
        self.name = self.options.get("name")
        self.filename = self.options.get("filename")
        self.content_type, options = parse_options_header(ctype)
        self.charset = options.get("charset") or self.charset
        self.content_length = int(self.headers.get("Content-Length", "-1"))

    def is_buffered(self):
        """ Return true if the data is fully buffered in memory."""
        return isinstance(self.file, BytesIO)

    @property
    def value(self):
        """ Data decoded with the specified charset """

        return self.raw.decode(self.charset)

    @property
    def raw(self):
        """ Data without decoding """
        pos = self.file.tell()
        self.file.seek(0)
        try:
            return self.file.read()
        finally:
            # Put the file position back even if the read fails.
            self.file.seek(pos)

    def save_as(self, path):
        """Copy the whole body to ``path`` and return ``copy_file``'s result.

        The destination is opened in a ``with`` block so its handle is closed
        (and flushed) on every path; it used to be left open.
        """
        with open(path, "wb") as fp:
            pos = self.file.tell()
            try:
                self.file.seek(0)
                size = copy_file(self.file, fp)
            finally:
                self.file.seek(pos)
        return size
Exemplo n.º 13
0
class MultipartPart(object):
    ''' One part of a multipart body, fed line by line.

        Header lines are collected until a blank line is seen; every later
        line is written to ``self.file`` after undoing the part's
        Content-Transfer-Encoding. The file starts as an in-memory
        ``BytesIO`` and is swapped for a temporary file once
        ``memfile_limit`` is exceeded.
    '''

    def __init__(self, buffer_size=2 ** 16, memfile_limit=2 ** 18,
                 charset='latin1'):
        ''' Create an empty part.

            :param buffer_size: chunk size passed to ``copy_file`` on spill.
            :param memfile_limit: body size above which the body moves from
                memory to a temporary file.
            :param charset: charset for header lines, and for the body unless
                the part's Content-Type names another one.
        '''
        self.headerlist = []
        self.headers = None
        self.file = False  # falsy until the header section is finished
        self.size = 0
        self._buf = tob('')  # line terminator held back until the next line
        self.disposition, self.name, self.filename = None, None, None
        self.content_type, self.charset = None, charset
        self.memfile_limit = memfile_limit
        self.buffer_size = buffer_size

    def feed(self, line, nl=''):
        ''' Route ``line`` to the header or body writer depending on state. '''
        if self.file:
            return self.write_body(line, nl)
        return self.write_header(line, nl)

    def write_header(self, line, nl):
        ''' Consume one header line.

            A blank line ends the header section; a line starting with a
            space or tab continues the previous header. Raises
            MalformattedError if the line has no terminator or no colon.
        '''
        line = line.decode(self.charset or 'latin1')
        if not nl:
            raise MalformattedError('Unexpected end of line in header.')
        if not line.strip():  # blank line -> end of header segment
            self.finish_header()
        elif line[0] in ' \t' and self.headerlist:
            name, value = self.headerlist.pop()
            self.headerlist.append((name, value + line.strip()))
        else:
            if ':' not in line:
                raise MalformattedError("Syntax error in header: No colon.")
            name, value = line.split(':', 1)
            self.headerlist.append((name.strip(), value.strip()))

    def write_body(self, line, nl):
        ''' Decode one body line and append it to the part's file.

            The terminator of each line is held in ``self._buf`` and written
            in front of the following line. Raises MalformattedError for an
            unterminated line in a transfer-encoded part or a body larger
            than a positive Content-Length.
        '''
        if not line and not nl:
            return  # This does not even flush the buffer
        if self.content_transfer_encoding and not nl:
            raise MalformattedError('Line too long on transfer_encoded chunk.')
        if self.content_transfer_encoding == 'quoted-printable':
            # A trailing '=' is a soft line break: the terminator is dropped.
            if line.endswith(tob('=')):
                nl = tob('')
            line = quopri.decodestring(line)
        elif self.content_transfer_encoding == 'base64':
            # Line terminators are not part of base64-encoded data.
            line, nl = binascii.a2b_base64(line), tob('')
        self.size += len(line) + len(self._buf)
        self.file.write(self._buf + line)
        self._buf = nl
        if self.content_length > 0 and self.size > self.content_length:
            raise MalformattedError('Size of body exceeds Content-Length header.')
        if self.size > self.memfile_limit and isinstance(self.file, BytesIO):
            self.file, old = TemporaryFile(mode='w+b'), self.file
            old.seek(0)
            copy_file(old, self.file, self.size, self.buffer_size)

    def finish_header(self):
        ''' Parse the collected headers and switch the part to body mode.

            Raises MalformattedError if Content-Disposition is missing or the
            Content-Transfer-Encoding is neither base64 nor quoted-printable.
        '''
        self.file = BytesIO()
        self.headers = Headers(self.headerlist)
        cdis = self.headers.get('Content-Disposition', '')
        ctype = self.headers.get('Content-Type', '')
        if not cdis:
            raise MalformattedError('Content-Disposition header is missing.')
        self.disposition, self.options = parse_options_header(cdis)
        self.name = self.options.get('name')
        self.filename = self.options.get('filename')
        self.content_type, options = parse_options_header(ctype)
        self.charset = options.get('charset') or self.charset
        self.content_length = int(self.headers.get('Content-Length', '-1'))
        self.content_transfer_encoding = self.headers.get(
            'Content-Transfer-Encoding')
        if self.content_transfer_encoding not in (
                None, 'base64', 'quoted-printable'):
            raise MalformattedError('invalid Content-Transfer-Encoding')

    def is_buffered(self):
        ''' Return true if the data is fully buffered in memory.'''
        return isinstance(self.file, BytesIO)

    def value(self, limit):
        ''' Data decoded with the specified charset.

            Reads at most ``limit`` bytes and raises MemoryLimitError if the
            body holds more. The file position is restored afterwards.
        '''
        pos = self.file.tell()
        try:
            self.file.seek(0)
            val = self.file.read(limit)
            if self.file.read(1):
                raise MemoryLimitError("Request too big. Increase mem_limit.")
        finally:
            self.file.seek(pos)
        return val.decode(self.charset)

    def save_as(self, path):
        ''' Copy the whole body to ``path`` and return copy_file's result.

            The destination is opened in a ``with`` block so its handle is
            closed (and flushed) on every path; it used to be left open.
        '''
        with open(path, 'wb') as fp:
            pos = self.file.tell()
            try:
                self.file.seek(0)
                size = copy_file(self.file, fp)
            finally:
                self.file.seek(pos)
        return size
Exemplo n.º 14
0
class MultipartPart(object):
    ''' One part of a multipart body, fed line by line.

        Header lines are collected until a blank line is seen; every later
        line is written to ``self.file``, which starts as an in-memory
        ``BytesIO`` and is swapped for a temporary file once
        ``memfile_limit`` is exceeded.
    '''

    def __init__(self, buffer_size=2**16, memfile_limit=2**18, charset='latin1'):
        ''' Create an empty part.

            :param buffer_size: chunk size passed to ``copy_file`` on spill.
            :param memfile_limit: body size above which the body moves from
                memory to a temporary file.
            :param charset: charset for header lines, and for the body unless
                the part's Content-Type names another one.
        '''
        self.headerlist = []
        self.headers = None
        self.file = False  # falsy until the header section is finished
        self.size = 0
        self._buf = tob('')  # line terminator held back until the next line
        self.disposition, self.name, self.filename = None, None, None
        self.content_type, self.charset = None, charset
        self.memfile_limit = memfile_limit
        self.buffer_size = buffer_size

    def feed(self, line, nl=''):
        ''' Route ``line`` to the header or body writer depending on state. '''
        if self.file:
            return self.write_body(line, nl)
        return self.write_header(line, nl)

    def write_header(self, line, nl):
        ''' Consume one header line.

            A blank line ends the header section; a line starting with a
            space or tab continues the previous header. Raises MultipartError
            if the line has no terminator or no colon.
        '''
        line = line.decode(self.charset or 'latin1')
        if not nl:
            raise MultipartError('Unexpected end of line in header.')
        if not line.strip():  # blank line -> end of header segment
            self.finish_header()
        elif line[0] in ' \t' and self.headerlist:
            name, value = self.headerlist.pop()
            self.headerlist.append((name, value + line.strip()))
        else:
            if ':' not in line:
                raise MultipartError("Syntax error in header: No colon.")
            name, value = line.split(':', 1)
            self.headerlist.append((name.strip(), value.strip()))

    def write_body(self, line, nl):
        ''' Append one body line to the part's file.

            The terminator of each line is held in ``self._buf`` and written
            in front of the following line. Raises MultipartError if the body
            grows beyond a positive Content-Length.
        '''
        if not line and not nl:
            return  # This does not even flush the buffer
        self.size += len(line) + len(self._buf)
        self.file.write(self._buf + line)
        self._buf = nl
        if self.content_length > 0 and self.size > self.content_length:
            raise MultipartError('Size of body exceeds Content-Length header.')
        if self.size > self.memfile_limit and isinstance(self.file, BytesIO):
            # TODO: What about non-file uploads that exceed the memfile_limit?
            self.file, old = TemporaryFile(mode='w+b'), self.file
            old.seek(0)
            copy_file(old, self.file, self.size, self.buffer_size)

    def finish_header(self):
        ''' Parse the collected headers and switch the part to body mode.

            Raises MultipartError if there is no Content-Disposition header.
        '''
        self.file = BytesIO()
        self.headers = Headers(self.headerlist)
        cdis = self.headers.get('Content-Disposition', '')
        ctype = self.headers.get('Content-Type', '')
        if not cdis:
            raise MultipartError('Content-Disposition header is missing.')
        self.disposition, self.options = parse_options_header(cdis)
        self.name = self.options.get('name')
        self.filename = self.options.get('filename')
        self.content_type, options = parse_options_header(ctype)
        self.charset = options.get('charset') or self.charset
        self.content_length = int(self.headers.get('Content-Length', '-1'))

    def is_buffered(self):
        ''' Return true if the data is fully buffered in memory.'''
        return isinstance(self.file, BytesIO)

    @property
    def value(self):
        ''' Data decoded with the specified charset '''
        pos = self.file.tell()
        try:
            self.file.seek(0)
            val = self.file.read()
        finally:
            # Put the file position back even if the read fails.
            self.file.seek(pos)
        return val.decode(self.charset)

    def save_as(self, path):
        ''' Copy the whole body to ``path`` and return copy_file's result.

            The destination is opened in a ``with`` block so its handle is
            closed (and flushed) on every path; it used to be left open.
        '''
        with open(path, 'wb') as fp:
            pos = self.file.tell()
            try:
                self.file.seek(0)
                size = copy_file(self.file, fp)
            finally:
                self.file.seek(pos)
        return size
Exemplo n.º 15
0
class MultipartPart(object):
    """One part of a multipart body, fed line by line.

    Header lines accumulate in ``headerlist`` until a blank line arrives;
    every later line is written to ``self.file``, which starts as an
    in-memory ``BytesIO`` and is replaced by a temporary file once
    ``memfile_limit`` is exceeded.
    """

    def __init__(self, buffer_size=2**16, memfile_limit=2**18, charset="latin1"):
        """Create an empty part with the given buffering limits and charset."""
        self.headerlist = []
        self.headers = None
        self.file = False  # falsy until the header section is finished
        self.size = 0
        self._buf = b""  # line terminator held back until the next line
        self.disposition = self.name = self.filename = None
        self.content_type = None
        self.charset = charset
        self.memfile_limit = memfile_limit
        self.buffer_size = buffer_size

    def feed(self, line, nl=""):
        """Send ``line`` to the body writer once headers are done, else to the header writer."""
        handler = self.write_body if self.file else self.write_header
        return handler(line, nl)

    def write_header(self, line, nl):
        """Consume one header line; a blank line finishes the header section."""
        text = line.decode(self.charset)

        if not nl:
            raise MultipartError("Unexpected end of line in header.")

        stripped = text.strip()
        if not stripped:  # blank line -> end of header segment
            self.finish_header()
            return

        # Leading whitespace marks a continuation of the previous header.
        if text[0] in " \t" and self.headerlist:
            name, value = self.headerlist[-1]
            self.headerlist[-1] = (name, value + stripped)
            return

        if ":" not in text:
            raise MultipartError("Syntax error in header: No colon.")

        name, _, value = text.partition(":")
        self.headerlist.append((name.strip(), value.strip()))

    def write_body(self, line, nl):
        """Append one body line, spilling to a temporary file when too large."""
        if not (line or nl):
            return  # This does not even flush the buffer

        # The previous line's terminator is written in front of this line.
        self.size += len(line) + len(self._buf)
        self.file.write(self._buf + line)
        self._buf = nl

        if 0 < self.content_length < self.size:
            raise MultipartError("Size of body exceeds Content-Length header.")

        if isinstance(self.file, BytesIO) and self.size > self.memfile_limit:
            # TODO: What about non-file uploads that exceed the memfile_limit?
            in_memory = self.file
            self.file = TemporaryFile(mode="w+b")
            in_memory.seek(0)
            copy_file(in_memory, self.file, self.size, self.buffer_size)

    def finish_header(self):
        """Parse the collected headers and switch the part to body mode."""
        self.file = BytesIO()
        self.headers = Headers(self.headerlist)
        disposition_header = self.headers.get("Content-Disposition", "")
        type_header = self.headers.get("Content-Type", "")

        if not disposition_header:
            raise MultipartError("Content-Disposition header is missing.")

        parsed_disposition = parse_options_header(disposition_header)
        self.disposition, self.options = parsed_disposition
        self.name = self.options.get("name")
        self.filename = self.options.get("filename")

        self.content_type, type_options = parse_options_header(type_header)
        # A charset named in Content-Type overrides the constructor's.
        self.charset = type_options.get("charset") or self.charset
        self.content_length = int(self.headers.get("Content-Length", "-1"))

    def is_buffered(self):
        """ Return true if the data is fully buffered in memory."""
        return isinstance(self.file, BytesIO)

    @property
    def value(self):
        """ Data decoded with the specified charset """
        data = self.raw
        return data.decode(self.charset)

    @property
    def raw(self):
        """ Data without decoding """
        pos = self.file.tell()
        self.file.seek(0)

        try:
            return self.file.read()
        finally:
            # Put the file position back whether or not the read succeeded.
            self.file.seek(pos)

    def save_as(self, path):
        """Copy the whole body to ``path`` and return ``copy_file``'s result."""
        with open(path, "wb") as target:
            pos = self.file.tell()

            try:
                self.file.seek(0)
                size = copy_file(self.file, target)
            finally:
                self.file.seek(pos)

        return size

    def close(self):
        """Close the underlying file, if any, and reset the part to header mode."""
        if not self.file:
            return
        self.file.close()
        self.file = False
Exemplo n.º 16
0
class MultipartPart(object):
    """A single part of a multipart message, filled in line by line.

    Lines passed to :meth:`feed` are treated as header lines until a blank
    line is seen; from then on they are appended to the part's body. The
    body is kept in a ``BytesIO`` until it grows beyond ``memfile_limit``,
    at which point it is moved into a temporary file.
    """

    def __init__(self,
                 buffer_size=2**16,
                 memfile_limit=2**18,
                 charset='latin1'):
        """Create an empty part.

        Args:
            buffer_size: Passed to ``copy_file`` when the body is moved from
                memory to a temporary file.
            memfile_limit: Body size (in bytes) above which the body is
                moved to a temporary file.
            charset: Charset used to decode header lines and, unless the
                Content-Type header names another one, the body.
        """
        self.headerlist = []
        self.headers = None
        # False until the header section is complete; feed() relies on this
        # to tell header lines from body lines.
        self.file = False
        self.size = 0
        self._buf = tob('')
        self.disposition, self.name, self.filename = None, None, None
        self.content_type, self.charset = None, charset
        # Defined up front so the attributes exist before finish_header().
        self.options = {}
        self.content_length = -1
        self.memfile_limit = memfile_limit
        self.buffer_size = buffer_size

    def feed(self, line, nl=''):
        """Consume one line of the part.

        Args:
            line: The line's content without its terminator.
            nl: The line terminator that followed ``line`` (empty if none).
        """
        if self.file:
            return self.write_body(line, nl)
        return self.write_header(line, nl)

    def write_header(self, line, nl):
        """Process one header line.

        A blank line ends the header section; a line starting with a space
        or tab continues the previous header; anything else must be a
        ``name: value`` pair.

        Raises:
            MultipartError: If the line has no terminator or a header line
                contains no colon.
        """
        line = line.decode(self.charset or 'latin1')
        if not nl:
            raise MultipartError('Unexpected end of line in header.')
        if not line.strip():  # blank line -> end of header segment
            self.finish_header()
        elif line[0] in ' \t' and self.headerlist:
            # Folded header: append the continuation to the last value.
            name, value = self.headerlist.pop()
            self.headerlist.append((name, value + line.strip()))
        else:
            if ':' not in line:
                raise MultipartError("Syntax error in header: No colon.")
            name, value = line.split(':', 1)
            self.headerlist.append((name.strip(), value.strip()))

    def write_body(self, line, nl):
        """Append one line of body data.

        The terminator of each line is held back in ``self._buf`` and only
        written together with the next line, so the terminator of the last
        line fed is never written to the body.

        Raises:
            MultipartError: If the body grows beyond a positive
                Content-Length announced in the part's headers.
        """
        if not line and not nl:
            return  # This does not even flush the buffer
        self.size += len(line) + len(self._buf)
        self.file.write(self._buf + line)
        self._buf = nl
        if self.content_length > 0 and self.size > self.content_length:
            raise MultipartError('Size of body exceeds Content-Length header.')
        if self.size > self.memfile_limit and isinstance(self.file, BytesIO):
            # TODO: What about non-file uploads that exceed the memfile_limit?
            # Too large for memory: continue in a temporary file, seeded
            # with what has been buffered so far.
            self.file, old = TemporaryFile(mode='w+b'), self.file
            old.seek(0)
            copy_file(old, self.file, self.size, self.buffer_size)

    def finish_header(self):
        """Close the header section and set the part up to receive its body.

        Raises:
            MultipartError: If the Content-Disposition header is missing or
                empty, or if Content-Length is not a valid integer.
        """
        self.file = BytesIO()
        self.headers = Headers(self.headerlist)
        cdis = self.headers.get('Content-Disposition', '')
        ctype = self.headers.get('Content-Type', '')
        clen = self.headers.get('Content-Length', '-1')
        if not cdis:
            raise MultipartError('Content-Disposition header is missing.')
        self.disposition, self.options = parse_options_header(cdis)
        self.name = self.options.get('name')
        self.filename = self.options.get('filename')
        self.content_type, options = parse_options_header(ctype)
        # A charset given in Content-Type overrides the current default.
        self.charset = options.get('charset') or self.charset
        # The value is client-supplied; report a malformed number as a
        # parse error instead of leaking a bare ValueError from int().
        try:
            self.content_length = int(clen)
        except ValueError:
            raise MultipartError('Invalid Content-Length header.')

    def is_buffered(self):
        """Return true if the data is fully buffered in memory."""
        return isinstance(self.file, BytesIO)

    @property
    def value(self):
        """Data decoded with the specified charset."""
        return self.raw.decode(self.charset)

    @property
    def raw(self):
        """Data without decoding.

        The file position in effect before the call is restored afterwards,
        even if reading fails.
        """
        pos = self.file.tell()
        self.file.seek(0)
        try:
            return self.file.read()
        finally:
            self.file.seek(pos)

    def save_as(self, path):
        """Write the part's data to a new file at ``path``.

        The target file is closed before returning, and the read position
        of ``self.file`` is restored even if copying fails.

        Returns:
            Whatever ``copy_file`` returns (presumably the number of bytes
            copied -- confirm against its definition).
        """
        with open(path, 'wb') as fp:
            pos = self.file.tell()
            try:
                self.file.seek(0)
                size = copy_file(self.file, fp)
            finally:
                self.file.seek(pos)
        return size
Exemplo n.º 17
0
class Url(Packer, object):
    """A download source: a URL together with the settings used to request it.

    Assigning to ``url`` (including in ``__init__``) re-derives ``protocol``,
    ``host``, ``port`` and ``path`` from it; see :meth:`__setattr__`.
    Information about the server's response is kept in ``self.target``.
    """

    def __init__(self,
                 id,
                 url,
                 cookie='',
                 headers=HEADERS_CHROME,
                 host=None,
                 port=None,
                 path=None,
                 protocol=None,
                 proxy=None,
                 max_thread=-1,
                 range_format='Range: bytes=%d-%d'):
        """Create a source.

        Args:
            id: Identifier of this source.
            url: The URL; protocol, host, port and path are parsed from it.
            cookie: Value sent as the ``Cookie`` request header, if non-empty.
            headers: Request headers, as a ``dict`` or a ``Headers`` object.
            host, port, path, protocol: When not ``None``, override the value
                parsed from ``url``.
            proxy: Stored on the instance; not consulted by this class.
            max_thread: Stored on the instance; not consulted by this class.
            range_format: Stored on the instance; not consulted by this class.

        Raises:
            ValueError: If ``headers`` is neither a ``dict`` nor ``Headers``.
        """
        self.id = id

        # Goes through __setattr__, which fills in protocol/host/port/path.
        self.url = url

        # Explicit arguments win over whatever was parsed from the URL.
        self.host = host if host is not None else getattr(self, 'host', None)
        self.port = port if port is not None else getattr(self, 'port', None)

        self.path = path if path is not None else getattr(self, 'path', None)
        self.protocol = protocol if protocol is not None else getattr(
            self, 'protocol', None)

        self.cookie = cookie

        if isinstance(headers, Headers):
            self.headers = headers
        elif isinstance(headers, dict):
            self.headers = Headers(list(headers.items()))
        else:
            raise ValueError('headers must be an instance of dict or Headers')

        self.etag = None

        self.proxy = proxy
        self.target = Target()

        self.max_thread = max_thread

        self.range_format = range_format

    def __eq__(self, other):
        """Compare by url, cookie, proxy and range_format.

        Returns ``NotImplemented`` for operands that are not ``Url``
        instances so Python can fall back to its default comparison.
        """
        if not isinstance(other, Url):
            return NotImplemented
        return (self.url == other.url
                and self.cookie == other.cookie
                and self.proxy == other.proxy
                and self.range_format == other.range_format)

    def config(self):
        """Placeholder; does nothing."""
        pass

    def getContentSize(self):
        """Return the size of the resource according to the last response.

        For a 200 response the Content-Length header is used; for a 206
        response the total after the ``/`` in Content-Range is used.

        Returns:
            int: The size in bytes, or -1 when it is unknown (other status
            code, header missing, or header value not a number such as the
            ``*`` total of an open-ended Content-Range).
        """
        code = self.target.code
        headers = self.target.headers
        try:
            if code == 200:
                length = int(headers.get('Content-Length', -1))
                if length != -1:
                    return length
            elif code == 206:
                content_range = headers.get('Content-Range')
                if content_range:
                    return int(content_range.split('/')[-1])
        except (TypeError, ValueError):
            # Malformed or unknown ("*") size: report it as unknown rather
            # than crash on server-supplied data.
            pass
        return -1

    def getFileName(self):
        """Work out a file name for the resource.

        A quoted ``filename="..."`` in the response's Content-Disposition
        header is preferred. Otherwise the last segment of the URL path
        (query string removed) is used; if that segment has no extension,
        the result of ``_content_type`` for the response's Content-Type is
        appended (presumably a file extension -- confirm against its
        definition).

        Returns:
            The file name, or ``None`` if the path yields no name.
        """
        ctd = self.target.headers.get('Content-Disposition')
        if ctd is not None:
            match = re.search(r'filename="(.*?)"', ctd)
            if match:
                return match.group(1)

        # ``path`` can still be None when the URL had no parsable path.
        filename = (self.path or '').split('?')[0].split('/')[-1]
        if filename == '':
            return None

        if '.' not in filename or filename.split('.')[-1] == '':
            extension = _content_type(
                self.target.headers.get('Content-Type'))
            filename = filename + extension

        return filename

    def reload(self):
        """Ask ``self.target`` to load this URL again."""
        self.target.load(self.url)

    def __setattr__(self, key, value):
        """Set an attribute; setting ``url`` also re-derives its components.

        After ``url`` is assigned, ``protocol``, ``path``, ``host`` and
        ``port`` are updated from it. If no port ends up set, it defaults to
        80 for ``http`` and 443 for ``https``.
        """
        object.__setattr__(self, key, value)
        if key == 'url':
            self.protocol, s1 = splittype(self.url)
            if s1:
                s2, self.path = splithost(s1)
                if s2:
                    self.host, port = splitport(s2)
                    self.port = int(port) if port is not None else None

            if not getattr(self, 'port', None):
                if self.protocol == 'http':
                    self.port = 80
                elif self.protocol == 'https':
                    self.port = 443

    def activate(self):
        """Request the URL and record the response in ``self.target``.

        The final URL, the response headers and the status code are passed
        to ``self.target.update``. The status code itself is not checked
        here.

        Raises:
            Exception: Propagated from :meth:`__request__` when the server
                could not be reached.
        """
        res, _ = self.__request__()
        # The response object exposes its headers differently on Python 2.
        if sys.version_info < (3, 0):
            headers_items = res.info().items()
        else:
            headers_items = res.getheaders()
        self.target.update(res.geturl(), headers_items, res.getcode())

    def __request__(self):
        """Open the URL, trying up to three times.

        The request carries ``self.headers`` plus, when ``self.cookie`` is
        non-empty, a ``Cookie`` header. Any exception raised while opening
        counts as a failed attempt; attempts are 0.5 seconds apart.
        NOTE(review): ``self.proxy`` is not used when building the opener.

        Returns:
            tuple: The response object and the cookie jar's internal
            cookie mapping.

        Raises:
            Exception: With message ``'UrlNotRespond'`` after three failed
                attempts.
        """
        cookiejar = CookieJar()
        opener = build_opener(HTTPCookieProcessor(cookiejar))
        _header = dict(self.headers.items())
        if self.cookie:
            _header['Cookie'] = self.cookie
        req = Request(self.url, headers=_header, origin_req_host=self.host)

        max_attempts = 3
        for attempt in range(max_attempts):
            try:
                res = opener.open(req)
                break
            except Exception:
                # Best-effort retry; only wait if another attempt follows.
                if attempt < max_attempts - 1:
                    time.sleep(0.5)
        else:
            raise Exception('UrlNotRespond')

        return res, cookiejar._cookies

    def getHeader(self, name, default=None):
        """Return the request header ``name``, or ``default`` if not set."""
        return self.headers.get(name, default)

    def __packet_params__(self):
        """Return the attribute names describing this object's state.

        Presumably consumed by ``Packer`` for (de)serialization -- confirm
        against its definition. The list previously contained the
        misspelling ``'protocal'``, which matches no attribute of this
        class; it now names the real ``protocol`` attribute.
        """
        return [
            'id', 'url', 'host', 'port', 'protocol', 'cookie', 'etag', 'proxy',
            'max_thread', 'range_format', 'headers'
        ]
Exemplo n.º 18
0
class MultipartPart(object):
    """A single part of a multipart message, filled in line by line.

    Lines passed to :meth:`feed` are treated as header lines until a blank
    line is seen; from then on they are appended to the part's body,
    decoding ``base64`` or ``quoted-printable`` transfer encodings on the
    way. The body is kept in a ``BytesIO`` until it grows beyond
    ``memfile_limit``, at which point it is moved into a temporary file.
    """

    def __init__(self,
                 buffer_size=2**16,
                 memfile_limit=2**18,
                 charset='latin1'):
        """Create an empty part.

        Args:
            buffer_size: Passed to ``copy_file`` when the body is moved from
                memory to a temporary file.
            memfile_limit: Body size (in bytes) above which the body is
                moved to a temporary file.
            charset: Charset used to decode header lines and, unless the
                Content-Type header names another one, the body.
        """
        self.headerlist = []
        self.headers = None
        # False until the header section is complete; feed() relies on this
        # to tell header lines from body lines.
        self.file = False
        self.size = 0
        self._buf = tob('')
        self.disposition, self.name, self.filename = None, None, None
        self.content_type, self.charset = None, charset
        # Defined up front so the attributes exist before finish_header().
        self.options = {}
        self.content_length = -1
        self.content_transfer_encoding = None
        self.memfile_limit = memfile_limit
        self.buffer_size = buffer_size

    def feed(self, line, nl=''):
        """Consume one line of the part.

        Args:
            line: The line's content without its terminator.
            nl: The line terminator that followed ``line`` (empty if none).
        """
        if self.file:
            return self.write_body(line, nl)
        return self.write_header(line, nl)

    def write_header(self, line, nl):
        """Process one header line.

        A blank line ends the header section; a line starting with a space
        or tab continues the previous header; anything else must be a
        ``name: value`` pair.

        Raises:
            MultipartError: If the line has no terminator or a header line
                contains no colon.
        """
        line = line.decode(self.charset or 'latin1')
        if not nl:
            raise MultipartError('Unexpected end of line in header.')
        if not line.strip():  # blank line -> end of header segment
            self.finish_header()
        elif line[0] in ' \t' and self.headerlist:
            # Folded header: append the continuation to the last value.
            name, value = self.headerlist.pop()
            self.headerlist.append((name, value + line.strip()))
        else:
            if ':' not in line:
                raise MultipartError("Syntax error in header: No colon.")
            name, value = line.split(':', 1)
            self.headerlist.append((name.strip(), value.strip()))

    def write_body(self, line, nl):
        """Append one line of body data, undoing any transfer encoding.

        The terminator of each line is held back in ``self._buf`` and only
        written together with the next line, so the terminator of the last
        line fed is never written to the body.

        Raises:
            MultipartError: If a transfer-encoded line arrives without a
                terminator, or the body grows beyond a positive
                Content-Length announced in the part's headers.
        """
        if not line and not nl:
            return  # This does not even flush the buffer
        if self.content_transfer_encoding and not nl:
            raise MultipartError('Line too long on transfer_encoded chunk.')
        if self.content_transfer_encoding == 'quoted-printable':
            # A trailing "=" is a soft line break: the terminator is not
            # part of the decoded data.
            if line.endswith(tob('=')):
                nl = tob('')
            line = quopri.decodestring(line)
        elif self.content_transfer_encoding == 'base64':
            # Line breaks in base64 data carry no meaning.
            line, nl = binascii.a2b_base64(line), tob('')
        self.size += len(line) + len(self._buf)
        self.file.write(self._buf + line)
        self._buf = nl
        if self.content_length > 0 and self.size > self.content_length:
            raise MultipartError('Size of body exceeds Content-Length header.')
        if self.size > self.memfile_limit and isinstance(self.file, BytesIO):
            # Too large for memory: continue in a temporary file, seeded
            # with what has been buffered so far.
            self.file, old = TemporaryFile(mode='w+b'), self.file
            old.seek(0)
            copy_file(old, self.file, self.size, self.buffer_size)

    def finish_header(self):
        """Close the header section and set the part up to receive its body.

        Raises:
            MultipartError: If the Content-Disposition header is missing or
                empty, if Content-Length is not a valid integer, or if
                Content-Transfer-Encoding is present but is neither
                ``base64`` nor ``quoted-printable`` (compared without
                regard to case).
        """
        self.file = BytesIO()
        self.headers = Headers(self.headerlist)
        cdis = self.headers.get('Content-Disposition', '')
        ctype = self.headers.get('Content-Type', '')
        clen = self.headers.get('Content-Length', '-1')
        cte = self.headers.get('Content-Transfer-Encoding')
        if not cdis:
            raise MultipartError('Content-Disposition header is missing.')
        self.disposition, self.options = parse_options_header(cdis)
        self.name = self.options.get('name')
        self.filename = self.options.get('filename')
        self.content_type, options = parse_options_header(ctype)
        # A charset given in Content-Type overrides the current default.
        self.charset = options.get('charset') or self.charset
        # The value is client-supplied; report a malformed number as a
        # parse error instead of leaking a bare ValueError from int().
        try:
            self.content_length = int(clen)
        except ValueError:
            raise MultipartError('Invalid Content-Length header.')
        # Encoding names are case-insensitive (RFC 2045); normalise so that
        # e.g. "Base64" is accepted and write_body() can compare exactly.
        if cte is not None:
            cte = cte.strip().lower()
        self.content_transfer_encoding = cte
        if cte not in (None, 'base64', 'quoted-printable'):
            raise MultipartError('invalid Content-Transfer-Encoding')

    def is_buffered(self):
        """Return true if the data is fully buffered in memory."""
        return isinstance(self.file, BytesIO)

    def value(self, limit):
        """Return the data decoded with the part's charset.

        Args:
            limit: Maximum number of bytes to read, passed to ``read()``.

        Raises:
            MultipartError: If the file holds more data than was read.

        The file position in effect before the call is restored afterwards,
        even if an error is raised.
        """
        pos = self.file.tell()
        try:
            self.file.seek(0)
            val = self.file.read(limit)
            # Any byte left over means the data did not fit into ``limit``.
            if self.file.read(1):
                raise MultipartError("Request too big. Increase mem_limit.")
        finally:
            self.file.seek(pos)
        return val.decode(self.charset)

    def save_as(self, path):
        """Write the part's data to a new file at ``path``.

        The target file is closed before returning, and the read position
        of ``self.file`` is restored even if copying fails.

        Returns:
            Whatever ``copy_file`` returns (presumably the number of bytes
            copied -- confirm against its definition).
        """
        with open(path, 'wb') as fp:
            pos = self.file.tell()
            try:
                self.file.seek(0)
                size = copy_file(self.file, fp)
            finally:
                self.file.seek(pos)
        return size