示例#1
0
    def test_post_extract_wrong_method(self):
        """A PUT request with a form-encoded body must leave the URL untouched."""
        payload = self.post_data
        canonicalizer = MethodQueryCanonicalizer(
            'PUT', 'application/x-www-form-urlencoded',
            len(payload), BytesIO(payload))

        # The expected result is the input URL itself: nothing is appended.
        url = 'http://example.com/'
        assert canonicalizer.append_query(url) == url
示例#2
0
    def test_post_extract_1(self):
        """A form-encoded POST body is appended to the URL as query parameters."""
        payload = self.post_data
        canonicalizer = MethodQueryCanonicalizer(
            'POST', 'application/x-www-form-urlencoded',
            len(payload), BytesIO(payload))

        # URL without a query string: the parameters start a new query.
        bare_result = canonicalizer.append_query('http://example.com/')
        assert bare_result == 'http://example.com/?foo=bar&dir=/baz'

        # URL that already has a query string: the parameters are appended with '&'.
        extended_result = canonicalizer.append_query('http://example.com/?123=ABC')
        assert extended_result == 'http://example.com/?123=ABC&foo=bar&dir=/baz'
示例#3
0
    def test_post_extract_1(self):
        """A form-encoded POST body is appended to the URL as query parameters."""
        payload = self.post_data
        canonicalizer = MethodQueryCanonicalizer(
            'POST', 'application/x-www-form-urlencoded',
            len(payload), BytesIO(payload))

        # URL without a query string: the parameters start a new query.
        bare_result = canonicalizer.append_query('http://example.com/')
        assert bare_result == 'http://example.com/?foo=bar&dir=/baz'

        # URL that already has a query string: the parameters are appended with '&'.
        extended_result = canonicalizer.append_query('http://example.com/?123=ABC')
        assert extended_result == 'http://example.com/?123=ABC&foo=bar&dir=/baz'
示例#4
0
    def test_post_extract_no_boundary_in_multipart_form_mimetype(self):
        """Check the query produced for 'multipart/form-data' given without a boundary.

        The expected URL carries ``__wb_method`` and ``__wb_post_data``
        parameters rather than individual form fields.
        """
        payload = self.post_data
        canonicalizer = MethodQueryCanonicalizer(
            'POST', 'multipart/form-data', len(payload), BytesIO(payload))

        expected = 'http://example.com/?__wb_method=POST&__wb_post_data=Zm9vPWJhciZkaXI9JTJGYmF6'
        assert canonicalizer.append_query('http://example.com/') == expected
示例#5
0
    def test_post_extract_json(self):
        """Check the query string produced for a nested JSON POST body."""
        json_body = b'{"a": "b", "c": {"a": 2}, "d": "e"}'
        canonicalizer = MethodQueryCanonicalizer(
            'POST', 'application/json', len(json_body), BytesIO(json_body))

        # The nested "a" key is expected to show up under the name "a.2_".
        expected = 'http://example.com/?__wb_method=POST&a=b&a.2_=2&d=e'
        assert canonicalizer.append_query('http://example.com/') == expected
示例#6
0
    def test_post_extract_length_too_short(self):
        """A declared length 4 bytes short of the body yields a truncated query."""
        payload = self.post_data
        declared_length = len(payload) - 4
        canonicalizer = MethodQueryCanonicalizer(
            'POST', 'application/x-www-form-urlencoded',
            declared_length, BytesIO(payload))

        # The last parameter value is expected to be cut off at '%2'.
        expected = 'http://example.com/?foo=bar&dir=%2'
        assert canonicalizer.append_query('http://example.com/') == expected
示例#7
0
    def test_post_extract_non_form_data_2(self):
        """Check the query appended for a 'text/plain' body on a URL with a query."""
        payload = self.post_data
        canonicalizer = MethodQueryCanonicalizer(
            'POST', 'text/plain', len(payload), BytesIO(payload))

        # The body is expected in base64 form in __wb_post_data, after the
        # existing 'id' parameter.
        expected = ('http://example.com/pathbar?id=123'
                    '&__wb_method=POST&__wb_post_data=Zm9vPWJhciZkaXI9JTJGYmF6')
        actual = canonicalizer.append_query('http://example.com/pathbar?id=123')
        assert actual == expected
示例#8
0
    def test_post_extract_non_form_data_1(self):
        """Check the query appended for an 'application/octet-stream' body."""
        payload = self.post_data
        canonicalizer = MethodQueryCanonicalizer(
            'POST', 'application/octet-stream', len(payload), BytesIO(payload))

        # The body is expected in base64 form in the __wb_post_data parameter.
        expected = 'http://example.com/?__wb_method=POST&__wb_post_data=Zm9vPWJhciZkaXI9JTJGYmF6'
        assert canonicalizer.append_query('http://example.com/') == expected
示例#9
0
    def test_post_extract_malformed_form_data(self):
        """Check the query appended when a form-encoded body holds binary data."""
        raw_bytes = self.binary_post_data
        canonicalizer = MethodQueryCanonicalizer(
            'POST', 'application/x-www-form-urlencoded',
            len(raw_bytes), BytesIO(raw_bytes))

        # The body is expected in base64 form in the __wb_post_data parameter.
        expected = 'http://example.com/?__wb_method=POST&__wb_post_data=gTZsYEygNFAO4HICtYkZAGZQ2w6wAiw='
        assert canonicalizer.append_query('http://example.com/') == expected
示例#10
0
    def test_post_extract_length_invalid_ignore(self):
        """A length of 0 or a non-numeric length leaves the URL unchanged."""
        # Same check for each unusable length value, in the original order.
        for bad_length in (0, 'abc'):
            canonicalizer = MethodQueryCanonicalizer(
                'POST', 'application/x-www-form-urlencoded',
                bad_length, BytesIO(self.post_data))

            assert canonicalizer.append_query('http://example.com/') == 'http://example.com/'
示例#11
0
    def test_amf_parse(self):
        """amf_parse must give different results for envelopes whose request bodies differ."""
        canonicalizer = MethodQueryCanonicalizer('POST', 'application/x-amf', 0, BytesIO())

        # First envelope: request with an empty body.
        empty_request = Request(target='t', body="")
        empty_envelope = Envelope(AMF3)
        empty_envelope['/0'] = empty_request

        # Second envelope: same target, non-empty body.
        filled_request = Request(target='t', body="alt_content")
        filled_envelope = Envelope(AMF3)
        filled_envelope['/0'] = filled_request

        parsed_empty = canonicalizer.amf_parse(encode(empty_envelope).getvalue(), None)
        parsed_filled = canonicalizer.amf_parse(encode(filled_envelope).getvalue(), None)
        assert parsed_empty != parsed_filled
示例#12
0
    def test_amf_parse(self):
        """amf_parse must give different results for envelopes whose request bodies differ."""
        canonicalizer = MethodQueryCanonicalizer('POST', 'application/x-amf', 0, BytesIO())

        # First envelope: request with an empty body.
        empty_request = Request(target='t', body="")
        empty_envelope = Envelope(AMF3)
        empty_envelope['/0'] = empty_request

        # Second envelope: same target, non-empty body.
        filled_request = Request(target='t', body="alt_content")
        filled_envelope = Envelope(AMF3)
        filled_envelope['/0'] = filled_request

        parsed_empty = canonicalizer.amf_parse(encode(empty_envelope).getvalue(), None)
        parsed_filled = canonicalizer.amf_parse(encode(filled_envelope).getvalue(), None)
        assert parsed_empty != parsed_filled
示例#13
0
    def test_post_extract_length_invalid_ignore(self):
        """A length of 0 or a non-numeric length leaves the URL unchanged."""
        # Same check for each unusable length value, in the original order.
        for bad_length in (0, 'abc'):
            canonicalizer = MethodQueryCanonicalizer(
                'POST', 'application/x-www-form-urlencoded',
                bad_length, BytesIO(self.post_data))

            assert canonicalizer.append_query('http://example.com/') == 'http://example.com/'
示例#14
0
    def create_record_iter(self, raw_iter):
        """Yield one parsed index entry per indexable record in ``raw_iter``.

        Behaviour is driven by ``self.options``:

        * ``append_post`` -- keep request records and attach a
          ``MethodQueryCanonicalizer`` to their entries as ``_post_query``.
        * ``include_all`` -- disable the record filters below.
        * ``surt_ordered`` -- passed to ``canonicalize`` when building a
          missing ``urlkey`` (defaults to True).
        * ``minimal`` -- disables the status-code filter and the POST query
          handling; cannot be combined with ``append_post``.

        :param raw_iter: iterable of records that also provides
            ``read_to_end(record)`` and ``member_info``.
        :raises Exception: if both ``append_post`` and ``minimal`` are set.
            Because this is a generator, the error surfaces when iteration
            starts, not when the method is called.
        """
        append_post = self.options.get('append_post')
        include_all = self.options.get('include_all')
        surt_ordered = self.options.get('surt_ordered', True)
        minimal = self.options.get('minimal')

        if append_post and minimal:
            raise Exception('Sorry, minimal index option and ' +
                            'append POST options can not be used together')

        for record in raw_iter:
            entry = None

            # Skip records whose status code is reported as '-', unless
            # everything is being indexed or the index is minimal.
            if not include_all and not minimal and (
                    record.http_headers.get_statuscode() == '-'):
                continue

            # 'arc_header' records are never indexed.
            if record.rec_type == 'arc_header':
                continue

            if record.format == 'warc':
                # request/warcinfo records are only kept when indexing
                # everything or when POST data must be appended.
                if (record.rec_type in ('request', 'warcinfo')
                        and not include_all and not append_post):
                    continue

                # 'application/warc-fields' content is skipped unless
                # everything is being indexed.
                elif (not include_all
                      and record.content_type == 'application/warc-fields'):
                    continue

                entry = self.parse_warc_record(record)
            elif record.format == 'arc':
                entry = self.parse_arc_record(record)

            # Either the format was neither 'warc' nor 'arc', or the parser
            # returned nothing usable.
            if not entry:
                continue

            # Fill in the canonicalized key only when the parser did not.
            if entry.get('url') and not entry.get('urlkey'):
                entry['urlkey'] = canonicalize(entry['url'], surt_ordered)

            compute_digest = False

            # Request a digest computation only when none is present and the
            # record type is not revisit/request/warcinfo.
            if (entry.get('digest', '-') == '-' and record.rec_type
                    not in ('revisit', 'request', 'warcinfo')):

                compute_digest = True

            elif not minimal and record.rec_type == 'request' and append_post:
                # presumably `protocol` holds the HTTP verb for request
                # records -- confirm against the header parser.
                method = record.http_headers.protocol
                len_ = record.http_headers.get_header('Content-Length')

                # NOTE(review): the canonicalizer is given the same
                # raw_stream that is drained in the read loop below --
                # confirm when it actually consumes the body.
                post_query = MethodQueryCanonicalizer(
                    method, entry.get('_content_type'), len_,
                    record.raw_stream)

                entry['_post_query'] = post_query

            entry.record = record

            self.begin_payload(compute_digest, entry)

            # Feed whatever remains of the payload to handle_payload in
            # BUFF_SIZE chunks until the stream is exhausted.
            while True:
                buff = record.raw_stream.read(BUFF_SIZE)
                if not buff:
                    break
                self.handle_payload(buff)

            raw_iter.read_to_end(record)

            # member_info is read after read_to_end and unpacked into
            # set_rec_info.
            entry.set_rec_info(*raw_iter.member_info)
            self.end_payload(entry)

            yield entry
示例#15
0
    def test_post_extract_non_form_data_1(self):
        """Check the query appended for an 'application/octet-stream' body."""
        payload = self.post_data
        canonicalizer = MethodQueryCanonicalizer(
            'POST', 'application/octet-stream', len(payload), BytesIO(payload))

        # The body is expected in base64 form in the __wb_post_data parameter.
        expected = 'http://example.com/?__wb_post_data=Zm9vPWJhciZkaXI9JTJGYmF6'
        assert canonicalizer.append_query('http://example.com/') == expected
示例#16
0
 def test_options(self):
     """An OPTIONS request with no body adds only the __wb_method parameter."""
     canonicalizer = MethodQueryCanonicalizer('OPTIONS', '', 0, BytesIO())
     expected = 'http://example.com/?__wb_method=OPTIONS'
     assert canonicalizer.append_query('http://example.com/') == expected
示例#17
0
 def test_head(self):
     """A HEAD request with no body adds only the __pywb_method parameter."""
     canonicalizer = MethodQueryCanonicalizer('HEAD', '', 0, BytesIO())
     expected = 'http://example.com/?__pywb_method=head'
     assert canonicalizer.append_query('http://example.com/') == expected
示例#18
0
 def test_options(self):
     """An OPTIONS request with no body adds only the __pywb_method parameter."""
     canonicalizer = MethodQueryCanonicalizer('OPTIONS', '', 0, BytesIO())
     expected = 'http://example.com/?__pywb_method=options'
     assert canonicalizer.append_query('http://example.com/') == expected
示例#19
0
    def test_post_extract_length_too_short(self):
        """A declared length 4 bytes short of the body yields a truncated query."""
        payload = self.post_data
        declared_length = len(payload) - 4
        canonicalizer = MethodQueryCanonicalizer(
            'POST', 'application/x-www-form-urlencoded',
            declared_length, BytesIO(payload))

        # The last parameter value is expected to be cut off at '%2'.
        expected = 'http://example.com/?foo=bar&dir=%2'
        assert canonicalizer.append_query('http://example.com/') == expected
示例#20
0
    def test_post_extract_non_form_data_2(self):
        """Check the query appended for a 'text/plain' body on a URL with a query."""
        payload = self.post_data
        canonicalizer = MethodQueryCanonicalizer(
            'POST', 'text/plain', len(payload), BytesIO(payload))

        # The body is expected in base64 form in __wb_post_data, after the
        # existing 'id' parameter.
        expected = 'http://example.com/pathbar?id=123&__wb_post_data=Zm9vPWJhciZkaXI9JTJGYmF6'
        actual = canonicalizer.append_query('http://example.com/pathbar?id=123')
        assert actual == expected
示例#21
0
 def test_head(self):
     """A HEAD request with no body adds only the __wb_method parameter."""
     canonicalizer = MethodQueryCanonicalizer('HEAD', '', 0, BytesIO())
     expected = 'http://example.com/?__wb_method=HEAD'
     assert canonicalizer.append_query('http://example.com/') == expected
示例#22
0
    def test_post_extract_wrong_method(self):
        """A PUT request with a form-encoded body must leave the URL untouched."""
        payload = self.post_data
        canonicalizer = MethodQueryCanonicalizer(
            'PUT', 'application/x-www-form-urlencoded',
            len(payload), BytesIO(payload))

        # The expected result is the input URL itself: nothing is appended.
        url = 'http://example.com/'
        assert canonicalizer.append_query(url) == url