def test_load_http2_warc_convert_protocol(self):
    """Convert an HTTP/2 HAR fixture to WARC and check the rewritten protocol.

    The HAR fixture is converted with ``har2warc`` into a temporary file,
    which is then read back through ``ArchiveIterator``. The first record
    must be a ``warcinfo`` record and the second a ``response`` record whose
    HTTP protocol is reported as ``HTTP/1.1``.
    """
    filename = self.get_test_file('http2.github.io.har')

    # An anonymous temporary file replaces the former fixed path in the
    # shared temp directory: concurrent runs cannot collide on it, and it
    # is removed automatically when closed, even if the conversion or an
    # assertion fails.
    with tempfile.TemporaryFile(mode='w+b') as fh:
        # write then read same file
        har2warc(filename, fh)
        fh.seek(0)

        ai = ArchiveIterator(fh, verify_http=True)

        record = next(ai)
        assert record.rec_type == 'warcinfo'

        record = next(ai)
        assert record.rec_type == 'response'

        # ensure protocol converted to HTTP/1.1
        assert record.http_headers.protocol == 'HTTP/1.1'
def har2warc(self, filename, stream):
    """Convert HTTP Archive format file to WARC archive.

    :param str filename: name of HAR file
    :param stream: file object (input), decoded as UTF-8 while reading
    :returns: output file object rewound to its start, and the output
        position reached after conversion (the archive size, assuming the
        output file started empty)
    :rtype: file object and int
    """
    out = self._har2warc_temp_file(filename)
    try:
        reader = codecs.getreader('utf-8')(stream)
        rec_title = os.path.basename(filename)

        har2warc(reader, out, filename + '.warc', rec_title)

        # Record how far the converter wrote before rewinding for the caller.
        size = out.tell()
        out.seek(0)
    except BaseException:
        # Do not leak the output file when the conversion fails.
        out.close()
        raise

    return out, size
def har2warc(self, filename, stream):
    """Convert HTTP Archive format file to WARC archive.

    The input stream is wrapped in a UTF-8 reader and the record title is
    taken from the base name of ``filename``. The output file is closed
    again if the conversion raises.

    :param str filename: name of HAR file
    :param stream: file object (input)
    :returns: file object (output), rewound to its start, and size of
        WARC archive (the output position reached after conversion)
    :rtype: file object and int
    """
    out = self._har2warc_temp_file(filename)
    try:
        utf8_stream = codecs.getreader('utf-8')(stream)
        rec_title = os.path.basename(filename)

        har2warc(utf8_stream, out, filename + '.warc', rec_title)

        # Measure the written length first, then rewind so the caller can
        # read the archive from the beginning.
        size = out.tell()
        out.seek(0)
    except BaseException:
        # Release the output file instead of leaking it on failure.
        out.close()
        raise

    return out, size