def _init_warc_headers(self, uri, record_type, warc_headers_dict):
    """Build the WARC header object for a new record.

    The headers start out as the items of ``warc_headers_dict`` and are
    then completed in a fixed order:

    * 'WARC-Type' is set to ``record_type`` through ``replace_header``.
    * 'WARC-Record-ID' is added from ``self._make_warc_id()`` only when
      ``get_header`` reports no truthy value for it.
    * 'WARC-Target-URI' is set to ``uri`` through ``replace_header``,
      but only when ``uri`` is truthy.
    * 'WARC-Date' is added from ``self._make_warc_date()`` only when
      ``get_header`` reports no truthy value for it.

    :param uri: target URI of the record; skipped when falsy
    :param record_type: value stored under 'WARC-Type'
    :param warc_headers_dict: mapping of initial header names to values
    :return: the populated ``StatusAndHeaders`` instance
    """
    version = self.warc_version
    initial_pairs = list(warc_headers_dict.items())
    headers = StatusAndHeaders(version, initial_pairs)

    # The explicit record type is applied regardless of what the
    # caller-supplied mapping contained.
    headers.replace_header('WARC-Type', record_type)

    current_id = headers.get_header('WARC-Record-ID')
    if not current_id:
        headers.add_header('WARC-Record-ID', self._make_warc_id())

    if uri:
        headers.replace_header('WARC-Target-URI', uri)

    current_date = headers.get_header('WARC-Date')
    if not current_date:
        headers.add_header('WARC-Date', self._make_warc_date())

    return headers
def create_warcinfo_record(self, filename, info):
    """Create a 'warcinfo' record describing a WARC file.

    The record headers are built from scratch: 'WARC-Type' (always
    'warcinfo'), a freshly generated 'WARC-Record-ID', an optional
    'WARC-Filename' (only when ``filename`` is truthy) and a freshly
    generated 'WARC-Date', added in that order.

    The payload holds one ``name: value`` line per entry of ``info``,
    each terminated by CRLF and encoded as UTF-8. Entries whose value is
    falsy are left out.

    :param filename: name stored under 'WARC-Filename'; skipped when falsy
    :param info: mapping of field names to values for the record body
    :return: the result of ``self.create_warc_record`` for the assembled
        headers and payload
    """
    headers = StatusAndHeaders(self.warc_version, [])
    headers.add_header('WARC-Type', 'warcinfo')
    headers.add_header('WARC-Record-ID', self._make_warc_id())
    if filename:
        headers.add_header('WARC-Filename', filename)
    headers.add_header('WARC-Date', self._make_warc_date())

    # Serialise every non-empty field as a UTF-8 encoded "name: value" line.
    encoded_body = b''.join(
        (field + ': ' + str(field_value) + '\r\n').encode('utf-8')
        for field, field_value in six.iteritems(info)
        if field_value
    )

    # A stream created from initial bytes is already positioned at offset 0.
    body_stream = BytesIO(encoded_body)
    body_length = len(encoded_body)

    return self.create_warc_record('', 'warcinfo',
                                   warc_headers=headers,
                                   payload=body_stream,
                                   length=body_length)