示例#1
0
class IPFSWARCRecorder(BaseWARCRecorder):
    """WARC recorder that publishes each response record to IPFS.

    ``write_records`` writes the response record to a temporary
    ``.warc.gz`` file under ``warcdir``, adds that file to IPFS and, when
    the add succeeds, registers the resulting ``ipfs://`` path with the
    ``RedisIndexer`` created in ``__init__``.
    """

    def __init__(self, warcdir, ipfs, redis):
        """Set up the recorder and make sure the working directory exists.

        :param warcdir: directory used for the temporary ``.warc.gz`` files;
            created if it does not exist yet.
        :param ipfs: client object whose ``add(stream)`` method is used to
            publish the written WARC file.
        :param redis: handle passed through to ``RedisIndexer``.
        :raises OSError: if ``warcdir`` does not exist and cannot be created.
        """
        super(IPFSWARCRecorder, self).__init__()
        self.warcdir = warcdir
        self.ipfs = ipfs
        self.redisindex = RedisIndexer(redis, 'ipfs:cdxj')

        # experimental dedup support
        #self.dedup = self.redisindex

        try:
            os.makedirs(warcdir)
        except OSError:
            # An already-existing directory is the expected case; any other
            # failure (permissions, a file in the way, ...) is a real error.
            if not os.path.isdir(warcdir):
                raise

    def write_records(self):
        """Write the response record to a temp file, add it to IPFS, index it.

        The temporary file is always removed, including when writing, the
        IPFS add or the indexing step raises. A falsy result from
        ``ipfs.add`` is reported with a printed message and the record is
        not indexed.
        """
        resp_uuid = str(uuid.uuid1())
        resp_id = self._make_warc_id(resp_uuid)

        filename = os.path.join(self.warcdir, resp_uuid + '.warc.gz')

        # NOTE: the matching 'request' record is not written for now. When it
        # is enabled it needs its own uuid / WARC id and should be written via
        # self._write_warc_request(out, warc_id=req_id, concur_id=resp_id).

        try:
            # The file holds gzip-compressed data, so use binary mode for
            # both writing and reading.
            with open(filename, 'wb') as out:
                self._write_warc_response(out, warc_id=resp_id)
                out.flush()

            with open(filename, 'rb') as stream:
                stream = CustomNameStream(stream, quote_plus(self.url))
                res = self.ipfs.add(stream)
                if not res:
                    print('IPFS ADD FAILED')
                else:
                    path = 'ipfs://' + res['Hash']
                    # NOTE(review): the same stream object was already handed
                    # to ipfs.add(); confirm add_record() does not need it
                    # rewound before reading.
                    self.redisindex.add_record(stream, path)
        finally:
            # Never leave the temporary WARC behind. The existence check
            # avoids masking an earlier error if the file was never created.
            if os.path.exists(filename):
                os.remove(filename)
示例#2
0
    def __init__(self, warcdir, ipfs, redis):
        """Set up the recorder and make sure the working directory exists.

        :param warcdir: directory stored on the instance as ``self.warcdir``;
            created if it does not exist yet.
        :param ipfs: object stored on the instance as ``self.ipfs``.
        :param redis: handle passed through to ``RedisIndexer``.
        :raises OSError: if ``warcdir`` does not exist and cannot be created.
        """
        super(IPFSWARCRecorder, self).__init__()
        self.warcdir = warcdir
        self.ipfs = ipfs
        self.redisindex = RedisIndexer(redis, 'ipfs:cdxj')

        # experimental dedup support
        #self.dedup = self.redisindex

        try:
            os.makedirs(warcdir)
        except OSError:
            # An already-existing directory is the expected case; any other
            # failure (permissions, a file in the way, ...) is a real error.
            if not os.path.isdir(warcdir):
                raise