Пример #1
0
    def test_stamp_from_raw(self):
        """Check the shape of what ``util.stamp_from_raw`` returns.

        A raw document with an empty ``timestamps`` mapping is stamped with
        one extra keyword (``done``). The result must be a dict whose keys
        are exactly ``done`` and ``normalizeFinished``.
        """
        extra_stamps = {'done': 'now'}
        document = {'doc': 'Macho Man Story', 'timestamps': {}}

        result = util.stamp_from_raw(document, **extra_stamps)

        assert isinstance(result, dict)
        # The test expects 'normalizeFinished' alongside the key passed in.
        expected_keys = {'done', 'normalizeFinished'}
        assert set(result.keys()) == expected_keys
Пример #2
0
def normalize(raw_doc, harvester_name):
    """Normalize ``raw_doc`` with the harvester registered as ``harvester_name``.

    The start time is taken from ``timestamp()`` before the harvester runs
    and is passed to ``util.stamp_from_raw`` as ``normalizeStarted``; the
    value returned by that helper is stored under the ``'timestamps'`` key
    of the normalized document.

    Raises:
        events.Skip: if the harvester returns a falsy result for ``raw_doc``.

    Returns:
        A single normalized document.
    """
    # Take the start time first so it precedes the harvester's work.
    started_at = timestamp()

    result = registry[harvester_name].normalize(raw_doc)
    if not result:
        message = 'Did not normalize document with id {}'.format(raw_doc['docID'])
        raise events.Skip(message)

    stamps = util.stamp_from_raw(raw_doc, normalizeStarted=started_at)
    result['timestamps'] = stamps
    return result
Пример #3
0
def normalize(raw_doc, harvester_name):
    """Run the named harvester's ``normalize`` on ``raw_doc`` and stamp the result.

    ``timestamp()`` is called before the harvester is looked up in
    ``registry``. If the harvester yields a falsy value, ``events.Skip`` is
    raised with a message that includes ``raw_doc['docID']``. Otherwise the
    output of ``util.stamp_from_raw`` (given the start time as
    ``normalizeStarted``) is attached under ``'timestamps'`` and the single
    normalized document is returned.
    """
    begun = timestamp()
    selected_harvester = registry[harvester_name]
    document = selected_harvester.normalize(raw_doc)

    if document:
        document['timestamps'] = util.stamp_from_raw(
            raw_doc, normalizeStarted=begun
        )
        return document

    # Nothing usable came back from the harvester: signal a skip.
    raise events.Skip(
        'Did not normalize document with id {}'.format(raw_doc['docID'])
    )