def run(dataset):
    """Crawl every source of a dataset, then normalize and export its datasets.

    Args:
        dataset: Value handed to ``Dataset.get`` to look up the dataset
            (presumably a name or identifier -- confirm against
            ``Dataset.get``).
    """
    resolved = Dataset.get(dataset)

    # Phase 1: crawl all sources before anything is normalized or exported.
    for src in resolved.sources:
        crawler = Context(src)
        crawler.crawl()

    # Phase 2: normalize first, then export, one member dataset at a time.
    for member in resolved.datasets:
        ctx = Context(member)
        ctx.normalize()
        ctx.export()
def export(dataset):
    """Normalize and export every entry in a dataset's ``datasets``.

    Args:
        dataset: Value handed to ``Dataset.get`` to look up the dataset
            (presumably a name or identifier -- confirm against
            ``Dataset.get``).
    """
    resolved = Dataset.get(dataset)
    for member in resolved.datasets:
        ctx = Context(member)
        # Normalization always runs before the export of the same member.
        ctx.normalize()
        ctx.export()
def dump_dataset(dataset, outfile):
    """Normalize a dataset and write each entity of its store to ``outfile``.

    NOTE(review): another ``dump_dataset`` is defined later in this file; if
    both live at module level, the later definition replaces this one.

    Args:
        dataset: Value handed to ``Dataset.get`` to look up the dataset.
        outfile: Destination passed as the first argument to
            ``write_object`` for every entity.
    """
    resolved = Dataset.get(dataset)

    # Keep the context bound to a local for the duration of the dump.
    ctx = Context(resolved)
    ctx.normalize()

    for item in resolved.store:
        write_object(outfile, item)
def crawl(dataset):
    """Crawl every source attached to a dataset.

    Args:
        dataset: Value handed to ``Dataset.get`` to look up the dataset
            (presumably a name or identifier -- confirm against
            ``Dataset.get``).
    """
    resolved = Dataset.get(dataset)
    for src in resolved.sources:
        crawler = Context(src)
        crawler.crawl()
def dump_dataset(dataset, outfile):
    """Write the entities of every source store of a dataset to ``outfile``.

    Entities are written source by source, in the iteration order of
    ``sources`` and of each source's ``store``.

    NOTE(review): an earlier ``dump_dataset`` is defined in this file; if both
    live at module level, this later definition is the one that stays bound.

    Args:
        dataset: Value handed to ``Dataset.get`` to look up the dataset.
        outfile: Destination passed as the first argument to
            ``write_object`` for every entity.
    """
    resolved = Dataset.get(dataset)
    for src in resolved.sources:
        # TODO: consolidate the data
        for item in src.store:
            write_object(outfile, item)