Exemplo n.º 1
0
    def _start(self):
        self.blocks_output_file = get_file_handle(self.blocks_output,
                                                  binary=True)
        self.transactions_output_file = get_file_handle(
            self.transactions_output, binary=True)

        self.blocks_exporter = CsvItemExporter(self.blocks_output_file)
        self.transactions_exporter = CsvItemExporter(
            self.transactions_output_file)
Exemplo n.º 2
0
    def _start(self):
        super()._start()

        self.blocks_output_file = get_file_handle(self.blocks_output, binary=True)
        self.blocks_exporter = CsvItemExporter(
            self.blocks_output_file, fields_to_export=self.block_fields_to_export)

        self.transactions_output_file = get_file_handle(self.transactions_output, binary=True)
        self.transactions_exporter = CsvItemExporter(
            self.transactions_output_file, fields_to_export=self.transaction_fields_to_export)
Exemplo n.º 3
0
def get_item_sink(output_file):
    fh = get_file_handle(output_file, 'w')

    if output_file.endswith('.csv'):
        set_max_field_size_limit()

        writer = None

        def sink(item):
            nonlocal writer
            if writer is None:
                fields = list(six.iterkeys(item))
                writer = csv.DictWriter(fh,
                                        fieldnames=fields,
                                        extrasaction='ignore')
                writer.writeheader()
            writer.writerow(item)
    else:

        def sink(item):
            fh.write(json.dumps(item) + '\n')

    try:
        yield sink
    finally:
        fh.close()
 def open(self):
     for item_type, filename in self.filename_mapping.items():
         self.file_mapping[item_type] = get_file_handle(filename,
                                                        binary=True)
         self.exporter_mapping[item_type] = CsvItemExporter(
             self.file_mapping[item_type],
             fields_to_export=self.field_mapping[item_type])
Exemplo n.º 5
0
    def _start(self):
        # Using bounded executor prevents unlimited queue growth
        # and allows monitoring in-progress futures and failing fast in case of errors.
        self.executor = FailSafeExecutor(BoundedExecutor(1, self.max_workers))

        self.blocks_output_file = get_file_handle(self.blocks_output,
                                                  binary=True)
        self.blocks_exporter = CsvItemExporter(
            self.blocks_output_file,
            fields_to_export=self.block_fields_to_export)

        self.transactions_output_file = get_file_handle(
            self.transactions_output, binary=True)
        self.transactions_exporter = CsvItemExporter(
            self.transactions_output_file,
            fields_to_export=self.transaction_fields_to_export)
    def _start(self):
        super()._start()

        self.output_file = get_file_handle(self.output,
                                           binary=True,
                                           create_parent_dirs=True)
        self.exporter = CsvItemExporter(self.output_file,
                                        fields_to_export=self.fields_to_export)
Exemplo n.º 7
0
    def open(self):
        for item_type, filename in self.filename_mapping.items():
            file = get_file_handle(filename, binary=True)
            fields = self.field_mapping[item_type]
            self.file_mapping[item_type] = file
            if str(filename).endswith('.json'):
                item_exporter = JsonLinesItemExporter(file, fields_to_export=fields)
            else:
                item_exporter = CsvItemExporter(file, fields_to_export=fields)
            self.exporter_mapping[item_type] = item_exporter

            self.counter_mapping[item_type] = AtomicCounter()
Exemplo n.º 8
0
def get_item_iterable(input_file):
    fh = get_file_handle(input_file, 'r')

    if input_file.endswith('.csv'):
        set_max_field_size_limit()
        reader = csv.DictReader(fh)
    else:
        reader = (json.loads(line) for line in fh)

    try:
        yield reader
    finally:
        fh.close()
Exemplo n.º 9
0
 def _start(self):
     self.output_file = get_file_handle(self.output, binary=True)
     self.exporter = CsvItemExporter(self.output_file)
Exemplo n.º 10
0
    def _start(self):
        super()._start()

        self.output_file = get_file_handle(self.output,
                                           binary=True,
                                           create_parent_dirs=True)