def handle(self, csvpath, *args, **options):
    """Load contributions from a denormalized CSV into the database.

    Commits the transaction on success; on ANY failure (including
    Ctrl-C) prints the traceback, rolls back, and re-raises. Always
    flushes stdout/stderr before returning.
    """
    loader = ContributionLoader(
        source=options.get('source'),
        description='load from denormalized CSVs',
        imported_by="loadcontributions.py (%s)" % os.getenv('LOGNAME', 'unknown'),
    )
    try:
        # 'skip' lets a restarted run resume past already-loaded rows
        # (plus 1 for the header row).
        input_iterator = VerifiedCSVSource(
            open(os.path.abspath(csvpath)),
            FIELDNAMES,
            skiprows=1 + int(options['skip']))
        output_func = chain_filters(
            LoaderEmitter(loader),
            #Every(self.COMMIT_FREQUENCY, lambda i: transaction.commit()),
            Every(self.COMMIT_FREQUENCY, progress_tick))
        record_processor = self.get_record_processor(loader.import_session)
        load_data(input_iterator, record_processor, output_func)
        transaction.commit()
    except BaseException:
        # The original had two byte-identical handlers (KeyboardInterrupt
        # plus a bare except:); a bare except catches BaseException, so a
        # single BaseException handler preserves the exact behavior.
        traceback.print_exception(*sys.exc_info())
        transaction.rollback()
        raise
    finally:
        sys.stdout.flush()
        sys.stderr.flush()
def process_allocated(out_dir, input_path):
    """Split NIMSP contributions from *input_path* into two CSVs in *out_dir*.

    Writes ``nimsp_allocated_contributions.csv`` (records passed through
    DCIDFilter) and ``nimsp_unallocated_contributions.csv.TMP`` (records
    carrying a ``contributionid`` column for later salting).
    """
    allocated_csv_filename = os.path.join(out_dir, 'nimsp_allocated_contributions.csv')
    unallocated_csv_filename = os.path.join(out_dir, 'nimsp_unallocated_contributions.csv.TMP')
    input_fields = [name for (name, _, _) in CSV_SQL_MAPPING]

    # Context managers guarantee every handle is closed even if load_data
    # raises; the original leaked both outputs on error and never closed
    # input_file at all.
    with open(allocated_csv_filename, 'w') as allocated_csv, \
         open(unallocated_csv_filename, 'w') as unallocated_csv, \
         open(input_path, 'r') as input_file:
        allocated_emitter = AllocatedEmitter(allocated_csv, fieldnames=FIELDNAMES)
        unallocated_emitter = UnallocatedEmitter(
            unallocated_csv, fieldnames=FIELDNAMES + ['contributionid'])
        source = VerifiedCSVSource(input_file, input_fields)
        # Order matters: emit the raw record to the unallocated file first,
        # then salt/filter DCIDs before the allocated emitter sees it.
        output_func = chain_filters(
            unallocated_emitter,
            DCIDFilter(SALT_KEY),
            allocated_emitter)
        load_data(source, NIMSPDenormalize.get_allocated_record_processor(), output_func)
def load_payment(csvpath, *args, **options):
    """Load FARA payment records from a denormalized CSV into the database.

    Builds a filter chain that strips/re-adds bookkeeping fields, parses
    dates and numeric columns, and enforces model string lengths, then
    streams every row through the loader.
    """
    loader = FARAPaymentLoader(
        source='DOJ',
        description='load from denormalized CSVs',
        imported_by="loadfara.py (%s)" % os.getenv('LOGNAME', 'unknown'),
    )
    payment_record_processor = chain_filters(
        CSVFieldVerifier(),
        FieldRemover('id'),
        FieldRemover('import_reference'),
        FieldAdder('import_reference', loader.import_session),
        # Copy the raw date before parsing so the asterisk flag can be
        # derived from the original string.
        FieldCopier({'date_asterisk': 'date'}),
        FieldModifier('date', parse_fara_date),
        FieldModifier('date_asterisk', parse_fara_asterisk),
        FieldModifier('amount', parse_decimal),
        FieldModifier(('document_id', 'client_id', 'registrant_id',
                       'record_id', 'location_id', 'subcontractor_id'), parse_int),
        UnicodeFilter(),
        StringLengthFilter(Payment))
    output_func = chain_filters(
        LoaderEmitter(loader),
        Every(REPORT_FREQUENCY, progress_tick),
    )
    # Close the input file when done (the original leaked the handle).
    with open(os.path.abspath(csvpath)) as input_file:
        input_iterator = VerifiedCSVSource(
            input_file, fieldnames=Payment.FIELDNAMES, skiprows=1)
        load_data(input_iterator, payment_record_processor, output_func)
def process_allocated(out_dir, input_path):
    """Split NIMSP contributions from *input_path* into two CSVs in *out_dir*.

    Writes ``nimsp_allocated_contributions.csv`` (records passed through
    DCIDFilter) and ``nimsp_unallocated_contributions.csv.TMP`` (records
    carrying a ``contributionid`` column for later salting).
    """
    allocated_csv_filename = os.path.join(
        out_dir, 'nimsp_allocated_contributions.csv')
    unallocated_csv_filename = os.path.join(
        out_dir, 'nimsp_unallocated_contributions.csv.TMP')
    input_fields = [name for (name, _, _) in CSV_SQL_MAPPING]

    # Context managers guarantee every handle is closed even if load_data
    # raises; the original leaked both outputs on error and never closed
    # input_file at all.
    with open(allocated_csv_filename, 'w') as allocated_csv, \
         open(unallocated_csv_filename, 'w') as unallocated_csv, \
         open(input_path, 'r') as input_file:
        allocated_emitter = AllocatedEmitter(allocated_csv, fieldnames=FIELDNAMES)
        unallocated_emitter = UnallocatedEmitter(
            unallocated_csv, fieldnames=FIELDNAMES + ['contributionid'])
        source = VerifiedCSVSource(input_file, input_fields)
        # Order matters: emit the raw record to the unallocated file first,
        # then salt/filter DCIDs before the allocated emitter sees it.
        output_func = chain_filters(unallocated_emitter,
                                    DCIDFilter(SALT_KEY),
                                    allocated_emitter)
        load_data(source, NIMSPDenormalize.get_allocated_record_processor(), output_func)
def load_payment(csvpath, *args, **options):
    """Load FARA payment records from a denormalized CSV into the database.

    Builds a filter chain that strips/re-adds bookkeeping fields, parses
    dates and numeric columns, and enforces model string lengths, then
    streams every row through the loader.
    """
    loader = FARAPaymentLoader(
        source='DOJ',
        description='load from denormalized CSVs',
        imported_by="loadfara.py (%s)" % os.getenv('LOGNAME', 'unknown'),
    )
    payment_record_processor = chain_filters(
        CSVFieldVerifier(),
        FieldRemover('id'),
        FieldRemover('import_reference'),
        FieldAdder('import_reference', loader.import_session),
        # Copy the raw date before parsing so the asterisk flag can be
        # derived from the original string.
        FieldCopier({'date_asterisk': 'date'}),
        FieldModifier('date', parse_fara_date),
        FieldModifier('date_asterisk', parse_fara_asterisk),
        FieldModifier('amount', parse_decimal),
        FieldModifier(('document_id', 'client_id', 'registrant_id',
                       'record_id', 'location_id', 'subcontractor_id'), parse_int),
        UnicodeFilter(),
        StringLengthFilter(Payment))
    output_func = chain_filters(
        LoaderEmitter(loader),
        Every(REPORT_FREQUENCY, progress_tick),
    )
    # Close the input file when done (the original leaked the handle).
    with open(os.path.abspath(csvpath)) as input_file:
        input_iterator = VerifiedCSVSource(
            input_file, fieldnames=Payment.FIELDNAMES, skiprows=1)
        load_data(input_iterator, payment_record_processor, output_func)
def handle(self, input_path, year, **options):
    """Load TCS earmark rows from *input_path*, tagged with *year*.

    Records an Import row for provenance, then streams the CSV through
    the earmark record processor into save_earmark.
    """
    # Provenance record for this load run.
    imp = Import.objects.create(source=input_path, imported_by=__file__)
    # Close the input file when done (the original leaked the handle).
    with open(input_path, 'r') as input_file:
        input_source = VerifiedCSVSource(input_file, FIELDS, skiprows=1)
        # TODO: real year and import_ref (note carried over from original).
        processor = LoadTCSEarmarks.get_record_processor(int(year), imp)
        load_data(input_source, processor, save_earmark)
def handle(self, input_path, year, **options):
    """Load TCS earmark rows from *input_path*, tagged with *year*.

    Records an Import row for provenance, then streams the CSV through
    the earmark record processor into save_earmark.
    """
    # Provenance record for this load run.
    imp = Import.objects.create(source=input_path, imported_by=__file__)
    # Close the input file when done (the original leaked the handle).
    with open(input_path, 'r') as input_file:
        input_source = VerifiedCSVSource(input_file, FIELDS, skiprows=1)
        # TODO: real year and import_ref (note carried over from original).
        processor = LoadTCSEarmarks.get_record_processor(int(year), imp)
        load_data(input_source, processor, save_earmark)
def denormalize(self, data_path, cycles, catcodes, candidates, committees):
    """Denormalize CRP pac-to-pac files for all *cycles* into one output file.

    Reads raw/crp/pac_other<cycle>.txt for each cycle and writes a single
    denormalized/denorm_pac2pac.txt.
    """
    infiles = Files(*[os.path.join(data_path, 'raw', 'crp', 'pac_other%s.txt' % cycle)
                      for cycle in cycles])
    out_path = os.path.join(data_path, 'denormalized', 'denorm_pac2pac.txt')
    source = VerifiedCSVSource(infiles, fieldnames=FILE_TYPES['pac_other'], quotechar="|")
    record_processor = self.get_record_processor(catcodes, candidates, committees)
    # Ensure the output file is flushed and closed even if load_data raises
    # (the original never closed it).
    with open(out_path, 'w') as outfile:
        output_func = CSVEmitter(outfile, fieldnames=FIELDNAMES).process_record
        load_data(source, record_processor, output_func)
def denormalize(self, data_path, cycles, catcodes, candidates, committees):
    """Denormalize CRP individual-contribution files, one output per cycle.

    For each cycle, reads raw/crp/indivs<cycle>.txt and writes
    denormalized/denorm_indivs.<cycle>.txt, reusing one record processor
    across all cycles.
    """
    record_processor = self.get_record_processor(catcodes, candidates, committees)
    for cycle in cycles:
        in_path = os.path.join(data_path, 'raw', 'crp', 'indivs%s.txt' % cycle)
        out_path = os.path.join(data_path, 'denormalized', 'denorm_indivs.%s.txt' % cycle)
        sys.stdout.write('Reading from %s, writing to %s...\n' % (in_path, out_path))
        # Close both files every iteration — the original leaked a pair of
        # handles per cycle.
        with open(in_path, 'r') as infile, open(out_path, 'w') as outfile:
            input_source = VerifiedCSVSource(infile, fieldnames=FILE_TYPES['indivs'],
                                             quotechar="|")
            output_func = CSVEmitter(outfile, fieldnames=FIELDNAMES).process_record
            load_data(input_source, record_processor, output_func)
def process_unallocated(out_dir, salts_db):
    """Salt the unallocated NIMSP .TMP file into its final CSV in *out_dir*."""
    unallocated_csv_filename = os.path.join(out_dir, 'nimsp_unallocated_contributions.csv.TMP')
    salted_csv_filename = os.path.join(out_dir, 'nimsp_unallocated_contributions.csv')
    # BUG FIX: the original opened os.path.join(out_dir, unallocated_csv_filename),
    # re-joining out_dir onto an already-joined path — wrong whenever out_dir
    # is relative (it only worked for absolute out_dir because os.path.join
    # discards everything before an absolute second argument). Open the
    # joined path directly, and use context managers so both files close
    # even if load_data raises.
    with open(unallocated_csv_filename, 'r') as unallocated_csv, \
         open(salted_csv_filename, 'w') as salted_csv:
        source = VerifiedCSVSource(unallocated_csv,
                                   fieldnames=FIELDNAMES + ['contributionid'],
                                   skiprows=1)
        output_func = CSVEmitter(salted_csv, FIELDNAMES).process_record
        load_data(source,
                  NIMSPDenormalize.get_unallocated_record_processor(salts_db),
                  output_func)
def handle(self, csvpath, *args, **options):
    """Load contributions from a denormalized CSV into the database.

    Commits the transaction on success; on ANY failure (including
    Ctrl-C) prints the traceback, rolls back, and re-raises. Always
    flushes stdout/stderr before returning.
    """
    loader = ContributionLoader(
        source=options.get('source'),
        description='load from denormalized CSVs',
        imported_by="loadcontributions.py (%s)" % os.getenv('LOGNAME', 'unknown'),
    )
    try:
        # 'skip' lets a restarted run resume past already-loaded rows
        # (plus 1 for the header row).
        input_iterator = VerifiedCSVSource(
            open(os.path.abspath(csvpath)),
            FIELDNAMES,
            skiprows=1 + int(options['skip']))
        output_func = chain_filters(
            LoaderEmitter(loader),
            #Every(self.COMMIT_FREQUENCY, lambda i: transaction.commit()),
            Every(self.COMMIT_FREQUENCY, progress_tick),
            # Periodically clear Django's per-connection query log so a
            # long DEBUG run doesn't grow memory without bound.
            Every(self.COMMIT_FREQUENCY, lambda i: reset_queries()),
        )
        record_processor = self.get_record_processor(loader.import_session)
        load_data(input_iterator, record_processor, output_func)
        transaction.commit()
    except BaseException:
        # The original had two byte-identical handlers (KeyboardInterrupt
        # plus a bare except:); a bare except catches BaseException, so a
        # single BaseException handler preserves the exact behavior.
        traceback.print_exception(*sys.exc_info())
        transaction.rollback()
        raise
    finally:
        sys.stdout.flush()
        sys.stderr.flush()
def process_unallocated(out_dir, salts_db):
    """Salt the unallocated NIMSP .TMP file into its final CSV in *out_dir*."""
    unallocated_csv_filename = os.path.join(
        out_dir, 'nimsp_unallocated_contributions.csv.TMP')
    salted_csv_filename = os.path.join(
        out_dir, 'nimsp_unallocated_contributions.csv')
    # BUG FIX: the original opened os.path.join(out_dir, unallocated_csv_filename),
    # re-joining out_dir onto an already-joined path — wrong whenever out_dir
    # is relative (it only worked for absolute out_dir because os.path.join
    # discards everything before an absolute second argument). Open the
    # joined path directly, and use context managers so both files close
    # even if load_data raises.
    with open(unallocated_csv_filename, 'r') as unallocated_csv, \
         open(salted_csv_filename, 'w') as salted_csv:
        source = VerifiedCSVSource(unallocated_csv,
                                   fieldnames=FIELDNAMES + ['contributionid'],
                                   skiprows=1)
        output_func = CSVEmitter(salted_csv, FIELDNAMES).process_record
        load_data(source,
                  NIMSPDenormalize.get_unallocated_record_processor(salts_db),
                  output_func)