def handle(self, csvpath, *args, **options):
    """Load contributions from a denormalized CSV into the database.

    Commits the transaction on success; on ANY failure (including
    Ctrl-C) prints the traceback, rolls back, and re-raises. Always
    flushes stdout/stderr before returning.
    """
    loader = ContributionLoader(
        source=options.get('source'),
        description='load from denormalized CSVs',
        imported_by="loadcontributions.py (%s)" % os.getenv('LOGNAME', 'unknown'),
    )
    try:
        # 'skip' lets a restarted run resume past already-loaded rows
        # (plus 1 for the header row).
        input_iterator = VerifiedCSVSource(
            open(os.path.abspath(csvpath)),
            FIELDNAMES,
            skiprows=1 + int(options['skip']))
        output_func = chain_filters(
            LoaderEmitter(loader),
            #Every(self.COMMIT_FREQUENCY, lambda i: transaction.commit()),
            Every(self.COMMIT_FREQUENCY, progress_tick))
        record_processor = self.get_record_processor(loader.import_session)
        load_data(input_iterator, record_processor, output_func)
        transaction.commit()
    except BaseException:
        # The original had two byte-identical handlers (KeyboardInterrupt
        # plus a bare except:); a bare except catches BaseException, so a
        # single BaseException handler preserves the exact behavior.
        traceback.print_exception(*sys.exc_info())
        transaction.rollback()
        raise
    finally:
        sys.stdout.flush()
        sys.stderr.flush()
def process_allocated(out_dir, input_path):
    """Split NIMSP contributions from *input_path* into two CSVs in *out_dir*.

    Writes ``nimsp_allocated_contributions.csv`` (records passed through
    DCIDFilter) and ``nimsp_unallocated_contributions.csv.TMP`` (records
    carrying a ``contributionid`` column for later salting).
    """
    allocated_csv_filename = os.path.join(out_dir, 'nimsp_allocated_contributions.csv')
    unallocated_csv_filename = os.path.join(out_dir, 'nimsp_unallocated_contributions.csv.TMP')
    input_fields = [name for (name, _, _) in CSV_SQL_MAPPING]

    # Context managers guarantee every handle is closed even if load_data
    # raises; the original leaked both outputs on error and never closed
    # input_file at all.
    with open(allocated_csv_filename, 'w') as allocated_csv, \
         open(unallocated_csv_filename, 'w') as unallocated_csv, \
         open(input_path, 'r') as input_file:
        allocated_emitter = AllocatedEmitter(allocated_csv, fieldnames=FIELDNAMES)
        unallocated_emitter = UnallocatedEmitter(
            unallocated_csv, fieldnames=FIELDNAMES + ['contributionid'])
        source = VerifiedCSVSource(input_file, input_fields)
        # Order matters: emit the raw record to the unallocated file first,
        # then salt/filter DCIDs before the allocated emitter sees it.
        output_func = chain_filters(
            unallocated_emitter,
            DCIDFilter(SALT_KEY),
            allocated_emitter)
        load_data(source, NIMSPDenormalize.get_allocated_record_processor(), output_func)
def load_payment(csvpath, *args, **options):
    """Load FARA payment records from a denormalized CSV into the database.

    Builds a filter chain that strips/re-adds bookkeeping fields, parses
    dates and numeric columns, and enforces model string lengths, then
    streams every row through the loader.
    """
    loader = FARAPaymentLoader(
        source='DOJ',
        description='load from denormalized CSVs',
        imported_by="loadfara.py (%s)" % os.getenv('LOGNAME', 'unknown'),
    )
    payment_record_processor = chain_filters(
        CSVFieldVerifier(),
        FieldRemover('id'),
        FieldRemover('import_reference'),
        FieldAdder('import_reference', loader.import_session),
        # Copy the raw date before parsing so the asterisk flag can be
        # derived from the original string.
        FieldCopier({'date_asterisk': 'date'}),
        FieldModifier('date', parse_fara_date),
        FieldModifier('date_asterisk', parse_fara_asterisk),
        FieldModifier('amount', parse_decimal),
        FieldModifier(('document_id', 'client_id', 'registrant_id',
                       'record_id', 'location_id', 'subcontractor_id'), parse_int),
        UnicodeFilter(),
        StringLengthFilter(Payment))
    output_func = chain_filters(
        LoaderEmitter(loader),
        Every(REPORT_FREQUENCY, progress_tick),
    )
    # Close the input file when done (the original leaked the handle).
    with open(os.path.abspath(csvpath)) as input_file:
        input_iterator = VerifiedCSVSource(
            input_file, fieldnames=Payment.FIELDNAMES, skiprows=1)
        load_data(input_iterator, payment_record_processor, output_func)
def process_allocated(out_dir, input_path):
    """Split NIMSP contributions from *input_path* into two CSVs in *out_dir*.

    Writes ``nimsp_allocated_contributions.csv`` (records passed through
    DCIDFilter) and ``nimsp_unallocated_contributions.csv.TMP`` (records
    carrying a ``contributionid`` column for later salting).
    """
    allocated_csv_filename = os.path.join(
        out_dir, 'nimsp_allocated_contributions.csv')
    unallocated_csv_filename = os.path.join(
        out_dir, 'nimsp_unallocated_contributions.csv.TMP')
    input_fields = [name for (name, _, _) in CSV_SQL_MAPPING]

    # Context managers guarantee every handle is closed even if load_data
    # raises; the original leaked both outputs on error and never closed
    # input_file at all.
    with open(allocated_csv_filename, 'w') as allocated_csv, \
         open(unallocated_csv_filename, 'w') as unallocated_csv, \
         open(input_path, 'r') as input_file:
        allocated_emitter = AllocatedEmitter(allocated_csv, fieldnames=FIELDNAMES)
        unallocated_emitter = UnallocatedEmitter(
            unallocated_csv, fieldnames=FIELDNAMES + ['contributionid'])
        source = VerifiedCSVSource(input_file, input_fields)
        # Order matters: emit the raw record to the unallocated file first,
        # then salt/filter DCIDs before the allocated emitter sees it.
        output_func = chain_filters(unallocated_emitter,
                                    DCIDFilter(SALT_KEY),
                                    allocated_emitter)
        load_data(source, NIMSPDenormalize.get_allocated_record_processor(), output_func)
def load_payment(csvpath, *args, **options):
    """Load FARA payment records from a denormalized CSV into the database.

    Builds a filter chain that strips/re-adds bookkeeping fields, parses
    dates and numeric columns, and enforces model string lengths, then
    streams every row through the loader.
    """
    loader = FARAPaymentLoader(
        source='DOJ',
        description='load from denormalized CSVs',
        imported_by="loadfara.py (%s)" % os.getenv('LOGNAME', 'unknown'),
    )
    payment_record_processor = chain_filters(
        CSVFieldVerifier(),
        FieldRemover('id'),
        FieldRemover('import_reference'),
        FieldAdder('import_reference', loader.import_session),
        # Copy the raw date before parsing so the asterisk flag can be
        # derived from the original string.
        FieldCopier({'date_asterisk': 'date'}),
        FieldModifier('date', parse_fara_date),
        FieldModifier('date_asterisk', parse_fara_asterisk),
        FieldModifier('amount', parse_decimal),
        FieldModifier(('document_id', 'client_id', 'registrant_id',
                       'record_id', 'location_id', 'subcontractor_id'), parse_int),
        UnicodeFilter(),
        StringLengthFilter(Payment))
    output_func = chain_filters(
        LoaderEmitter(loader),
        Every(REPORT_FREQUENCY, progress_tick),
    )
    # Close the input file when done (the original leaked the handle).
    with open(os.path.abspath(csvpath)) as input_file:
        input_iterator = VerifiedCSVSource(
            input_file, fieldnames=Payment.FIELDNAMES, skiprows=1)
        load_data(input_iterator, payment_record_processor, output_func)
def handle(self, input_path, year, **options):
    """Load TCS earmark rows from *input_path*, tagged with *year*.

    Records an Import row for provenance, then streams the CSV through
    the earmark record processor into save_earmark.
    """
    # Provenance record for this load run.
    imp = Import.objects.create(source=input_path, imported_by=__file__)
    # Close the input file when done (the original leaked the handle).
    with open(input_path, 'r') as input_file:
        input_source = VerifiedCSVSource(input_file, FIELDS, skiprows=1)
        # TODO: real year and import_ref (note carried over from original).
        processor = LoadTCSEarmarks.get_record_processor(int(year), imp)
        load_data(input_source, processor, save_earmark)
def handle(self, input_path, year, **options):
    """Load TCS earmark rows from *input_path*, tagged with *year*.

    Records an Import row for provenance, then streams the CSV through
    the earmark record processor into save_earmark.
    """
    # Provenance record for this load run.
    imp = Import.objects.create(source=input_path, imported_by=__file__)
    # Close the input file when done (the original leaked the handle).
    with open(input_path, 'r') as input_file:
        input_source = VerifiedCSVSource(input_file, FIELDS, skiprows=1)
        # TODO: real year and import_ref (note carried over from original).
        processor = LoadTCSEarmarks.get_record_processor(int(year), imp)
        load_data(input_source, processor, save_earmark)
def denormalize(self, data_path, cycles, catcodes, candidates, committees):
    """Denormalize CRP pac-to-pac files for all *cycles* into one output file.

    Reads raw/crp/pac_other<cycle>.txt for each cycle and writes a single
    denormalized/denorm_pac2pac.txt.
    """
    infiles = Files(*[os.path.join(data_path, 'raw', 'crp', 'pac_other%s.txt' % cycle)
                      for cycle in cycles])
    out_path = os.path.join(data_path, 'denormalized', 'denorm_pac2pac.txt')
    source = VerifiedCSVSource(infiles, fieldnames=FILE_TYPES['pac_other'], quotechar="|")
    record_processor = self.get_record_processor(catcodes, candidates, committees)
    # Ensure the output file is flushed and closed even if load_data raises
    # (the original never closed it).
    with open(out_path, 'w') as outfile:
        output_func = CSVEmitter(outfile, fieldnames=FIELDNAMES).process_record
        load_data(source, record_processor, output_func)
def denormalize(self, data_path, cycles, catcodes, candidates, committees):
    """Denormalize CRP individual-contribution files, one output per cycle.

    For each cycle, reads raw/crp/indivs<cycle>.txt and writes
    denormalized/denorm_indivs.<cycle>.txt, reusing one record processor
    across all cycles.
    """
    record_processor = self.get_record_processor(catcodes, candidates, committees)
    for cycle in cycles:
        in_path = os.path.join(data_path, 'raw', 'crp', 'indivs%s.txt' % cycle)
        out_path = os.path.join(data_path, 'denormalized', 'denorm_indivs.%s.txt' % cycle)
        sys.stdout.write('Reading from %s, writing to %s...\n' % (in_path, out_path))
        # Close both files every iteration — the original leaked a pair of
        # handles per cycle.
        with open(in_path, 'r') as infile, open(out_path, 'w') as outfile:
            input_source = VerifiedCSVSource(infile, fieldnames=FILE_TYPES['indivs'],
                                             quotechar="|")
            output_func = CSVEmitter(outfile, fieldnames=FIELDNAMES).process_record
            load_data(input_source, record_processor, output_func)
def process_unallocated(out_dir, salts_db):
    """Salt the unallocated NIMSP .TMP file into its final CSV in *out_dir*."""
    unallocated_csv_filename = os.path.join(out_dir, 'nimsp_unallocated_contributions.csv.TMP')
    salted_csv_filename = os.path.join(out_dir, 'nimsp_unallocated_contributions.csv')
    # BUG FIX: the original opened os.path.join(out_dir, unallocated_csv_filename),
    # re-joining out_dir onto an already-joined path — wrong whenever out_dir
    # is relative (it only worked for absolute out_dir because os.path.join
    # discards everything before an absolute second argument). Open the
    # joined path directly, and use context managers so both files close
    # even if load_data raises.
    with open(unallocated_csv_filename, 'r') as unallocated_csv, \
         open(salted_csv_filename, 'w') as salted_csv:
        source = VerifiedCSVSource(unallocated_csv,
                                   fieldnames=FIELDNAMES + ['contributionid'],
                                   skiprows=1)
        output_func = CSVEmitter(salted_csv, FIELDNAMES).process_record
        load_data(source,
                  NIMSPDenormalize.get_unallocated_record_processor(salts_db),
                  output_func)
def handle(self, csvpath, *args, **options):
    """Load contributions from a denormalized CSV into the database.

    Commits the transaction on success; on ANY failure (including
    Ctrl-C) prints the traceback, rolls back, and re-raises. Always
    flushes stdout/stderr before returning.
    """
    loader = ContributionLoader(
        source=options.get('source'),
        description='load from denormalized CSVs',
        imported_by="loadcontributions.py (%s)" % os.getenv('LOGNAME', 'unknown'),
    )
    try:
        # 'skip' lets a restarted run resume past already-loaded rows
        # (plus 1 for the header row).
        input_iterator = VerifiedCSVSource(
            open(os.path.abspath(csvpath)),
            FIELDNAMES,
            skiprows=1 + int(options['skip']))
        output_func = chain_filters(
            LoaderEmitter(loader),
            #Every(self.COMMIT_FREQUENCY, lambda i: transaction.commit()),
            Every(self.COMMIT_FREQUENCY, progress_tick),
            # Periodically clear Django's per-connection query log so a
            # long DEBUG run doesn't grow memory without bound.
            Every(self.COMMIT_FREQUENCY, lambda i: reset_queries()),
        )
        record_processor = self.get_record_processor(loader.import_session)
        load_data(input_iterator, record_processor, output_func)
        transaction.commit()
    except BaseException:
        # The original had two byte-identical handlers (KeyboardInterrupt
        # plus a bare except:); a bare except catches BaseException, so a
        # single BaseException handler preserves the exact behavior.
        traceback.print_exception(*sys.exc_info())
        transaction.rollback()
        raise
    finally:
        sys.stdout.flush()
        sys.stderr.flush()
def process_unallocated(out_dir, salts_db):
    """Salt the unallocated NIMSP .TMP file into its final CSV in *out_dir*."""
    unallocated_csv_filename = os.path.join(
        out_dir, 'nimsp_unallocated_contributions.csv.TMP')
    salted_csv_filename = os.path.join(
        out_dir, 'nimsp_unallocated_contributions.csv')
    # BUG FIX: the original opened os.path.join(out_dir, unallocated_csv_filename),
    # re-joining out_dir onto an already-joined path — wrong whenever out_dir
    # is relative (it only worked for absolute out_dir because os.path.join
    # discards everything before an absolute second argument). Open the
    # joined path directly, and use context managers so both files close
    # even if load_data raises.
    with open(unallocated_csv_filename, 'r') as unallocated_csv, \
         open(salted_csv_filename, 'w') as salted_csv:
        source = VerifiedCSVSource(unallocated_csv,
                                   fieldnames=FIELDNAMES + ['contributionid'],
                                   skiprows=1)
        output_func = CSVEmitter(salted_csv, FIELDNAMES).process_record
        load_data(source,
                  NIMSPDenormalize.get_unallocated_record_processor(salts_db),
                  output_func)