def run(self):
    """Load bill records from the CSV at ``self.inpath`` through ``BillLoader``.

    Three fields are derived from each record (source fields are kept):

    * ``bill_type_raw`` -- ``bill_name`` with every non ``A-Z`` character
      stripped out.
    * ``bill_type`` -- ``bill_type_raw`` looked up in ``self.bill_type_map``
      (``None`` when there is no entry).
    * ``bill_no`` -- first group of ``self.digits`` matched against
      ``bill_name`` (``None`` when the name is empty or does not match).
    """

    def bill_number(name):
        # Run the regex once and reuse the match object.
        found = self.digits.match(name) if name else None
        return found.groups()[0] if found else None

    # NOTE(review): assumes run_recipe drains the source before returning,
    # so the input file can be closed as soon as it comes back -- confirm.
    with open(self.inpath) as infile:
        run_recipe(
            CSVSource(infile),
            FieldMerger({'bill_type_raw': ['bill_name']},
                        lambda x: re.sub(r'[^A-Z]*', '', x),
                        keep_fields=True),
            FieldMerger({'bill_type': ['bill_type_raw']},
                        lambda x: self.bill_type_map.get(x, None),
                        keep_fields=True),
            FieldMerger({'bill_no': ['bill_name']},
                        bill_number,
                        keep_fields=True),
            NoneFilter(),
            IssueFilter(),
            UnicodeFilter(),
            CountEmitter(every=20000, log=self.log),
            LoaderEmitter(BillLoader(
                source=self.inpath,
                description='load from denormalized CSVs',
                imported_by="loadlobbying (%s)" % os.getenv('LOGNAME', 'unknown'),
                log=self.log,
            ), commit_every=1),
        )
def run(self):
    """Load lobbyist-bundle rows from the CSV at ``self.inpath``.

    Fields are renamed according to ``self.field_map``, the committee and
    report-level columns listed below are removed, and ``amount`` /
    ``semi_annual_amount`` are coerced to rounded integers (``None`` when
    empty) before the records are handed to
    ``SimpleDjangoModelEmitter(LobbyistBundle)``.
    """
    dropped_fields = (
        'committee_fec_id committee_name report_year report_type '
        'is_amendment start_date end_date reporting_period_amount_all '
        'semi_annual_amount_all'
    ).split()

    # NOTE(review): assumes run_recipe drains the source before returning,
    # so the input file can be closed as soon as it comes back -- confirm.
    with open(self.inpath) as infile:
        run_recipe(
            CSVSource(infile),
            FieldRenamer(self.field_map),
            FieldRemover(dropped_fields),
            BundleFilter(),
            #FieldModifier('file_num', lambda x: Bundle.objects.get(pk=x)),
            # Convert any stray floats to integers
            FieldModifier('amount semi_annual_amount'.split(),
                          lambda x: int(round(float(x))) if x else None),
            NoneFilter(),
            UnicodeFilter(),
            CountEmitter(every=500),
            #DebugEmitter(),
            SimpleDjangoModelEmitter(LobbyistBundle)
        )
def run(self):
    """Load agency rows from the CSV at ``self.inpath`` through ``AgencyLoader``.

    ``year`` is converted to ``int`` (``None`` when empty) and the
    ``transaction_id`` column is renamed to ``transaction`` before the
    records pass through ``TRANSACTION_FILTER`` and reach the loader,
    which is configured with ``commit_every=100``.
    """
    # NOTE(review): assumes run_recipe drains the source before returning,
    # so the input file can be closed as soon as it comes back -- confirm.
    with open(self.inpath) as infile:
        run_recipe(
            CSVSource(infile),
            FieldModifier('year', lambda x: int(x) if x else None),
            FieldRenamer({'transaction_id': 'transaction'}),
            NoneFilter(),
            TRANSACTION_FILTER,
            UnicodeFilter(),
            CountEmitter(every=10000, log=self.log),
            LoaderEmitter(AgencyLoader(
                source=self.inpath,
                description='load from denormalized CSVs',
                imported_by="loadlobbying (%s)" % os.getenv('LOGNAME', 'unknown'),
                log=self.log,
            ), commit_every=100),
        )
def run(self):
    """Load lobbying rows from the CSV at ``self.inpath`` through ``LobbyingLoader``.

    Type conversions applied to each record:

    * ``year`` -> ``int`` (``None`` when empty)
    * ``amount`` -> ``Decimal`` (``None`` when empty)
    * the flag columns listed in ``flag_fields`` -> ``bool``; only the
      exact string ``'True'`` maps to ``True``.
    """
    flag_fields = (
        'affiliate',
        'filing_included_nsfs',
        'include_in_industry_totals',
        'registrant_is_firm',
        'use',
    )

    # NOTE(review): assumes run_recipe drains the source before returning,
    # so the input file can be closed as soon as it comes back -- confirm.
    with open(self.inpath) as infile:
        run_recipe(
            CSVSource(infile),
            FieldModifier('year', lambda x: int(x) if x else None),
            FieldModifier('amount', lambda x: Decimal(x) if x else None),
            FieldModifier(flag_fields, lambda x: x == 'True'),
            NoneFilter(),
            UnicodeFilter(),
            CountEmitter(every=20000, log=self.log),
            LoaderEmitter(
                LobbyingLoader(
                    source=self.inpath,
                    description='load from denormalized CSVs',
                    imported_by="loadlobbying (%s)" % os.getenv('LOGNAME', 'unknown'),
                    log=self.log,
                )),
        )
def run(self):
    """Load bundle rows from the CSV at ``self.inpath`` into the ``Bundle`` model.

    After renaming fields according to ``self.field_map`` each record is
    normalised as follows:

    * ``is_amendment`` -> ``True`` only when the value is ``'A'``
    * ``reporting_period_amount`` / ``semi_annual_amount`` -> rounded
      ``int`` after stripping ``$`` and ``,`` (``None`` when empty)
    * ``start_date`` / ``end_date`` / ``filing_date`` -> ``datetime``
      parsed as ``%m/%d/%Y`` (``None`` when empty)
    * ``pdf_url`` -> built from a copy of ``first_image_num``
    """

    def to_whole_amount(raw):
        # Strip currency formatting, then round any stray float to an int.
        if not raw:
            return None
        return int(round(float(raw.replace('$', '').replace(',', ''))))

    def to_date(raw):
        return datetime.strptime(raw, '%m/%d/%Y') if raw else None

    def to_pdf_url(image_num):
        # The last three characters of the image number are used as the
        # first path segment of the URL.
        return 'http://query.nictusa.com/pdf/{0}/{1}/{1}.pdf'.format(
            image_num[-3:], image_num)

    # NOTE(review): assumes run_recipe drains the source before returning,
    # so the input file can be closed as soon as it comes back -- confirm.
    with open(self.inpath) as infile:
        run_recipe(
            CSVSource(infile),
            FieldRenamer(self.field_map),
            # Values are [N|A]. Convert to boolean.
            FieldModifier('is_amendment', lambda x: x == 'A'),
            # Convert any stray floats to integers
            FieldModifier('reporting_period_amount semi_annual_amount'.split(),
                          to_whole_amount),
            # Convert date formats
            FieldModifier('start_date end_date filing_date'.split(), to_date),
            # TODO: These following two lines (and the field value) need to be
            # thoroughly tested on the next bundling load
            FieldCopier({'pdf_url': 'first_image_num'}),
            FieldModifier('pdf_url', to_pdf_url),
            NoneFilter(),
            UnicodeFilter(),
            CountEmitter(every=200),
            #DebugEmitter(),
            SimpleDjangoModelEmitter(Bundle)
        )