Пример #1
0
 def run(self):
     """Run the bill-loading recipe over the CSV file at ``self.inpath``.

     The steps are handed to ``run_recipe`` in the order listed and end in a
     ``LoaderEmitter`` wrapping a ``BillLoader`` (``commit_every=1``).

     The input file is opened in a ``with`` block so the handle is closed
     when ``run_recipe`` returns or raises, instead of being left to the
     garbage collector.
     """
     def extract_bill_no(bill_name):
         # Match once and reuse the result; yields None for an empty value
         # or when ``self.digits`` does not match at the start of the name.
         found = self.digits.match(bill_name) if bill_name else None
         return found.groups()[0] if found else None

     # NOTE(review): closing the file here assumes run_recipe consumes the
     # whole source before returning -- confirm against the recipe library.
     with open(self.inpath) as infile:
         run_recipe(
             CSVSource(infile),
             # Strip every character that is not an uppercase ASCII letter;
             # presumably this leaves the bill-type prefix of the name.
             FieldMerger({'bill_type_raw': ['bill_name']},
                         lambda x: re.sub(r'[^A-Z]*', '', x),
                         keep_fields=True),
             # Translate the raw prefix via self.bill_type_map; unknown
             # prefixes become None.
             FieldMerger({'bill_type': ['bill_type_raw']},
                         lambda x: self.bill_type_map.get(x, None),
                         keep_fields=True),
             # First capture group of self.digits applied to the bill name.
             FieldMerger({'bill_no': ['bill_name']},
                         extract_bill_no,
                         keep_fields=True),
             NoneFilter(),
             IssueFilter(),
             UnicodeFilter(),
             CountEmitter(every=20000, log=self.log),
             LoaderEmitter(
                 BillLoader(
                     source=self.inpath,
                     description='load from denormalized CSVs',
                     imported_by="loadlobbying (%s)" %
                     os.getenv('LOGNAME', 'unknown'),
                     log=self.log,
                 ),
                 commit_every=1),
         )
Пример #2
0
 def run(self):
     """Run the LobbyistBundle recipe over the CSV file at ``self.inpath``.

     The steps are handed to ``run_recipe`` in the order listed and end in a
     ``SimpleDjangoModelEmitter`` for ``LobbyistBundle``.

     The input file is opened in a ``with`` block so the handle is closed
     when ``run_recipe`` returns or raises, instead of being left to the
     garbage collector.
     """
     # Fields handed to FieldRemover.
     unused_fields = [
         'committee_fec_id',
         'committee_name',
         'report_year',
         'report_type',
         'is_amendment',
         'start_date',
         'end_date',
         'reporting_period_amount_all',
         'semi_annual_amount_all',
     ]

     # NOTE(review): closing the file here assumes run_recipe consumes the
     # whole source before returning -- confirm against the recipe library.
     with open(self.inpath) as infile:
         run_recipe(
             CSVSource(infile),
             FieldRenamer(self.field_map),
             FieldRemover(unused_fields),
             BundleFilter(),
             #FieldModifier('file_num', lambda x: Bundle.objects.get(pk=x)),
             # Convert any stray floats to integers; empty values become None.
             FieldModifier(['amount', 'semi_annual_amount'],
                           lambda x: int(round(float(x))) if x else None),
             NoneFilter(),
             UnicodeFilter(),
             CountEmitter(every=500),
             #DebugEmitter(),
             SimpleDjangoModelEmitter(LobbyistBundle)
         )
Пример #3
0
 def run(self):
     """Run the agency-loading recipe over the CSV file at ``self.inpath``.

     The steps are handed to ``run_recipe`` in the order listed and end in a
     ``LoaderEmitter`` wrapping an ``AgencyLoader`` (``commit_every=100``).

     The input file is opened in a ``with`` block so the handle is closed
     when ``run_recipe`` returns or raises, instead of being left to the
     garbage collector.
     """
     # NOTE(review): closing the file here assumes run_recipe consumes the
     # whole source before returning -- confirm against the recipe library.
     with open(self.inpath) as infile:
         run_recipe(
             CSVSource(infile),
             # Parse the year as an integer; empty values become None.
             FieldModifier('year', lambda x: int(x) if x else None),
             FieldRenamer({'transaction_id': 'transaction'}),
             NoneFilter(),
             TRANSACTION_FILTER,
             UnicodeFilter(),
             CountEmitter(every=10000, log=self.log),
             LoaderEmitter(
                 AgencyLoader(
                     source=self.inpath,
                     description='load from denormalized CSVs',
                     imported_by="loadlobbying (%s)" %
                     os.getenv('LOGNAME', 'unknown'),
                     log=self.log,
                 ),
                 commit_every=100),
         )
Пример #4
0
 def run(self):
     """Run the lobbying-loading recipe over the CSV file at ``self.inpath``.

     The steps are handed to ``run_recipe`` in the order listed and end in a
     ``LoaderEmitter`` wrapping a ``LobbyingLoader``.

     The input file is opened in a ``with`` block so the handle is closed
     when ``run_recipe`` returns or raises, instead of being left to the
     garbage collector.
     """
     # Columns compared against the literal string 'True' below.
     boolean_fields = (
         'affiliate',
         'filing_included_nsfs',
         'include_in_industry_totals',
         'registrant_is_firm',
         'use',
     )

     # NOTE(review): closing the file here assumes run_recipe consumes the
     # whole source before returning -- confirm against the recipe library.
     with open(self.inpath) as infile:
         run_recipe(
             CSVSource(infile),
             # Parse numeric columns; empty values become None.
             FieldModifier('year', lambda x: int(x) if x else None),
             FieldModifier('amount', lambda x: Decimal(x) if x else None),
             # Only the exact string 'True' maps to True; anything else
             # (including an empty value) maps to False.
             FieldModifier(boolean_fields, lambda x: x == 'True'),
             NoneFilter(),
             UnicodeFilter(),
             CountEmitter(every=20000, log=self.log),
             LoaderEmitter(
                 LobbyingLoader(
                     source=self.inpath,
                     description='load from denormalized CSVs',
                     imported_by="loadlobbying (%s)" %
                     os.getenv('LOGNAME', 'unknown'),
                     log=self.log,
                 )),
         )
Пример #5
0
 def run(self):
     """Run the Bundle-loading recipe over the CSV file at ``self.inpath``.

     The steps are handed to ``run_recipe`` in the order listed and end in a
     ``SimpleDjangoModelEmitter`` for ``Bundle``.

     The input file is opened in a ``with`` block so the handle is closed
     when ``run_recipe`` returns or raises, instead of being left to the
     garbage collector.
     """
     # NOTE(review): closing the file here assumes run_recipe consumes the
     # whole source before returning -- confirm against the recipe library.
     with open(self.inpath) as infile:
         run_recipe(
             CSVSource(infile),
             FieldRenamer(self.field_map),
             # Values are [N|A]. Convert to boolean.
             FieldModifier('is_amendment',
                           lambda x: x == 'A'),
             # Strip '$' and ',' and convert any stray floats to integers;
             # empty values become None.
             FieldModifier(
                 ['reporting_period_amount', 'semi_annual_amount'],
                 lambda x: int(round(float(x.replace('$','').replace(',',''))))
                 if x else None),
             # Convert MM/DD/YYYY date strings; empty values become None.
             FieldModifier(
                 ['start_date', 'end_date', 'filing_date'],
                 lambda x: datetime.strptime(x, '%m/%d/%Y') if x else None),
             # TODO: These following two lines (and the field value) need to
             # be thoroughly tested on the next bundling load
             FieldCopier({'pdf_url': 'first_image_num'}),
             # The last three characters of the value form the directory
             # segment of the URL.
             # NOTE(review): unlike the modifiers above there is no guard
             # for an empty or None value here -- confirm whether
             # first_image_num can be blank.
             FieldModifier(
                 'pdf_url',
                 lambda x: 'http://query.nictusa.com/pdf/{0}/{1}/{1}.pdf'.format(x[-3:], x)),
             NoneFilter(),
             UnicodeFilter(),
             CountEmitter(every=200),
             #DebugEmitter(),
             SimpleDjangoModelEmitter(Bundle)
         )