Exemplo n.º 1
0
def lobbying_handler(inpath, outpath, infields, outfields):
    """Run the lobbying clean-up recipe from the CSV at *inpath* to *outpath*.

    The input is read as a CSV whose quote character is ``|`` and whose
    columns are named by *infields*. The records pass through the filter
    chain below and are written to *outpath* with the columns *outfields*.

    Args:
        inpath: Path of the raw CSV file to read.
        outpath: Path of the CSV file to write (opened in ``'w'`` mode, so
            an existing file is truncated).
        infields: Field names passed to ``CSVSource`` for the input columns.
        outfields: Field names passed to ``CSVEmitter`` for the output.

    Returns:
        None.

    Note:
        ``name_proc`` and ``yn_proc`` are defined outside this function.
        The ``FieldRenamer`` mapping is presumably ``{new_name: old_name}``
        -- verify against the saucebrush version in use.
    """
    # Context managers guarantee both handles are closed (and the output is
    # flushed) even when a filter raises part-way through the recipe.
    # NOTE(review): assumes run_recipe fully consumes the source before
    # returning -- confirm against the saucebrush version in use.
    with open(inpath) as infile:
        with open(outpath, 'w') as outfile:
            run_recipe(
                CSVSource(infile, fieldnames=infields, quotechar='|'),
                UnicodeFilter(),
                FieldRemover('Source'),
                # Name columns come in a standardized/raw pair; name_proc
                # receives both values.
                FieldMerger(
                    {'registrant_name': ('Registrant', 'RegistrantRaw')},
                    name_proc),
                FieldMerger({'registrant_is_firm': ('IsFirm', )}, yn_proc),
                FieldMerger({'client_name': ('Client', 'Client_raw')},
                            name_proc),
                # An empty amount is treated as 0 rather than failing float().
                FieldMerger({'amount': ('Amount', )},
                            lambda x: float(x or 0)),
                FieldMerger({'affiliate': ('Affiliate', )}, yn_proc),
                FieldMerger({'filing_included_nsfs': ('IncludeNSFS', )},
                            yn_proc),
                FieldMerger({'include_in_industry_totals': ('Ind', )},
                            yn_proc),
                FieldMerger({'use': ('Use', )}, yn_proc),
                FieldRenamer({
                    'transaction_id': 'Uniqid',
                    'transaction_type': 'Type',
                    'transaction_type_desc': 'TypeLong',
                    'year': 'Year',
                    'client_category': 'Catcode',
                    'client_parent_name': 'Ultorg',
                    'filing_type': 'Self',
                }),
                # DebugEmitter(),  # enable to dump records while debugging
                CSVEmitter(outfile, fieldnames=outfields),
            )
Exemplo n.º 2
0
 def run(self):
     """Read the CSV at ``self.inpath`` and feed its records to ``BillLoader``.

     Three fields are derived from ``bill_name`` (the source field is kept
     each time via ``keep_fields=True``):

     * ``bill_type_raw``: ``bill_name`` with every character outside
       ``A-Z`` removed.
     * ``bill_type``: ``bill_type_raw`` looked up in ``self.bill_type_map``
       (``None`` when absent).
     * ``bill_no``: first group of ``self.digits`` matched against
       ``bill_name``, or ``None`` when the name is empty or does not match.

     The records then pass through ``NoneFilter``, ``IssueFilter`` and
     ``UnicodeFilter`` before being counted and handed to the loader with
     ``commit_every=1``.
     """
     def bill_number(bill_name):
         # Run the regex once and reuse the match object instead of
         # matching the same string twice per record.
         match = self.digits.match(bill_name) if bill_name else None
         return match.groups()[0] if match else None

     # The context manager closes the input handle even if a filter or the
     # loader raises.
     # NOTE(review): assumes run_recipe fully consumes the source before
     # returning -- confirm against the saucebrush version in use.
     with open(self.inpath) as infile:
         run_recipe(
             CSVSource(infile),
             FieldMerger({'bill_type_raw': ['bill_name']},
                         lambda x: re.sub(r'[^A-Z]*', '', x),
                         keep_fields=True),
             FieldMerger({'bill_type': ['bill_type_raw']},
                         lambda x: self.bill_type_map.get(x, None),
                         keep_fields=True),
             FieldMerger({'bill_no': ['bill_name']},
                         bill_number,
                         keep_fields=True),
             NoneFilter(),
             IssueFilter(),
             UnicodeFilter(),
             CountEmitter(every=20000, log=self.log),
             LoaderEmitter(
                 BillLoader(
                     source=self.inpath,
                     description='load from denormalized CSVs',
                     imported_by="loadlobbying (%s)" %
                     os.getenv('LOGNAME', 'unknown'),
                     log=self.log,
                 ),
                 commit_every=1),
         )
Exemplo n.º 3
0
 def run(self):
     """Read the CSV at ``self.inpath`` and emit ``LobbyistBundle`` records.

     Fields are renamed according to ``self.field_map``, the listed
     committee/report-level columns are removed, and the two amount fields
     are coerced to integers before the records reach
     ``SimpleDjangoModelEmitter(LobbyistBundle)``.
     """
     removed_fields = (
         'committee_fec_id committee_name report_year report_type '
         'is_amendment start_date end_date reporting_period_amount_all '
         'semi_annual_amount_all'
     ).split()

     # The context manager closes the input handle even if a filter or the
     # emitter raises.
     # NOTE(review): assumes run_recipe fully consumes the source before
     # returning -- confirm against the saucebrush version in use.
     with open(self.inpath) as infile:
         run_recipe(
             CSVSource(infile),
             FieldRenamer(self.field_map),
             FieldRemover(removed_fields),
             BundleFilter(),
             #FieldModifier('file_num', lambda x: Bundle.objects.get(pk=x)),
             # Convert any stray floats to integers; empty values become
             # None.
             FieldModifier('amount semi_annual_amount'.split(),
                           lambda x: int(round(float(x))) if x else None),
             NoneFilter(),
             UnicodeFilter(),
             CountEmitter(every=500),
             #DebugEmitter(),
             SimpleDjangoModelEmitter(LobbyistBundle)
         )
Exemplo n.º 4
0
 def run(self):
     """Read the CSV at ``self.inpath`` and feed its records to ``AgencyLoader``.

     ``year`` is converted to ``int`` (``None`` when empty), a
     ``FieldRenamer`` maps between ``transaction`` and ``transaction_id``
     (presumably renaming ``transaction`` to ``transaction_id`` -- verify
     the mapping direction), and the records pass through ``NoneFilter``,
     ``TRANSACTION_FILTER`` and ``UnicodeFilter`` before being counted and
     loaded with ``commit_every=100``.
     """
     # The context manager closes the input handle even if a filter or the
     # loader raises.
     # NOTE(review): assumes run_recipe fully consumes the source before
     # returning -- confirm against the saucebrush version in use.
     with open(self.inpath) as infile:
         run_recipe(
             CSVSource(infile),
             FieldModifier('year', lambda x: int(x) if x else None),
             FieldRenamer({'transaction_id': 'transaction'}),
             NoneFilter(),
             TRANSACTION_FILTER,
             UnicodeFilter(),
             CountEmitter(every=10000, log=self.log),
             LoaderEmitter(
                 AgencyLoader(
                     source=self.inpath,
                     description='load from denormalized CSVs',
                     imported_by="loadlobbying (%s)" %
                     os.getenv('LOGNAME', 'unknown'),
                     log=self.log,
                 ),
                 commit_every=100),
         )
Exemplo n.º 5
0
 def run(self):
     """Read the CSV at ``self.inpath`` and feed its records to ``LobbyingLoader``.

     Type coercions applied to each record:

     * ``year`` -> ``int`` (``None`` when empty)
     * ``amount`` -> ``Decimal`` (``None`` when empty)
     * the five flag fields -> ``bool``, true only for the exact string
       ``'True'``

     The records then pass through ``NoneFilter`` and ``UnicodeFilter``
     before being counted and handed to the loader.
     """
     flag_fields = ('affiliate', 'filing_included_nsfs',
                    'include_in_industry_totals', 'registrant_is_firm',
                    'use')

     # The context manager closes the input handle even if a filter or the
     # loader raises.
     # NOTE(review): assumes run_recipe fully consumes the source before
     # returning -- confirm against the saucebrush version in use.
     with open(self.inpath) as infile:
         run_recipe(
             CSVSource(infile),
             FieldModifier('year', lambda x: int(x) if x else None),
             FieldModifier('amount', lambda x: Decimal(x) if x else None),
             FieldModifier(flag_fields, lambda x: x == 'True'),
             NoneFilter(),
             UnicodeFilter(),
             CountEmitter(every=20000, log=self.log),
             LoaderEmitter(
                 LobbyingLoader(
                     source=self.inpath,
                     description='load from denormalized CSVs',
                     imported_by="loadlobbying (%s)" %
                     os.getenv('LOGNAME', 'unknown'),
                     log=self.log,
                 )),
         )
Exemplo n.º 6
0
 def run(self):
     """Read the CSV at ``self.inpath`` and emit ``Bundle`` records.

     After renaming fields via ``self.field_map`` the recipe:

     * turns ``is_amendment`` into a boolean (true only for ``'A'``),
     * strips ``$`` and ``,`` from the two amount fields and rounds them to
       integers (``None`` when empty),
     * parses the three date fields with the ``%m/%d/%Y`` format
       (``None`` when empty),
     * builds ``pdf_url`` from ``first_image_num``,

     then passes the records through ``NoneFilter`` and ``UnicodeFilter``
     to ``SimpleDjangoModelEmitter(Bundle)``.
     """
     # The context manager closes the input handle even if a filter or the
     # emitter raises.
     # NOTE(review): assumes run_recipe fully consumes the source before
     # returning -- confirm against the saucebrush version in use.
     with open(self.inpath) as infile:
         run_recipe(
             CSVSource(infile),
             FieldRenamer(self.field_map),
             # Values are [N|A]. Convert to boolean.
             FieldModifier('is_amendment', lambda x: x == 'A'),
             # Convert any stray floats to integers
             FieldModifier(
                 'reporting_period_amount semi_annual_amount'.split(),
                 lambda x: int(round(float(
                     x.replace('$', '').replace(',', '')))) if x else None),
             # Convert date formats
             FieldModifier(
                 'start_date end_date filing_date'.split(),
                 lambda x: datetime.strptime(x, '%m/%d/%Y') if x else None),
             # TODO: These following two lines (and the field value) need to
             # be thoroughly tested on the next bundling load
             FieldCopier({'pdf_url': 'first_image_num'}),
             # The URL directory is the last three characters of the image
             # number, followed twice by the full number.
             FieldModifier(
                 'pdf_url',
                 lambda x: 'http://query.nictusa.com/pdf/{0}/{1}/{1}.pdf'.format(x[-3:], x)),
             NoneFilter(),
             UnicodeFilter(),
             CountEmitter(every=200),
             #DebugEmitter(),
             SimpleDjangoModelEmitter(Bundle)
         )