def get_record_processor(catcodes, candidates, committees):
    """Build the filter chain applied to each PAC-to-candidate record.

    ``committees`` is handed to ``ContributorFilter``, ``candidates`` to
    ``Pac2CandRecipientFilter`` and ``catcodes`` to ``CatCodeFilter``.
    The filters run in the order they are listed and the value returned
    by ``chain_filters`` is passed back to the caller.
    """

    def build_transaction_id(cycle, fecid):
        return 'pac2cand:%s:%s' % (cycle, fecid)

    def normalize_type(t):
        return t.strip().lower()

    def candidacy_status(curr, cycle):
        # Empty string unless the cycle flag is 'Y'; otherwise a boolean
        # saying whether the current-candidate flag is 'Y' too.
        if cycle != 'Y':
            return ""
        return curr == 'Y'

    pipeline = [
        CSVFieldVerifier(),

        # transaction filters
        FieldAdder('transaction_namespace', CRP_TRANSACTION_NAMESPACE),
        FieldMerger({'transaction_id': ('cycle', 'fec_rec_no')},
                    build_transaction_id,
                    keep_fields=True),
        FieldMerger({'transaction_type': ('type', )}, normalize_type),

        # date stamp
        FieldModifier('date', parse_date_iso),

        # contributor and recipient fields
        ContributorFilter(committees),
        FieldRenamer({'contributor_ext_id': 'pac_id'}),
        FieldAdder('contributor_type', 'C'),
        Pac2CandRecipientFilter(candidates),
        FieldAdder('recipient_type', 'P'),

        # catcode
        CatCodeFilter('contributor', catcodes),

        # static fields
        FieldAdder('is_amendment', False),
        FieldMerger({'candidacy_status': ('curr_cand', 'cycle_cand')},
                    candidacy_status,
                    keep_fields=False),

        # final pass through the spec
        SpecFilter(SPEC),
    ]
    return chain_filters(*pipeline)
Пример #2
0
    def test_field_adder_no_replace(self):
        """FieldAdder with replace=False: expected rows keep their own 'b'
        values (2, 5, 10) instead of the callable's 7."""
        adder = FieldAdder('b', lambda: 7, replace=False)

        untouched_rows = [
            dict(a=1, b=2, c=3),
            dict(a=5, b=5, c=5),
            dict(a=1, b=10, c=100),
        ]
        self.assert_filter_result(adder, untouched_rows)
Пример #3
0
    def test_field_adder_iterable(self):
        """FieldAdder given a list: each expected row receives the next
        item (1, 2, 3) as its 'x' value."""
        adder = FieldAdder('x', [1,2,3])

        base_rows = (
            {'a': 1, 'b': 2, 'c': 3},
            {'a': 5, 'b': 5, 'c': 5},
            {'a': 1, 'b': 10, 'c': 100},
        )
        expected_rows = [dict(row, x=value)
                         for row, value in zip(base_rows, (1, 2, 3))]
        self.assert_filter_result(adder, expected_rows)
Пример #4
0
    def test_field_adder_callable(self):
        """FieldAdder given a callable: every expected row carries the
        callable's return value (7) as 'x'."""
        adder = FieldAdder('x', lambda: 7)

        base_rows = (
            {'a': 1, 'b': 2, 'c': 3},
            {'a': 5, 'b': 5, 'c': 5},
            {'a': 1, 'b': 10, 'c': 100},
        )
        expected_rows = [dict(row, x=7) for row in base_rows]
        self.assert_filter_result(adder, expected_rows)
Пример #5
0
def load_payment(csvpath, *args, **options):
    """Load payment records from a denormalized CSV file.

    Builds a ``FARAPaymentLoader``, a record-processing filter chain and an
    output chain, then pushes the rows of ``csvpath`` through them with
    ``load_data``.

    Args:
        csvpath: Path of the CSV file to load; it is resolved with
            ``os.path.abspath`` before being opened.
        *args: Accepted but not used by this function.
        **options: Accepted but not used by this function.
    """
    loader = FARAPaymentLoader(
        source='DOJ',
        description='load from denormalized CSVs',
        imported_by="loadfara.py (%s)" % os.getenv('LOGNAME', 'unknown'),
    )

    payment_record_processor = chain_filters(
        CSVFieldVerifier(),
        # Drop incoming 'id' and 'import_reference', then stamp the record
        # with this loader's import session.
        FieldRemover('id'),
        FieldRemover('import_reference'),
        FieldAdder('import_reference', loader.import_session),
        # 'date_asterisk' is copied from 'date' before 'date' is parsed, so
        # both parsers receive the raw value.
        FieldCopier({'date_asterisk': 'date'}),
        FieldModifier('date', parse_fara_date),
        FieldModifier('date_asterisk', parse_fara_asterisk),
        FieldModifier('amount', parse_decimal),
        FieldModifier(('document_id', 'client_id', 'registrant_id',
                       'record_id', 'location_id', 'subcontractor_id'),
                      parse_int),
        UnicodeFilter(),
        StringLengthFilter(Payment))

    output_func = chain_filters(
        LoaderEmitter(loader),
        Every(REPORT_FREQUENCY, progress_tick),
    )

    # Open the file in a context manager so the handle is released even if
    # loading raises; the original left it open.
    # NOTE(review): assumes load_data consumes the whole iterator before it
    # returns -- confirm against its definition.
    with open(os.path.abspath(csvpath)) as csvfile:
        input_iterator = VerifiedCSVSource(csvfile,
                                           fieldnames=Payment.FIELDNAMES,
                                           skiprows=1)

        load_data(input_iterator, payment_record_processor, output_func)
Пример #6
0
 def get_record_processor(import_session):
     """Build the filter chain applied to each contribution record.

     Incoming 'id' and 'import_reference' fields are removed and
     'import_reference' is re-added with ``import_session``; 'amount',
     'cycle' and 'date' are then converted before the boolean, unicode and
     string-length filters run.
     """

     def to_decimal(a):
         # Go through str() first, exactly as the conversion always has.
         return Decimal(str(a))

     steps = [
         CSVFieldVerifier(),
         FieldRemover('id'),
         FieldRemover('import_reference'),
         FieldAdder('import_reference', import_session),
         FieldModifier('amount', to_decimal),
         FieldModifier(['cycle'], parse_int),
         FieldModifier(['date'], parse_date),
         BooleanFilter('is_amendment'),
         UnicodeFilter(),
         StringLengthFilter(Contribution),
     ]
     return chain_filters(*steps)
Пример #7
0
    def get_record_processor(catcodes, candidates, committees):
        """Build the filter chain applied to each PAC-to-PAC record.

        ``committees`` is passed to ``CommitteeFilter`` and, together with
        ``candidates``, to ``Pac2PacRecipientFilter``.  ``catcodes`` is
        accepted but not referenced by this chain.  The value returned by
        ``chain_filters`` is passed back to the caller.
        """

        def build_transaction_id(cycle, fecid):
            return 'pac2pac:%s:%s' % (cycle, fecid)

        def normalize_type(t):
            return t.strip().lower()

        def upper_or_blank(s):
            if s:
                return s.upper()
            return ""

        def stripped_upper_or_blank(s):
            if s:
                return s.strip().upper()
            return ""

        def is_amendment(s):
            # Anything other than 'N' (after strip/upper) counts as amended.
            return s.strip().upper() != 'N'

        def candidacy_status(curr, cycle):
            # Empty string unless the cycle flag is 'Y'; otherwise a boolean
            # saying whether the current-candidate flag is 'Y' too.
            if cycle != 'Y':
                return ""
            return curr == 'Y'

        renamed_fields = {
            'contributor_city': 'city',
            'contributor_state': 'state',
            'contributor_zipcode': 'zipcode',
            'contributor_occupation': 'fec_occ_emp',
            'recipient_party': 'party',
        }

        pipeline = [
            CSVFieldVerifier(),

            ContribRecipFilter(),
            CommitteeFilter(committees),
            Pac2PacRecipientFilter(candidates, committees),

            # transaction filters
            FieldAdder('transaction_namespace', CRP_TRANSACTION_NAMESPACE),
            FieldMerger({'transaction_id': ('cycle','fec_rec_no')},
                        build_transaction_id,
                        keep_fields=True),
            FieldMerger({'transaction_type': ('type',)}, normalize_type),

            # filing reference ID
            FieldRenamer({'filing_id': 'microfilm'}),

            # date stamp
            FieldModifier('date', parse_date_iso),

            # catcode
            FieldMerger({'contributor_category': ('real_code',)},
                        upper_or_blank,
                        keep_fields=True),
            FieldMerger({'recipient_category': ('recip_prim_code',)},
                        upper_or_blank,
                        keep_fields=True),

            FieldRenamer(renamed_fields),
            FieldModifier('contributor_state', stripped_upper_or_blank),

            FieldAdder('contributor_type', 'C'),

            # static fields
            FieldAdder('jurisdiction', 'F'),
            FieldMerger({'is_amendment': ('amend',)}, is_amendment),

            FieldMerger({'candidacy_status': ('curr_cand', 'cycle_cand')},
                        candidacy_status,
                        keep_fields=False),

            # final pass through the spec
            SpecFilter(SPEC),
        ]
        return chain_filters(*pipeline)
Пример #8
0
 def get_record_processor(year, import_ref):
     """Build the filter chain applied to each earmark-style record.

     ``year`` is added to every record as 'fiscal_year' and ``import_ref``
     as 'import_reference'.  The remaining filters limit string fields,
     convert amount fields and merge raw columns into 'description',
     'locations', 'members' and 'recipients'.
     """

     def string_filter_with(limit):
         # Returns a one-argument callable that forwards the value and the
         # given limit to string_filter.
         return lambda s: string_filter(s, limit)

     def presidential_value(p):
         return presidential_raw.get(p, '')

     def undisclosed_value(u):
         return undisclosed_raw.get(u, '')

     member_source_fields = (
         'house_members', 'house_parties', 'house_states',
         'house_districts', 'senate_members', 'senate_parties',
         'senate_states',
     )

     steps = [
         CSVFieldVerifier(),
         FieldRemover('id'),
         FieldRemover('county'),
         FieldAdder('fiscal_year', year),
         FieldAdder('import_reference', import_ref),

         # string fields, grouped by the limit handed to string_filter
         FieldModifier(['notes', 'house_members', 'senate_members'],
                       string_filter_with(1024)),
         FieldModifier(['description', 'house_parties', 'house_states',
                        'house_districts', 'senate_parties',
                        'senate_states', 'raw_recipient'],
                       string_filter_with(512)),
         FieldModifier(['bill_section', 'bill_subsection'],
                       string_filter_with(256)),

         # amounts
         FieldModifier(['budget_amount', 'senate_amount', 'house_amount',
                        'omni_amount', 'final_amount'],
                       amount_filter),

         FieldMerger({'description': ('project_heading', 'description')},
                     _prepend),

         # raw flag values are looked up in the module-level tables,
         # falling back to '' for unknown keys
         FieldModifier(['presidential'], presidential_value),
         FieldModifier(['undisclosed'], undisclosed_value),

         FieldMerger({'locations': ('city', 'state')},
                     _normalize_locations),
         FieldMerger({'members': member_source_fields},
                     _normalize_members,
                     keep_fields=True),
         FieldMerger({'recipients': ('raw_recipient', )},
                     _normalize_recipients),
     ]
     return chain_filters(*steps)
Пример #9
0
def agency_handler(inpath, outpath, infields, outfields):
    """Convert the pipe-quoted agency file at ``inpath`` into a CSV.

    Args:
        inpath: Path of the source file, read with ``quotechar='|'``.
        outpath: Path of the CSV file to write (opened in 'w' mode).
        infields: Field names passed to ``CSVSource`` for the input.
        outfields: Field names passed to ``CSVEmitter`` for the output.
    """
    # NOTE(review): FieldRenamer keys look like output names and values
    # like source column names -- confirm against saucebrush.
    renames = {
        'transaction': 'UniqID',
        'agency_name': 'Agency',
        'agency_ext_id': 'AgencyID',
    }

    # Both handles are managed here so they are flushed and closed even if
    # the recipe raises; the original never closed them.
    with open(inpath) as infile, open(outpath, 'w') as outfile:
        run_recipe(
            CSVSource(infile, fieldnames=infields, quotechar='|'),
            FieldAdder('id', ''),
            FieldRenamer(renames),
            # A DebugEmitter() can be inserted here while debugging.
            CSVEmitter(outfile, fieldnames=outfields),
        )
Пример #10
0
def bills_handler(inpath, outpath, infields, outfields):
    """Convert the pipe-quoted bills file at ``inpath`` into a CSV.

    Args:
        inpath: Path of the source file, read with ``quotechar='|'``.
        outpath: Path of the CSV file to write (opened in 'w' mode).
        infields: Field names passed to ``CSVSource`` for the input.
        outfields: Field names passed to ``CSVEmitter`` for the output.
    """
    # NOTE(review): FieldRenamer keys look like output names and values
    # like source column names -- confirm against saucebrush.
    renames = {
        'bill_id': 'B_ID',
        'issue': 'SI_ID',
        'congress_no': 'CongNo',
        'bill_name': 'Bill_Name',
    }

    # Both handles are managed here so they are flushed and closed even if
    # the recipe raises; the original never closed them.
    with open(inpath) as infile, open(outpath, 'w') as outfile:
        run_recipe(
            CSVSource(infile, fieldnames=infields, quotechar='|'),
            FieldAdder('id', ''),
            FieldRenamer(renames),
            # A DebugEmitter() can be inserted here while debugging.
            CSVEmitter(outfile, fieldnames=outfields),
        )
Пример #11
0
def lobbyist_handler(inpath, outpath, infields, outfields):
    """Convert the pipe-quoted lobbyist file at ``inpath`` into a CSV.

    Besides renaming columns, 'lobbyist_name' is produced by ``name_proc``
    from the 'Lobbyist' and 'Lobbyist_raw' fields, and
    'member_of_congress' by ``yn_proc`` from 'FormerCongMem'.

    Args:
        inpath: Path of the source file, read with ``quotechar='|'``.
        outpath: Path of the CSV file to write (opened in 'w' mode).
        infields: Field names passed to ``CSVSource`` for the input.
        outfields: Field names passed to ``CSVEmitter`` for the output.
    """
    # NOTE(review): FieldRenamer keys look like output names and values
    # like source column names -- confirm against saucebrush.
    renames = {
        'transaction': 'Uniqid',
        'year': 'Year',
        'lobbyist_ext_id': 'LobbyistID',
        'candidate_ext_id': 'CID',
        'government_position': 'OfficalPos',
    }

    # Both handles are managed here so they are flushed and closed even if
    # the recipe raises; the original never closed them.
    with open(inpath) as infile, open(outpath, 'w') as outfile:
        run_recipe(
            CSVSource(infile, fieldnames=infields, quotechar='|'),
            FieldAdder('id', ''),
            FieldMerger({'lobbyist_name': ('Lobbyist', 'Lobbyist_raw')},
                        name_proc),
            FieldMerger({'member_of_congress': ('FormerCongMem', )},
                        yn_proc),
            FieldRenamer(renames),
            # A DebugEmitter() can be inserted here while debugging.
            CSVEmitter(outfile, fieldnames=outfields),
        )
Пример #12
0
    def get_record_processor(catcodes, candidates, committees):
        """Build the filter chain applied to each individual-contribution record.

        ``candidates`` and ``committees`` are passed to
        ``IndivRecipientFilter``, ``committees`` also to ``CommitteeFilter``,
        and ``catcodes`` to ``CatCodeFilter``.  The filters run in the order
        they are listed and the value returned by ``chain_filters`` is
        passed back to the caller.
        """

        def build_transaction_id(cycle, fecid):
            return 'indiv:%s:%s' % (cycle, fecid)

        def normalize_type(t):
            # A falsy type becomes the empty string.
            if t:
                return t.strip().lower()
            return ''

        def upper_or_blank(s):
            if s:
                return s.upper()
            return ""

        def candidacy_status(curr, cycle):
            # Empty string unless the cycle flag is 'Y'; otherwise a boolean
            # saying whether the current-candidate flag is 'Y' too.
            if cycle != 'Y':
                return ""
            return curr == 'Y'

        name_fields = {
            'contributor_name': 'contrib',
            'parent_organization_name': 'ult_org',
        }
        ext_id_fields = {
            'contributor_ext_id': 'contrib_id',
            'committee_ext_id': 'cmte_id'
        }
        address_and_gender_fields = {
            'contributor_address': 'street',
            'contributor_city': 'city',
            'contributor_state': 'state',
            'contributor_zipcode': 'zipcode',
            'contributor_gender': 'gender'
        }

        pipeline = [
            CSVFieldVerifier(),

            # transaction filters
            FieldAdder('transaction_namespace', CRP_TRANSACTION_NAMESPACE),
            FieldMerger({'transaction_id': ('cycle', 'fec_trans_id')},
                        build_transaction_id,
                        keep_fields=True),
            FieldMerger({'transaction_type': ('type', )},
                        normalize_type,
                        keep_fields=True),

            # filing reference ID
            FieldRenamer({'filing_id': 'microfilm'}),

            # date stamp
            FieldModifier('date', parse_date_iso),

            # contributor, organization and parent organization
            FieldRenamer(name_fields),
            IndivRecipientFilter(candidates, committees),
            CommitteeFilter(committees),
            OrganizationFilter(),

            # external IDs
            FieldRenamer(ext_id_fields),

            # address and gender fields
            FieldRenamer(address_and_gender_fields),
            FieldModifier('contributor_state', upper_or_blank),
            FieldModifier('contributor_gender', upper_or_blank),

            # employer/occupation filter
            FECOccupationFilter(),

            # catcode
            CatCodeFilter('contributor', catcodes),

            # static fields
            FieldAdder('contributor_type', 'I'),
            FieldAdder('is_amendment', False),
            FieldMerger({'candidacy_status': ('curr_cand', 'cycle_cand')},
                        candidacy_status,
                        keep_fields=False),

            # final pass through the spec
            SpecFilter(SPEC),
        ]
        return chain_filters(*pipeline)
Пример #13
0
from saucebrush.filters import Splitter, PhoneNumberCleaner, FieldMerger, FieldAdder
from saucebrush.emitters import DebugEmitter
import operator
from itertools import count
import saucebrush

# Sample input: a single record holding a nested 'person' dict and a list
# of 'phones' dicts.
data = [
    {
        'person': {'firstname': 'James', 'lastname': 'Turk'},
        'phones': [
            {'phone': '222-222-2222'},
            {'phone': '(202) 333-3321'},
        ],
    },
]


def _join_with_space(x, y):
    """Return the two values joined by a single space."""
    return ' '.join((x, y))


# Sub-filters that the Splitter applies to the nested 'person' and 'phones'
# values respectively.
namemerger = FieldMerger({'name': ('firstname', 'lastname')},
                         _join_with_space)
phonecleaner = PhoneNumberCleaner(('phone', ))
splitter = Splitter({'person': [namemerger], 'phones': [phonecleaner]})

# 'id' values are drawn from an itertools.count() iterator.
ider = FieldAdder('id', count())

saucebrush.run_recipe(data, ider, splitter, DebugEmitter())