示例#1
0
 def test_csv_emitter(self):
     """CSVEmitter writes a header row, then one CSV row per record."""
     ce = CSVEmitter(self.output, ('x', 'y', 'z'))
     data = ce.attach([{'x': 1, 'y': 2, 'z': 3}, {'x': 5, 'y': 5, 'z': 5}])
     # Drain the iterable returned by attach(); presumably it is lazy and
     # rows are only emitted as records are consumed -- TODO confirm.
     for _ in data:
         pass
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(self.output.getvalue(),
                      'x,y,z\r\n1,2,3\r\n5,5,5\r\n')
示例#2
0
    def test_csv_emitter(self):
        """CSVEmitter writes a header row, then one CSV row per record.

        The output buffer is a ``cStringIO.StringIO`` when that module is
        importable (Python 2.x) and a plain ``StringIO`` otherwise.
        """
        try:
            import cStringIO    # only exists on Python 2.x
        except ImportError:
            # Only a missing module should trigger the fallback; a bare
            # ``except`` would also hide unrelated errors.
            stream = StringIO()
        else:
            stream = cStringIO.StringIO()

        with closing(stream) as output:
            ce = CSVEmitter(output, ('x', 'y', 'z'))
            # list() drains the iterable returned by attach() so that every
            # record has passed through the emitter before we inspect output.
            list(ce.attach([{'x': 1, 'y': 2, 'z': 3}, {'x': 5, 'y': 5, 'z': 5}]))
            self.assertEqual(output.getvalue(), 'x,y,z\r\n1,2,3\r\n5,5,5\r\n')
示例#3
0
def lobbying_handler(inpath, outpath, infields, outfields):
    """Run the lobbying recipe from ``inpath`` to ``outpath``.

    Args:
        inpath: Path of the source file, read as CSV with ``|`` as the
            quote character and ``infields`` as column names.
        outpath: Path of the CSV file to (over)write.
        infields: Field names passed to the CSV source.
        outfields: Field names passed to the CSV emitter.

    Both files are closed when the recipe finishes or raises.
    """
    # NOTE(review): assumes run_recipe() consumes the whole source before
    # returning, so closing the files right afterwards is safe.
    with open(inpath) as infile, open(outpath, 'w') as outfile:
        run_recipe(
            CSVSource(infile, fieldnames=infields, quotechar='|'),
            UnicodeFilter(),
            FieldRemover('Source'),
            FieldMerger({'registrant_name': ('Registrant', 'RegistrantRaw')},
                        name_proc),
            FieldMerger({'registrant_is_firm': ('IsFirm', )}, yn_proc),
            FieldMerger({'client_name': ('Client', 'Client_raw')}, name_proc),
            # Empty/missing amounts are treated as 0 before float conversion.
            FieldMerger({'amount': ('Amount', )}, lambda x: float(x or 0)),
            FieldMerger({'affiliate': ('Affiliate', )}, yn_proc),
            FieldMerger({'filing_included_nsfs': ('IncludeNSFS', )}, yn_proc),
            FieldMerger({'include_in_industry_totals': ('Ind', )}, yn_proc),
            FieldMerger({'use': ('Use', )}, yn_proc),
            FieldRenamer({
                'transaction_id': 'Uniqid',
                'transaction_type': 'Type',
                'transaction_type_desc': 'TypeLong',
                'year': 'Year',
                'client_category': 'Catcode',
                'client_parent_name': 'Ultorg',
                'filing_type': 'Self',
            }),
            #DebugEmitter(),
            CSVEmitter(outfile, fieldnames=outfields),
        )
示例#4
0
    def denormalize(self, data_path, cycles, catcodes, candidates, committees):
        """Denormalize the ``pac_other`` files of all ``cycles`` into one file.

        Reads ``<data_path>/raw/crp/pac_other<cycle>.txt`` for every cycle and
        writes ``<data_path>/denormalized/denorm_pac2pac.txt``.

        Args:
            data_path: Base directory containing ``raw`` and ``denormalized``.
            cycles: Iterable of cycle identifiers interpolated into file names.
            catcodes, candidates, committees: Passed through unchanged to
                ``self.get_record_processor``.

        The output file is closed when loading finishes or raises.
        """
        infiles = Files(*[os.path.join(data_path, 'raw', 'crp', 'pac_other%s.txt' % cycle)
                          for cycle in cycles])
        out_path = os.path.join(data_path, 'denormalized', 'denorm_pac2pac.txt')

        # NOTE(review): assumes load_data() runs to completion before
        # returning, so the output file can be closed right after it.
        with open(out_path, 'w') as outfile:
            output_func = CSVEmitter(outfile, fieldnames=FIELDNAMES).process_record
            source = VerifiedCSVSource(infiles, fieldnames=FILE_TYPES['pac_other'], quotechar="|")

            record_processor = self.get_record_processor(catcodes, candidates, committees)

            load_data(source, record_processor, output_func)
示例#5
0
def agency_handler(inpath, outpath, infields, outfields):
    """Run the agency recipe from ``inpath`` to ``outpath``.

    Args:
        inpath: Path of the source file, read as CSV with ``|`` as the
            quote character and ``infields`` as column names.
        outpath: Path of the CSV file to (over)write.
        infields: Field names passed to the CSV source.
        outfields: Field names passed to the CSV emitter.

    Both files are closed when the recipe finishes or raises.
    """
    # NOTE(review): assumes run_recipe() consumes the whole source before
    # returning, so closing the files right afterwards is safe.
    with open(inpath) as infile, open(outpath, 'w') as outfile:
        run_recipe(
            CSVSource(infile, fieldnames=infields, quotechar='|'),
            FieldAdder('id', ''),
            FieldRenamer({
                'transaction': 'UniqID',
                'agency_name': 'Agency',
                'agency_ext_id': 'AgencyID',
            }),
            #DebugEmitter(),
            CSVEmitter(outfile, fieldnames=outfields),
        )
示例#6
0
def bills_handler(inpath, outpath, infields, outfields):
    """Run the bills recipe from ``inpath`` to ``outpath``.

    Args:
        inpath: Path of the source file, read as CSV with ``|`` as the
            quote character and ``infields`` as column names.
        outpath: Path of the CSV file to (over)write.
        infields: Field names passed to the CSV source.
        outfields: Field names passed to the CSV emitter.

    Both files are closed when the recipe finishes or raises.
    """
    # NOTE(review): assumes run_recipe() consumes the whole source before
    # returning, so closing the files right afterwards is safe.
    with open(inpath) as infile, open(outpath, 'w') as outfile:
        run_recipe(
            CSVSource(infile, fieldnames=infields, quotechar='|'),
            FieldAdder('id', ''),
            FieldRenamer({
                'bill_id': 'B_ID',
                'issue': 'SI_ID',
                'congress_no': 'CongNo',
                'bill_name': 'Bill_Name',
            }),
            #DebugEmitter(),
            CSVEmitter(outfile, fieldnames=outfields),
        )
示例#7
0
def lobbyist_handler(inpath, outpath, infields, outfields):
    """Run the lobbyist recipe from ``inpath`` to ``outpath``.

    Args:
        inpath: Path of the source file, read as CSV with ``|`` as the
            quote character and ``infields`` as column names.
        outpath: Path of the CSV file to (over)write.
        infields: Field names passed to the CSV source.
        outfields: Field names passed to the CSV emitter.

    Both files are closed when the recipe finishes or raises.
    """
    # NOTE(review): assumes run_recipe() consumes the whole source before
    # returning, so closing the files right afterwards is safe.
    with open(inpath) as infile, open(outpath, 'w') as outfile:
        run_recipe(
            CSVSource(infile, fieldnames=infields, quotechar='|'),
            FieldAdder('id', ''),
            FieldMerger({'lobbyist_name': ('Lobbyist', 'Lobbyist_raw')},
                        name_proc),
            FieldMerger({'member_of_congress': ('FormerCongMem', )}, yn_proc),
            FieldRenamer({
                'transaction': 'Uniqid',
                'year': 'Year',
                'lobbyist_ext_id': 'LobbyistID',
                'candidate_ext_id': 'CID',
                # 'OfficalPos' (sic) is the source column name; do not "fix" it.
                'government_position': 'OfficalPos',
            }),
            #DebugEmitter(),
            CSVEmitter(outfile, fieldnames=outfields),
        )
示例#8
0
def issue_handler(inpath, outpath, infields, outfields):
    """Run the lobbying-issue recipe from ``inpath`` to ``outpath``.

    Args:
        inpath: Path of the source file, read as CSV with ``|`` as the
            quote character and ``infields`` as column names.
        outpath: Path of the CSV file to (over)write.
        infields: Field names passed to the CSV source.
        outfields: Field names passed to the CSV emitter.

    Both files are closed when the recipe finishes or raises.
    """
    # NOTE(review): assumes run_recipe() consumes the whole source before
    # returning, so closing the files right afterwards is safe.
    with open(inpath, 'r') as infile, open(outpath, 'w') as outfile:
        run_recipe(
            VerifiedCSVSource(infile,
                              fieldnames=infields,
                              quotechar='|'),
            # The expected field count comes from the 'lob_issue' file layout.
            FieldCountValidator(len(FILE_TYPES['lob_issue'])),
            CSVFieldVerifier(),
            FieldRenamer({
                'id': 'SI_ID',
                'transaction': 'UniqID',
                'general_issue_code': 'IssueID',
                'general_issue': 'Issue',
                'specific_issue': 'SpecIssue',
                'year': 'Year',
            }),
            #DebugEmitter(),
            CSVEmitter(outfile, fieldnames=outfields),
        )
示例#9
0
    def denormalize(self, data_path, cycles, catcodes, candidates, committees):
        """Denormalize the ``indivs`` file of each cycle into its own output.

        For every cycle, reads ``<data_path>/raw/crp/indivs<cycle>.txt`` and
        writes ``<data_path>/denormalized/denorm_indivs.<cycle>.txt``,
        reporting the pair of paths on stdout first.

        Args:
            data_path: Base directory containing ``raw`` and ``denormalized``.
            cycles: Iterable of cycle identifiers interpolated into file names.
            catcodes, candidates, committees: Passed through unchanged to
                ``self.get_record_processor``.

        Each cycle's files are closed before the next cycle starts, so the
        number of open handles no longer grows with ``len(cycles)``.
        """
        record_processor = self.get_record_processor(catcodes, candidates,
                                                     committees)

        for cycle in cycles:
            in_path = os.path.join(data_path, 'raw', 'crp',
                                   'indivs%s.txt' % cycle)
            out_path = os.path.join(data_path, 'denormalized',
                                    'denorm_indivs.%s.txt' % cycle)

            # NOTE(review): assumes load_data() runs to completion before
            # returning, so both files can be closed right after it.
            with open(in_path, 'r') as infile, open(out_path, 'w') as outfile:
                sys.stdout.write('Reading from %s, writing to %s...\n' %
                                 (in_path, out_path))

                input_source = VerifiedCSVSource(infile,
                                                 fieldnames=FILE_TYPES['indivs'],
                                                 quotechar="|")
                output_func = CSVEmitter(outfile,
                                         fieldnames=FIELDNAMES).process_record

                load_data(input_source, record_processor, output_func)
示例#10
0
    def process_unallocated(out_dir, salts_db):
        """Process the temporary unallocated-contributions CSV into the final one.

        Reads ``<out_dir>/nimsp_unallocated_contributions.csv.TMP`` (skipping
        its first row), passes every record through the processor returned by
        ``NIMSPDenormalize.get_unallocated_record_processor(salts_db)`` and
        writes the result to ``<out_dir>/nimsp_unallocated_contributions.csv``.

        Args:
            out_dir: Directory holding the temporary input and the output.
            salts_db: Passed through to ``get_unallocated_record_processor``.

        Both files are closed when loading finishes or raises.
        """
        unallocated_csv_filename = os.path.join(
            out_dir, 'nimsp_unallocated_contributions.csv.TMP')
        salted_csv_filename = os.path.join(
            out_dir, 'nimsp_unallocated_contributions.csv')

        # The input path already contains out_dir; joining out_dir onto it a
        # second time pointed at out_dir/out_dir/... for relative directories.
        with open(unallocated_csv_filename, 'r') as unallocated_csv, \
                open(salted_csv_filename, 'w') as salted_csv:
            # The input carries one extra trailing column compared with the
            # output layout.
            source = VerifiedCSVSource(unallocated_csv,
                                       fieldnames=FIELDNAMES + ['contributionid'],
                                       skiprows=1)

            output_func = CSVEmitter(salted_csv, FIELDNAMES).process_record

            load_data(source,
                      NIMSPDenormalize.get_unallocated_record_processor(salts_db),
                      output_func)
示例#11
0
 def test_csv_emitter(self):
     """CSVEmitter writes a header row, then one CSV row per record."""
     ce = CSVEmitter(self.output, ('x', 'y', 'z'))
     data = ce.attach([{'x': 1, 'y': 2, 'z': 3}, {'x': 5, 'y': 5, 'z': 5}])
     # Drain the iterable returned by attach(); presumably it is lazy and
     # rows are only emitted as records are consumed -- TODO confirm.
     for _ in data:
         pass
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(self.output.getvalue(), 'x,y,z\r\n1,2,3\r\n5,5,5\r\n')