Пример #1
0
def fixed2csv(f, schema, output=None, **kwargs):
    """
    Translate fixed-width data into CSV, driven by a CSV-formatted schema.

    The schema CSV begins with a header row containing (at least) the columns "column", "start", and "length"; any other columns are ignored. Every following row describes one field: its name, its starting index (an integer), and its length (also an integer).

    'start' values are treated as zero-based, unless the first 'start' value is 1, in which case every value is treated as one-based.

    When a truthy output object is supplied, rows are written to it and an empty string is returned; otherwise the complete CSV text is returned.
    """
    streaming = bool(output)
    if not streaming:
        # No destination supplied: collect the CSV in memory instead.
        output = six.StringIO()

    # 'encoding' is the only keyword argument consulted; absent means None.
    encoding = kwargs.get('encoding')

    csv_writer = CSVKitWriter(output)
    csv_writer.writerows(FixedWidthReader(f, schema, encoding=encoding))

    if streaming:
        # Rows already went to the caller's object, so hand back nothing.
        return ''

    return output.getvalue()
 def handle(self, *args, **options):
     """
     Export the distinct candidate/office/filer combinations to a CSV file.

     Runs a SELECT DISTINCT joining the candidate, office and filer tables
     (table names are taken from each model's ``_meta.db_table``) and writes
     a header row followed by every result row to ``./candidates.csv``.

     ``args`` and ``options`` are accepted but not used here.
     """
     self.cursor = connection.cursor()
     sql = """
     SELECT DISTINCT
         o.name,
         o.seat,
         f.filer_id_raw,
         f.xref_filer_id,
         f.name,
         f.party
     FROM %(candidate)s as c
     INNER JOIN %(office)s as o
     ON c.office_id = o.id
     INNER JOIN %(filer)s as f
     ON c.filer_id = f.id
     """ % dict(
         candidate=models.Candidate._meta.db_table,
         office=models.Office._meta.db_table,
         filer=models.Filer._meta.db_table,
     )
     self.cursor.execute(sql)
     # Use a context manager so the output file is flushed and closed even
     # if writing a row raises; previously the handle was never closed.
     with open("./candidates.csv", 'wb') as csv_file:
         writer = CSVKitWriter(csv_file)
         writer.writerow([
             'office_name',
             'office_seat',
             'filer_id',
             'xref_filer_id',
             'name',
             'party'
         ])
         writer.writerows(self.cursor.fetchall())
    def log_errors(self, rows):
        """
        Log any errors to a csv file

        The file is created inside ``self.log_dir`` (which is created first
        if missing).  Its name is ``self.file_name`` lower-cased, with every
        occurrence of "tsv" replaced by "errors.csv".  A fixed header row is
        written, followed by ``rows``; all fields are quoted.
        """
        # Make sure the log directory exists
        if not os.path.exists(self.log_dir):
            os.makedirs(self.log_dir)

        # Log writer
        log_path = os.path.join(
            self.log_dir,
            self.file_name.lower().replace("tsv", "errors.csv")
        )

        # The with-block closes the file even when a write fails, so the
        # handle no longer leaks on error.
        with open(log_path, 'w') as log_file:
            log_writer = CSVKitWriter(log_file, quoting=csv.QUOTE_ALL)

            # Add the headers
            log_writer.writerow([
                'Line number',
                'Headers len',
                'Fields len',
                'Line value'
            ])

            # Log out the rows
            log_writer.writerows(rows)
Пример #4
0
    def log_errors(self, rows):
        """
        Log any errors to a csv file

        Ensures ``self.log_dir`` exists, then writes a header row and the
        supplied ``rows`` (every field quoted) to a file in that directory.
        The file name is derived from ``self.file_name``: lower-cased, with
        each "tsv" substring replaced by "errors.csv".
        """
        # Make sure the log directory exists
        if not os.path.exists(self.log_dir):
            os.makedirs(self.log_dir)

        # Work out where the log goes
        log_name = self.file_name.lower().replace("tsv", "errors.csv")
        log_path = os.path.join(self.log_dir, log_name)

        # Opening through a context manager guarantees the file is closed
        # even if one of the writes below raises.
        with open(log_path, 'w') as log_file:
            log_writer = CSVKitWriter(log_file, quoting=csv.QUOTE_ALL)

            # Add the headers
            log_writer.writerow([
                'Line number',
                'Headers len',
                'Fields len',
                'Line value'
            ])

            # Log out the rows
            log_writer.writerows(rows)
Пример #5
0
    def to_csv(self, output, **kwargs):
        """
        Write this table, header row first, as CSV to a file-like object.

        Extra keyword arguments are passed straight to ``CSVKitWriter``.
        """
        table = self.to_rows(serialize_dates=True)

        # The header goes ahead of the data rows
        table.insert(0, self.headers())

        CSVKitWriter(output, **kwargs).writerows(table)
Пример #6
0
    def to_csv(self, output, **kwargs):
        """
        Dump the table as CSV into ``output`` (any file-like object).

        Rows come from ``to_rows(serialize_dates=True)`` with the result of
        ``headers()`` placed in front; ``kwargs`` are forwarded to the
        ``CSVKitWriter`` constructor.
        """
        csv_rows = self.to_rows(serialize_dates=True)
        header_row = self.headers()
        csv_rows.insert(0, header_row)

        csv_writer = CSVKitWriter(output, **kwargs)
        csv_writer.writerows(csv_rows)
Пример #7
0
    def main(self):
        """
        Copy every row from the input file to the output file.

        The reader is built from ``self.input_file`` and
        ``self.reader_kwargs``, the writer from ``self.output_file`` and
        ``self.writer_kwargs``.
        """
        source_rows = CSVKitReader(self.input_file, **self.reader_kwargs)
        sink = CSVKitWriter(self.output_file, **self.writer_kwargs)
        sink.writerows(source_rows)
    def handle(self, *args, **options):
        """
        Export grouped contributor records, with counts, to a CSV file.

        The query stacks (UNION ALL) the name/occupation/employer/address
        columns of the rcpt, loan and s497 tables (table names are taken from
        each model's ``_meta.db_table``), groups identical records, counts
        them, and orders the result by name.  A header row and every result
        row are written to ``./contributors.csv``.

        ``args`` and ``options`` are accepted but not used here.
        """
        self.cursor = connection.cursor()
        sql = """
        SELECT
            title,
            first_name,
            last_name,
            suffix,
            occupation,
            employer,
            address1,
            address2,
            city,
            state,
            zipcode,
            committee_id,
            COUNT(*)
        FROM (
            SELECT
                ctrib_namt as title,
                ctrib_namf as first_name,
                ctrib_naml as last_name,
                ctrib_nams as suffix,
                ctrib_occ as occupation,
                ctrib_emp as employer,
                ctrib_adr1 as address1,
                ctrib_adr2 as address2,
                ctrib_city as city,
                ctrib_st as state,
                ctrib_zip4 as zipcode,
                cmte_id as committee_id
            FROM %(rcpt)s

            UNION ALL

            SELECT
                lndr_namt as title,
                lndr_namf as first_name,
                lndr_naml as last_name,
                lndr_nams as suffix,
                loan_occ as occupation,
                loan_emp as employer,
                loan_adr1 as address1,
                loan_adr2 as address2,
                loan_city as city,
                loan_st as state,
                loan_zip4 as zipcode,
                cmte_id as committee_id
            FROM %(loan)s

            UNION ALL

            SELECT
                enty_namt as title,
                enty_namf as first_name,
                enty_naml as last_name,
                enty_nams as suffix,
                ctrib_occ as occupation,
                ctrib_emp as employer,
                '' as address1,
                '' as address2,
                enty_city as city,
                enty_st as state,
                enty_zip4 as zipcode,
                cmte_id as committee_id
            FROM %(s497)s
        ) as t
        GROUP BY
            title,
            first_name,
            last_name,
            suffix,
            occupation,
            employer,
            address1,
            address2,
            city,
            state,
            zipcode,
            committee_id
        ORDER BY
            last_name,
            first_name,
            suffix,
            title,
            city,
            state,
            occupation,
            employer
        """ % dict(
            rcpt=models.RcptCd._meta.db_table,
            loan=models.LoanCd._meta.db_table,
            s497=models.S497Cd._meta.db_table,
        )
        self.cursor.execute(sql)
        # Use a context manager so the output file is flushed and closed even
        # if writing a row raises; previously the handle was never closed.
        with open("./contributors.csv", 'wb') as csv_file:
            writer = CSVKitWriter(csv_file)
            writer.writerow([
                'title',
                'first_name',
                'last_name',
                'suffix',
                'occupation',
                'employer',
                'address1',
                'address2',
                'city',
                'state',
                'zipcode',
                'committee_id',
                'count'
            ])
            writer.writerows(self.cursor.fetchall())
Пример #9
0
from utils import get_csv_data
from csvkit import CSVKitWriter


if __name__ == '__main__':
    # Expect exactly two arguments: the input CSV and the output CSV.
    if len(sys.argv) != 3:
        print("")
        print("usage: create_group.py <file> <file>")
        sys.exit(1)

    filein = sys.argv[1]
    fileout = sys.argv[2]

    table = get_csv_data(filein)

    # `genera` gives fast membership tests; `ordered` keeps first-seen order
    # and starts with the output header row.
    genera = set()
    ordered = [['genus']]

    for idx, row in enumerate(table):
        if idx == 0:
            # Header row: locate the column holding the scientific name.
            column = row.index('scientific_name')
        else:
            words = row[column].split()
            if not words:
                # Blank scientific name: no genus to record, and indexing
                # the empty split result would raise IndexError.
                continue
            # The genus is taken to be the first word of the name.
            genus = words[0]
            if genus not in genera:
                genera.add(genus)
                ordered.append([genus])

    with open(fileout, 'wb') as fp:
        writer = CSVKitWriter(fp)
        writer.writerows(ordered)
Пример #10
0
    def main(self):
        """
        Stream all rows read from ``self.input_file`` into ``self.output_file``.

        ``self.reader_kwargs`` and ``self.writer_kwargs`` are forwarded to the
        ``CSVKitReader`` and ``CSVKitWriter`` constructors respectively.
        """
        rows_in = CSVKitReader(self.input_file, **self.reader_kwargs)
        rows_out = CSVKitWriter(self.output_file, **self.writer_kwargs)
        rows_out.writerows(rows_in)
    def handle(self, *args, **options):
        """
        Export grouped contributor records, with counts, to a CSV file.

        The query combines (UNION ALL) the contributor-style columns of the
        rcpt, loan and s497 tables (table names come from each model's
        ``_meta.db_table``), groups identical records with a COUNT(*), and
        sorts by name.  The header row and all result rows are written to
        ``./contributors.csv``.

        ``args`` and ``options`` are accepted but not used here.
        """
        self.cursor = connection.cursor()
        sql = """
        SELECT
            title,
            first_name,
            last_name,
            suffix,
            occupation,
            employer,
            address1,
            address2,
            city,
            state,
            zipcode,
            committee_id,
            COUNT(*)
        FROM (
            SELECT
                ctrib_namt as title,
                ctrib_namf as first_name,
                ctrib_naml as last_name,
                ctrib_nams as suffix,
                ctrib_occ as occupation,
                ctrib_emp as employer,
                ctrib_adr1 as address1,
                ctrib_adr2 as address2,
                ctrib_city as city,
                ctrib_st as state,
                ctrib_zip4 as zipcode,
                cmte_id as committee_id
            FROM %(rcpt)s

            UNION ALL

            SELECT
                lndr_namt as title,
                lndr_namf as first_name,
                lndr_naml as last_name,
                lndr_nams as suffix,
                loan_occ as occupation,
                loan_emp as employer,
                loan_adr1 as address1,
                loan_adr2 as address2,
                loan_city as city,
                loan_st as state,
                loan_zip4 as zipcode,
                cmte_id as committee_id
            FROM %(loan)s

            UNION ALL

            SELECT
                enty_namt as title,
                enty_namf as first_name,
                enty_naml as last_name,
                enty_nams as suffix,
                ctrib_occ as occupation,
                ctrib_emp as employer,
                '' as address1,
                '' as address2,
                enty_city as city,
                enty_st as state,
                enty_zip4 as zipcode,
                cmte_id as committee_id
            FROM %(s497)s
        ) as t
        GROUP BY
            title,
            first_name,
            last_name,
            suffix,
            occupation,
            employer,
            address1,
            address2,
            city,
            state,
            zipcode,
            committee_id
        ORDER BY
            last_name,
            first_name,
            suffix,
            title,
            city,
            state,
            occupation,
            employer
        """ % dict(
            rcpt=models.RcptCd._meta.db_table,
            loan=models.LoanCd._meta.db_table,
            s497=models.S497Cd._meta.db_table,
        )
        self.cursor.execute(sql)
        # Open the output through a context manager so it is flushed and
        # closed even on error; the original never closed this handle.
        with open("./contributors.csv", 'wb') as csv_file:
            writer = CSVKitWriter(csv_file)
            writer.writerow([
                'title', 'first_name', 'last_name', 'suffix', 'occupation',
                'employer', 'address1', 'address2', 'city', 'state', 'zipcode',
                'committee_id', 'count'
            ])
            writer.writerows(self.cursor.fetchall())