def fixed2csv(f, schema, output=None, **kwargs):
    """
    Convert a fixed-width file to csv using a CSV-formatted schema description.

    A schema CSV must start with a header row with (at least) columns labeled
    "column","start", and "length". (Other columns will be ignored.) For each
    subsequent row, therefore, those columns will be used to identify a column
    name, the starting index of the column (an integer), and the length of the
    column (also an integer).

    Values in the 'start' column are assumed to be zero-based, unless the
    first value for 'start' is 1, in which case all values are assumed to be
    one-based.

    :param f: file-like object containing the fixed-width data.
    :param schema: file-like object containing the CSV schema description.
    :param output: optional file-like object. If given, rows are written to it
        and an empty string is returned; otherwise the complete CSV data is
        returned as a string.
    :param kwargs: may include ``encoding``, passed to ``FixedWidthReader``.
    :returns: the CSV data as a string, or '' when streaming to ``output``.
    """
    # Stream to the caller's file object when one is provided; otherwise
    # buffer everything in an in-memory StringIO and return the text.
    streaming = bool(output)
    if not streaming:
        output = six.StringIO()

    # dict.get() replaces the original try/except KeyError dance.
    encoding = kwargs.get('encoding')

    writer = CSVKitWriter(output)
    reader = FixedWidthReader(f, schema, encoding=encoding)
    writer.writerows(reader)

    if not streaming:
        return output.getvalue()

    # Return empty string when streaming
    return ''
def handle(self, *args, **options):
    """
    Dump distinct candidates — joined to their office and filer records —
    to ./candidates.csv in the current working directory.
    """
    self.cursor = connection.cursor()
    # Table names come from Django model metadata, not user input, so
    # %-interpolation into the SQL string is safe here.
    sql = """
        SELECT DISTINCT
            o.name,
            o.seat,
            f.filer_id_raw,
            f.xref_filer_id,
            f.name,
            f.party
        FROM %(candidate)s as c
        INNER JOIN %(office)s as o
        ON c.office_id = o.id
        INNER JOIN %(filer)s as f
        ON c.filer_id = f.id
    """ % dict(
        candidate=models.Candidate._meta.db_table,
        office=models.Office._meta.db_table,
        filer=models.Filer._meta.db_table,
    )
    self.cursor.execute(sql)
    # Use a context manager so the output file is flushed and closed even
    # if a write fails (the original leaked the file handle).
    with open("./candidates.csv", 'wb') as out:
        writer = CSVKitWriter(out)
        writer.writerow([
            'office_name',
            'office_seat',
            'filer_id',
            'xref_filer_id',
            'name',
            'party'
        ])
        writer.writerows(self.cursor.fetchall())
def log_errors(self, rows):
    """
    Log any errors to a csv file.

    :param rows: iterable of (line number, header count, field count,
        raw line value) records describing the bad input lines.
    """
    # Make sure the log directory exists. An explicit check-and-create
    # replaces the original `exists() or makedirs()` short-circuit trick.
    if not os.path.exists(self.log_dir):
        os.makedirs(self.log_dir)

    # NOTE(review): str.replace substitutes every occurrence of "tsv" in
    # the (lowercased) file name, not just the extension — confirm file
    # names never contain "tsv" elsewhere.
    log_path = os.path.join(
        self.log_dir,
        self.file_name.lower().replace("tsv", "errors.csv")
    )

    # Context manager guarantees the log file is closed even if a write
    # fails (the original leaked the handle on error).
    with open(log_path, 'w') as log_file:
        log_writer = CSVKitWriter(log_file, quoting=csv.QUOTE_ALL)

        # Add the headers
        log_writer.writerow([
            'Line number',
            'Headers len',
            'Fields len',
            'Line value'
        ])

        # Log out the rows
        log_writer.writerows(rows)
def log_errors(self, rows):
    """
    Log any errors to a csv file.

    :param rows: iterable of (line number, header count, field count,
        raw line value) records describing the bad input lines.
    """
    # Make sure the log directory exists. An explicit check-and-create
    # replaces the original `exists() or makedirs()` short-circuit trick.
    if not os.path.exists(self.log_dir):
        os.makedirs(self.log_dir)

    # NOTE(review): str.replace substitutes every occurrence of "tsv" in
    # the (lowercased) file name, not just the extension — confirm file
    # names never contain "tsv" elsewhere.
    log_path = os.path.join(
        self.log_dir,
        self.file_name.lower().replace("tsv", "errors.csv")
    )

    # Context manager guarantees the log file is closed even if a write
    # fails (the original leaked the handle on error).
    with open(log_path, 'w') as log_file:
        log_writer = CSVKitWriter(log_file, quoting=csv.QUOTE_ALL)

        # Add the headers
        log_writer.writerow([
            'Line number',
            'Headers len',
            'Fields len',
            'Line value'
        ])

        # Log out the rows
        log_writer.writerows(rows)
def to_csv(self, output, **kwargs):
    """
    Serializes the table to CSV and writes it to any file-like object.
    """
    # Header row first, then every serialized data row.
    data_rows = self.to_rows(serialize_dates=True)
    all_rows = [self.headers()] + data_rows
    CSVKitWriter(output, **kwargs).writerows(all_rows)
def to_csv(self, output, **kwargs):
    """
    Serializes the table to CSV and writes it to any file-like object.
    """
    # Header row first, then every serialized data row.
    data_rows = self.to_rows(serialize_dates=True)
    all_rows = [self.headers()] + data_rows
    CSVKitWriter(output, **kwargs).writerows(all_rows)
def main(self):
    """
    Pipe every row from the input CSV straight through to the output CSV.
    """
    source = CSVKitReader(self.input_file, **self.reader_kwargs)
    sink = CSVKitWriter(self.output_file, **self.writer_kwargs)
    sink.writerows(source)
def handle(self, *args, **options):
    """
    Dump a CSV of unique contributors — aggregated across receipts, loans
    and Schedule 497 filings — to ./contributors.csv with an occurrence
    count per distinct contributor/committee combination.
    """
    self.cursor = connection.cursor()
    # Table names come from Django model metadata, not user input, so
    # %-interpolation into the SQL string is safe here.
    sql = """
        SELECT
            title, first_name, last_name, suffix,
            occupation, employer,
            address1, address2, city, state, zipcode,
            committee_id,
            COUNT(*)
        FROM (
            SELECT
                ctrib_namt as title,
                ctrib_namf as first_name,
                ctrib_naml as last_name,
                ctrib_nams as suffix,
                ctrib_occ as occupation,
                ctrib_emp as employer,
                ctrib_adr1 as address1,
                ctrib_adr2 as address2,
                ctrib_city as city,
                ctrib_st as state,
                ctrib_zip4 as zipcode,
                cmte_id as committee_id
            FROM %(rcpt)s
            UNION ALL
            SELECT
                lndr_namt as title,
                lndr_namf as first_name,
                lndr_naml as last_name,
                lndr_nams as suffix,
                loan_occ as occupation,
                loan_emp as employer,
                loan_adr1 as address1,
                loan_adr2 as address2,
                loan_city as city,
                loan_st as state,
                loan_zip4 as zipcode,
                cmte_id as committee_id
            FROM %(loan)s
            UNION ALL
            SELECT
                enty_namt as title,
                enty_namf as first_name,
                enty_naml as last_name,
                enty_nams as suffix,
                ctrib_occ as occupation,
                ctrib_emp as employer,
                '' as address1,
                '' as address2,
                enty_city as city,
                enty_st as state,
                enty_zip4 as zipcode,
                cmte_id as committee_id
            FROM %(s497)s
        ) as t
        GROUP BY
            title, first_name, last_name, suffix, occupation, employer,
            address1, address2, city, state, zipcode, committee_id
        ORDER BY
            last_name, first_name, suffix, title, city, state,
            occupation, employer
    """ % dict(
        rcpt=models.RcptCd._meta.db_table,
        loan=models.LoanCd._meta.db_table,
        s497=models.S497Cd._meta.db_table,
    )
    self.cursor.execute(sql)
    # Context manager closes the output file even if a write fails
    # (the original leaked the file handle).
    with open("./contributors.csv", 'wb') as out:
        writer = CSVKitWriter(out)
        writer.writerow([
            'title', 'first_name', 'last_name', 'suffix',
            'occupation', 'employer',
            'address1', 'address2', 'city', 'state', 'zipcode',
            'committee_id', 'count'
        ])
        writer.writerows(self.cursor.fetchall())
import sys

from utils import get_csv_data
from csvkit import CSVKitWriter


if __name__ == '__main__':
    # `import sys` was missing even though sys.argv/sys.exit are used;
    # added above. Require exactly two positional arguments.
    if len(sys.argv) != 3:
        # print() with a single argument works identically on Python 2 and 3.
        print("")
        print("usage: create_group.py <file> <file>")
        sys.exit(1)

    filein = sys.argv[1]
    fileout = sys.argv[2]

    table = get_csv_data(filein)

    genera = set()           # genus names already seen
    ordered = [['genus']]    # output rows, beginning with the header

    for idx, row in enumerate(table):
        if idx == 0:
            # Header row: locate the scientific_name column.
            column = row.index('scientific_name')
        else:
            # The genus is the first word of the scientific name.
            words = row[column].split()
            if not words:
                # Skip rows with an empty scientific_name instead of
                # crashing with IndexError.
                continue
            genus = words[0]
            if genus not in genera:
                genera.add(genus)
                ordered.append([genus])

    with open(fileout, 'wb') as fp:
        writer = CSVKitWriter(fp)
        writer.writerows(ordered)
def main(self):
    """
    Pipe every row from the input CSV straight through to the output CSV.
    """
    source = CSVKitReader(self.input_file, **self.reader_kwargs)
    sink = CSVKitWriter(self.output_file, **self.writer_kwargs)
    sink.writerows(source)
def handle(self, *args, **options):
    """
    Dump a CSV of unique contributors — aggregated across receipts, loans
    and Schedule 497 filings — to ./contributors.csv with an occurrence
    count per distinct contributor/committee combination.
    """
    self.cursor = connection.cursor()
    # Table names come from Django model metadata, not user input, so
    # %-interpolation into the SQL string is safe here.
    sql = """
        SELECT
            title, first_name, last_name, suffix,
            occupation, employer,
            address1, address2, city, state, zipcode,
            committee_id,
            COUNT(*)
        FROM (
            SELECT
                ctrib_namt as title,
                ctrib_namf as first_name,
                ctrib_naml as last_name,
                ctrib_nams as suffix,
                ctrib_occ as occupation,
                ctrib_emp as employer,
                ctrib_adr1 as address1,
                ctrib_adr2 as address2,
                ctrib_city as city,
                ctrib_st as state,
                ctrib_zip4 as zipcode,
                cmte_id as committee_id
            FROM %(rcpt)s
            UNION ALL
            SELECT
                lndr_namt as title,
                lndr_namf as first_name,
                lndr_naml as last_name,
                lndr_nams as suffix,
                loan_occ as occupation,
                loan_emp as employer,
                loan_adr1 as address1,
                loan_adr2 as address2,
                loan_city as city,
                loan_st as state,
                loan_zip4 as zipcode,
                cmte_id as committee_id
            FROM %(loan)s
            UNION ALL
            SELECT
                enty_namt as title,
                enty_namf as first_name,
                enty_naml as last_name,
                enty_nams as suffix,
                ctrib_occ as occupation,
                ctrib_emp as employer,
                '' as address1,
                '' as address2,
                enty_city as city,
                enty_st as state,
                enty_zip4 as zipcode,
                cmte_id as committee_id
            FROM %(s497)s
        ) as t
        GROUP BY
            title, first_name, last_name, suffix, occupation, employer,
            address1, address2, city, state, zipcode, committee_id
        ORDER BY
            last_name, first_name, suffix, title, city, state,
            occupation, employer
    """ % dict(
        rcpt=models.RcptCd._meta.db_table,
        loan=models.LoanCd._meta.db_table,
        s497=models.S497Cd._meta.db_table,
    )
    self.cursor.execute(sql)
    # Context manager closes the output file even if a write fails
    # (the original leaked the file handle).
    with open("./contributors.csv", 'wb') as out:
        writer = CSVKitWriter(out)
        writer.writerow([
            'title', 'first_name', 'last_name', 'suffix',
            'occupation', 'employer',
            'address1', 'address2', 'city', 'state', 'zipcode',
            'committee_id', 'count'
        ])
        writer.writerows(self.cursor.fetchall())