def run(source, modifiers, header=True):
    """
    Pass CSV text through a ``sed.CsvFilter`` and return the resulting CSV text.

    ``source`` is parsed with ``csvkit.CSVKitReader``; the reader is wrapped in
    ``sed.CsvFilter(reader, modifiers, header=header)``; every row the filter
    yields is written with ``csvkit.CSVKitWriter`` into an in-memory buffer,
    whose full contents are returned.
    """
    out_buffer = StringIO.StringIO()
    filtered_rows = sed.CsvFilter(
        csvkit.CSVKitReader(StringIO.StringIO(source)),
        modifiers,
        header=header,
    )

    csv_out = csvkit.CSVKitWriter(out_buffer)
    for record in filtered_rows:
        csv_out.writerow(record)

    return out_buffer.getvalue()
def __init__(self, input_pipe, encoding='utf-8', delimiter='\t', cols=None):
    """
    Wrap ``input_pipe`` in a CSVKitReader and optionally check its first row.

    The reader is built with the given ``encoding`` and ``delimiter``.
    When ``cols`` is truthy, the first row is consumed from the reader and
    compared, as a tuple, with ``cols``; a RuntimeError is raised if they
    differ.  When ``cols`` is falsy, nothing is read from the reader here.
    """
    self.reader = csvkit.CSVKitReader(
        input_pipe, encoding=encoding, delimiter=delimiter)
    self.input_pipe = input_pipe
    self.cols = cols

    if not cols:
        return

    # Consume the header row before comparing it with the expected columns.
    first_row = self.reader.next()
    if tuple(first_row) != tuple(cols):
        raise RuntimeError(
            'Format mismatch, expected: %s, but got: %s' % (cols, first_row))
def test_utf8(self):
    """Write three rows (the last has a non-ASCII cell) and read them back."""
    expected_rows = [
        ['a', 'b', 'c'],
        ['1', '2', '3'],
        ['4', '5', u'ʤ'],
    ]

    sink = six.StringIO()
    writer = csvkit.CSVKitWriter(sink)
    for row in expected_rows:
        # Hand the writer its own copy so the expected rows stay untouched.
        writer.writerow(list(row))

    reader = csvkit.CSVKitReader(six.StringIO(sink.getvalue()))
    for expected in expected_rows:
        self.assertEqual(next(reader), expected)
def test_utf8(self):
    """
    Round-trip three rows with an explicit utf-8 encoding.

    Also checks that the writer reports ``_eight_bit`` as True when it is
    constructed with ``encoding='utf-8'``.
    """
    expected_rows = [
        ['a', 'b', 'c'],
        ['1', '2', '3'],
        ['4', '5', u'ʤ'],
    ]

    sink = six.StringIO()
    writer = csvkit.CSVKitWriter(sink, encoding='utf-8')
    self.assertEqual(writer._eight_bit, True)

    for row in expected_rows:
        # Hand the writer its own copy so the expected rows stay untouched.
        writer.writerow(list(row))

    reader = csvkit.CSVKitReader(
        six.StringIO(sink.getvalue()), encoding='utf-8')
    for expected in expected_rows:
        self.assertEqual(next(reader), expected)
def load_organization_name_lookup(self):
    """
    Load the organization name standardization mapping.

    Reads ``self.organization_name_lookup_filename`` as CSV, discards the
    first row, and for each remaining row (cells stripped of surrounding
    whitespace):

    * column 0 is the name as found in the ethics data,
    * column 1 is the corrected name (column 0 is used when it is empty),
    * column 2 is the category ('Other' when it is empty).

    The mapping ``ethics name -> corrected name`` is stored in
    ``self.organization_name_lookup`` and an ``Organization`` row is created
    for the corrected name if one does not exist yet.
    """
    with open(self.organization_name_lookup_filename) as f:
        reader = csvkit.CSVKitReader(f)
        reader.next()  # discard the first row (presumably the header)

        for raw_row in reader:
            cells = [unicode.strip(cell) for cell in raw_row]

            ethics_name = cells[0]
            # An empty corrected name means the ethics name is kept as-is.
            correct_name = cells[1] or ethics_name
            category = cells[2] or 'Other'

            self.organization_name_lookup[ethics_name] = correct_name

            try:
                Organization.get(Organization.name == correct_name)
            except Organization.DoesNotExist:
                Organization.create(name=correct_name, category=category)
def test_utf8(self):
    """Read examples/test_utf8.csv and check its first three rows in order."""
    expected_rows = (
        ['a', 'b', 'c'],
        ['1', '2', '3'],
        ['4', '5', u'ʤ'],
    )

    with open('examples/test_utf8.csv', encoding='utf-8') as handle:
        reader = csvkit.CSVKitReader(handle)
        for expected in expected_rows:
            self.assertEqual(next(reader), expected)
# This snippet holds three pieces, in order:
#
# 1. The tail of a routine whose header is outside this view.  It appends a
#    four-item record to `payload` -- the current state, the stripped values
#    of row[4] and row[16], and state_hash looked up by the title-cased
#    state -- and then writes the whole payload with writer.writerows().
#
# 2. get_stateface(): loads data/state_hash.json and data/stateface.json,
#    removes the "US" key from the stateface data (raises KeyError if it is
#    absent), and returns a dict whose keys are state_hash[k] and whose
#    values are the stateface values for each remaining key k.
#
# 3. The __main__ guard: opens data/raw.csv for reading and data/data.csv
#    (mode "wb") for writing, defines the list of region names, initialises
#    `current_state` and `payload` at module level, then calls main().
#
# NOTE(review): the two open() handles in the __main__ guard are never
# closed explicitly; consider `with` blocks so the output file is flushed
# and closed deterministically.
payload.append([ current_state, row[4].strip(), row[16].strip(), state_hash[current_state.title()] ]) writer.writerows(payload) def get_stateface(): with open("data/state_hash.json") as f: state_hash = json.load(f) with open("data/stateface.json") as f: data = json.load(f) data.pop("US") return dict([[state_hash[k], v] for k, v in data.items()]) if __name__ == "__main__": reader = csvkit.CSVKitReader(open("data/raw.csv")) writer = csvkit.CSVKitWriter(open("data/data.csv", "wb")) regions = [ "Middle Atlantic", "Midwest", "East North Central", "West North Central", "South", "South Atlantic", "East South Central", "West South Central", "West", "Mountain.", "Pacific" ] current_state = None payload = [] main()
# Script: turn the rows of departments.csv into WordPress export XML.
#
# Each row read through csvkit.CSVKitReader is mapped positionally onto the
# names in `headers` ('short', 'full', 'description', 'topics', 'url').  Two
# extra keys are added: 'slug' (slugify() of the first cell) and 'index'
# (the current value of `i`).  The resulting dict is interpolated into a
# <wp:category> fragment (`cat_xml`) and an <item> fragment (`page_xml`).
#
# NOTE(review): the `i` bound inside the dict comprehension is local to the
# comprehension, so values['index'] uses the outer `i` (initialised to 1).
# No increment of `i` is visible in this view -- confirm it is advanced
# further down.
# NOTE(review): `f` is opened without a `with` block and is not closed in
# the visible part of the script.
# NOTE(review): the `page_xml` literal continues past the end of this view.
import csvkit from slugify import slugify f = open('departments.csv') reader = csvkit.CSVKitReader(f) i = 1 headers = ['short', 'full', 'description', 'topics', 'url'] for l in reader: values = {header: l[i] for (i, header) in enumerate(headers)} values['slug'] = slugify(l[0]) values['index'] = i cat_xml = """ <wp:category> <wp:term_id>%(index)s</wp:term_id> <wp:category_nicename>%(slug)s</wp:category_nicename> <wp:category_parent/> <wp:cat_name><![CDATA[%(short)s]]></wp:cat_name> <wp:category_description><![CDATA[]]></wp:category_description> </wp:category> """ % (values) # print cat_xml.encode('utf-8').strip() page_xml = """ <item> <title>%(full)s</title> <link>http://localhost:19102/departments/%(slug)s/</link> <pubDate>Fri, 21 Nov 2014 14:35:07 +0000</pubDate> <dc:creator>admin</dc:creator> <guid isPermaLink="false">http://localhost:8080/?post_type=department_page&p=%(index)d</guid>
# This snippet holds three pieces, in order:
#
# 1. The last two `return` statements of a function whose header is outside
#    this view (presumably fix_source_type, which the script below calls --
#    confirm): one branch returns 'Divers', the fall-through returns
#    `sourcetype` unchanged.
#
# 2. fix_direction(i, direction): looks `direction` up in the mapping `i`
#    and returns the mapped value, or `direction` itself when there is no
#    entry for it.
#
# 3. A script that
#    - reads DIRECTIONS with csvkit.DictReader and builds `directions_index`
#      mapping each row's 'original' value to its 'fixed' value;
#    - reads every row of TARGET into memory (`flows`), the first row being
#      the header;
#    - reopens TARGET for writing and writes the header followed by every
#      data row, with the 'sourcetype' column passed through
#      fix_source_type() and the 'direction' column passed through
#      fix_direction().
#
# NOTE(review): TARGET is overwritten in place after being read fully into
# memory; an exception during the write phase would leave it truncated.
return 'Divers' return sourcetype def fix_direction(i, direction): return i.get(direction, direction) # Reading the directions with open(DIRECTIONS, 'r') as df: directions_rows = list(csvkit.DictReader(df, encoding='utf-8')) directions_index = {} for row in directions_rows: directions_index[row['original']] = row['fixed'] # Reading the flows with open(TARGET, 'r') as tf: flows = list(csvkit.CSVKitReader(tf, encoding='utf-8')) headers = flows[0] # Fixing the flows with open(TARGET, 'w') as of: writer = csvkit.CSVKitWriter(of, encoding='utf-8') writer.writerow(headers) si = headers.index('sourcetype') di = headers.index('direction') for row in flows[1:]: row[si] = fix_source_type(row[si]) row[di] = fix_direction(directions_index, row[di]) writer.writerow(row)