示例#1
0
def run(source, modifiers, header=True):
  """Pass CSV text through ``sed.CsvFilter`` and return the resulting CSV text.

  ``source`` is wrapped in an in-memory stream and parsed by
  ``csvkit.CSVKitReader``; the parsed rows are fed to ``sed.CsvFilter``
  (constructed with ``modifiers`` and ``header``), and every row the filter
  yields is serialised by ``csvkit.CSVKitWriter``.

  Returns the full contents of the in-memory output stream.
  """
  in_stream = StringIO.StringIO(source)
  out_stream = StringIO.StringIO()

  parsed_rows = csvkit.CSVKitReader(in_stream)
  filtered_rows = sed.CsvFilter(parsed_rows, modifiers, header=header)

  sink = csvkit.CSVKitWriter(out_stream)
  for record in filtered_rows:
    sink.writerow(record)

  return out_stream.getvalue()
示例#2
0
 def __init__(self, input_pipe, encoding='utf-8', delimiter='\t', cols=None):
     """Wrap ``input_pipe`` in a CSV reader and optionally verify its header.

     Args:
         input_pipe: object handed to ``csvkit.CSVKitReader`` as its input.
         encoding: forwarded to the reader (default ``'utf-8'``).
         delimiter: forwarded to the reader (default is a tab).
         cols: expected header columns. When truthy, the first row is
             consumed from the reader and compared against it.

     Raises:
         RuntimeError: if ``cols`` is given and the first row does not
             match it.
     """
     self.reader = csvkit.CSVKitReader(input_pipe, encoding=encoding,
                                       delimiter=delimiter)
     self.input_pipe = input_pipe
     self.cols = cols
     if cols:
         # The built-in next() works whether the reader implements the
         # Python 2 ``.next()`` or the Python 3 ``.__next__()`` protocol.
         header = next(self.reader)  # consume the header
         # Compare as tuples so a list header can match a tuple of cols.
         if tuple(header) != tuple(cols):
             raise RuntimeError('Format mismatch, expected: %s, but got: %s' %
                                (cols, header))
示例#3
0
    def test_utf8(self):
        """Rows written with CSVKitWriter read back equal via CSVKitReader."""
        fixture = (
            ('a', 'b', 'c'),
            ('1', '2', '3'),
            ('4', '5', u'ʤ'),
        )

        buf = six.StringIO()
        writer = csvkit.CSVKitWriter(buf)
        for cells in fixture:
            # Hand the writer its own list so the fixture stays untouched.
            writer.writerow(list(cells))

        # Re-wrap the serialised text in a fresh stream for reading.
        reader = csvkit.CSVKitReader(six.StringIO(buf.getvalue()))
        for cells in fixture:
            self.assertEqual(next(reader), list(cells))
示例#4
0
    def test_utf8(self):
        """Round-trip rows through writer and reader with explicit utf-8."""
        fixture = (
            ('a', 'b', 'c'),
            ('1', '2', '3'),
            ('4', '5', u'ʤ'),
        )

        buf = six.StringIO()
        writer = csvkit.CSVKitWriter(buf, encoding='utf-8')
        # Checked before any row is written, as in the original test.
        self.assertEqual(writer._eight_bit, True)
        for cells in fixture:
            # Hand the writer its own list so the fixture stays untouched.
            writer.writerow(list(cells))

        # Re-wrap the serialised text in a fresh stream for reading.
        reader = csvkit.CSVKitReader(
            six.StringIO(buf.getvalue()), encoding='utf-8')
        for cells in fixture:
            self.assertEqual(next(reader), list(cells))
示例#5
0
    def load_organization_name_lookup(self):
        """
        Load the organization name standardization mapping.

        Reads the CSV at ``self.organization_name_lookup_filename``. The
        first row is discarded; every later row is read as
        ``(ethics_name, correct_name, category)`` after stripping whitespace
        from each cell. An empty ``correct_name`` falls back to
        ``ethics_name`` and an empty ``category`` falls back to ``'Other'``.

        For each row, ``self.organization_name_lookup[ethics_name]`` is set
        to the corrected name, and an ``Organization`` with that name is
        created if one does not already exist.
        """
        with open(self.organization_name_lookup_filename) as f:
            reader = csvkit.CSVKitReader(f)
            # Built-in next() works on both Python 2 and Python 3 iterators.
            next(reader)  # discard the first row (presumably a header)

            for row in reader:
                # Build a real list: the cells are indexed below, and a lazy
                # map() object (Python 3) would not support that.
                row = [cell.strip() for cell in row]

                ethics_name = row[0]
                correct_name = row[1] or ethics_name
                category = row[2] or 'Other'

                self.organization_name_lookup[ethics_name] = correct_name

                # Create the organization only if it is not stored yet.
                try:
                    Organization.get(Organization.name == correct_name)
                except Organization.DoesNotExist:
                    Organization.create(name=correct_name, category=category)
示例#6
0
 def test_utf8(self):
     """Reading examples/test_utf8.csv yields the three expected rows."""
     expected_rows = (
         ['a', 'b', 'c'],
         ['1', '2', '3'],
         ['4', '5', u'ʤ'],
     )
     with open('examples/test_utf8.csv', encoding='utf-8') as f:
         reader = csvkit.CSVKitReader(f)
         for expected in expected_rows:
             self.assertEqual(next(reader), expected)
示例#7
0
            payload.append([
                current_state, row[4].strip(), row[16].strip(),
                state_hash[current_state.title()]
            ])

    writer.writerows(payload)


def get_stateface():
    """Return the stateface table re-keyed through the state hash.

    Loads ``data/state_hash.json`` and ``data/stateface.json``, removes the
    ``"US"`` entry from the stateface data, and returns a dict mapping
    ``state_hash[key]`` to the stateface value for every remaining key.

    Raises:
        KeyError: if ``"US"`` is missing from the stateface data, or a
            stateface key has no entry in the state hash.
    """
    with open("data/state_hash.json") as hash_file:
        state_hash = json.load(hash_file)
    with open("data/stateface.json") as face_file:
        faces = json.load(face_file)

    # The "US" entry is excluded from the result.
    del faces["US"]
    return {state_hash[key]: value for key, value in faces.items()}


if __name__ == "__main__":
    # Hold both files in a context manager so they are closed, and the output
    # is flushed to disk, even if main() raises.
    with open("data/raw.csv") as raw_file, \
            open("data/data.csv", "wb") as out_file:
        reader = csvkit.CSVKitReader(raw_file)
        writer = csvkit.CSVKitWriter(out_file)

        # Module-level state set up before main() runs.
        # NOTE(review): main() presumably reads these names as globals;
        # confirm against its definition.
        regions = [
            "Middle Atlantic", "Midwest", "East North Central",
            "West North Central", "South", "South Atlantic",
            "East South Central", "West South Central", "West", "Mountain.",
            "Pacific",
        ]
        current_state = None
        payload = []

        main()
示例#8
0
import csvkit
from slugify import slugify

# Input CSV; the handle stays open for the reader that is iterated below.
f = open('departments.csv')
reader = csvkit.CSVKitReader(f)
# Stored as each row's 'index' value in the loop below.
# NOTE(review): no increment is visible in this excerpt -- confirm it advances.
i = 1
# Field names, in column order, used to label the cells of every row.
headers = ['short', 'full', 'description', 'topics', 'url']

for l in reader:
    values = {header: l[i] for (i, header) in enumerate(headers)}
    values['slug'] = slugify(l[0])
    values['index'] = i

    cat_xml = """
<wp:category>
  <wp:term_id>%(index)s</wp:term_id>
  <wp:category_nicename>%(slug)s</wp:category_nicename>
  <wp:category_parent/>
  <wp:cat_name><![CDATA[%(short)s]]></wp:cat_name>
  <wp:category_description><![CDATA[]]></wp:category_description>
</wp:category>
    """ % (values)
    # print cat_xml.encode('utf-8').strip()

    page_xml = """
<item>
  <title>%(full)s</title>
  <link>http://localhost:19102/departments/%(slug)s/</link>
  <pubDate>Fri, 21 Nov 2014 14:35:07 +0000</pubDate>
  <dc:creator>admin</dc:creator>
  <guid isPermaLink="false">http://localhost:8080/?post_type=department_page&amp;p=%(index)d</guid>
示例#9
0
    return 'Divers'
  return sourcetype

def fix_direction(i, direction):
  """Return the replacement for ``direction`` found in the mapping ``i``.

  A ``direction`` with no entry in ``i`` is returned unchanged.
  """
  if direction in i:
    return i[direction]
  return direction

# Load the direction corrections: each row supplies an 'original' value and
# the 'fixed' value that should replace it.
with open(DIRECTIONS, 'r') as df:
  directions_rows = list(csvkit.DictReader(df, encoding='utf-8'))

# The rows are fully materialised above, so the lookup table can be built
# after the file has been closed.
directions_index = {}
for row in directions_rows:
  directions_index[row['original']] = row['fixed']

# Load every row of the flows file into memory; TARGET is rewritten later, so
# nothing may be read lazily from it.
with open(TARGET, 'r') as tf:
  flows = list(csvkit.CSVKitReader(tf, encoding='utf-8'))

# The first row carries the column names.
headers = flows[0]

# Fixing the flows
# Do all fallible work BEFORE reopening TARGET: mode 'w' truncates the file
# immediately, so a missing column or a failing fix must surface while the
# original contents are still on disk.
si = headers.index('sourcetype')
di = headers.index('direction')

for row in flows[1:]:
  row[si] = fix_source_type(row[si])
  row[di] = fix_direction(directions_index, row[di])

# Every row is now fixed in memory; write the header and the rows back.
with open(TARGET, 'w') as of:
  writer = csvkit.CSVKitWriter(of, encoding='utf-8')
  writer.writerow(headers)

  for row in flows[1:]:
    writer.writerow(row)