示例#1
0
文件: sed.py 项目: dsnopek/csvsed
 def __init__(self, spec):
     """Parse an ``e`` modifier spec and, in CSV mode, start its command.

     The spec is split on its second character, so it has the shape
     ``e<D>command<D>flags`` where ``<D>`` is whatever follows the
     leading ``e``. Flags are matched case-insensitively: ``i`` sets
     ``self.index`` to 1 (otherwise ``None``) and ``c`` turns on
     ``self.csv``.

     Only when ``self.csv`` is true is ``self.command`` launched here
     (through the shell, with all three standard streams piped), and
     ``self.writer`` / ``self.reader`` are attached to the child's
     stdin / stdout. Without the ``c`` flag this method does not set
     ``proc``, ``writer`` or ``reader``.

     Raises:
         InvalidModifierSpec: if ``spec`` is empty, shorter than three
             characters, does not start with ``e``, or does not split
             into exactly three parts on its delimiter.
     """
     super(E_modifier, self).__init__()
     well_formed = bool(spec) and len(spec) >= 3 and spec[0] == 'e'
     if not well_formed:
         raise InvalidModifierSpec(spec)
     delimiter = spec[1]
     fields = spec.split(delimiter)
     if len(fields) != 3:
         raise InvalidModifierSpec(spec)
     flags = fields[2].lower()
     self.command = fields[1]
     if 'i' in flags:
         self.index = 1
     else:
         self.index = None
     self.csv = 'c' in flags
     if self.csv:
         pipe = subprocess.PIPE
         self.proc = subprocess.Popen(self.command, shell=True, bufsize=0,
                                      stdin=pipe, stdout=pipe, stderr=pipe)
         self.writer = csvkit.CSVKitWriter(self.proc.stdin)
         # note: not using csvkit's reader because there is no easy way of
         # making it not read-ahead (which breaks the "continuous" mode).
         # self.reader = csvkit.CSVKitReader(self.proc.stdout)
         # todo: fix csvkit so that it can be used in non-read-ahead mode.
         self.reader = csv.reader(ReadlineIterator(self.proc.stdout))
示例#2
0
 def __init__(self, output_pipe, delimiter='\t', encoding='utf-8', cols=None):
     """Wrap ``output_pipe`` in a CSVKit writer and emit an optional header.

     Args:
         output_pipe: target handed to ``csvkit.CSVKitWriter``; also kept
             on ``self.output_pipe``.
         delimiter: forwarded to the writer; defaults to a tab.
         encoding: forwarded to the writer; defaults to ``'utf-8'``.
         cols: optional column names, kept on ``self.cols``. When truthy
             they are written immediately as the first row.
     """
     writer_options = {'delimiter': delimiter, 'encoding': encoding}
     self.writer = csvkit.CSVKitWriter(output_pipe, **writer_options)
     self.output_pipe = output_pipe
     self.cols = cols
     if not cols:
         return
     # A column list was supplied: emit it as the header row.
     self.writer.writerow(cols)
示例#3
0
def run(source, modifiers, header=True):
  """Filter CSV text through ``sed.CsvFilter`` and return the CSV output.

  ``source`` is parsed with ``csvkit.CSVKitReader``, the rows are passed
  through ``sed.CsvFilter(reader, modifiers, header=header)``, and every
  row the filter yields is written with ``csvkit.CSVKitWriter`` into an
  in-memory buffer whose contents are returned.
  """
  in_buffer = StringIO.StringIO(source)
  out_buffer = StringIO.StringIO()
  filtered_rows = sed.CsvFilter(
    csvkit.CSVKitReader(in_buffer), modifiers, header=header)
  csv_out = csvkit.CSVKitWriter(out_buffer)
  for record in filtered_rows:
    csv_out.writerow(record)
  return out_buffer.getvalue()
示例#4
0
    def test_utf8(self):
        """Rows written with CSVKitWriter (one holding a non-ASCII cell) read back equal."""
        rows = (
            ['a', 'b', 'c'],
            ['1', '2', '3'],
            ['4', '5', u'ʤ'],
        )
        output = six.StringIO()
        writer = csvkit.CSVKitWriter(output)
        for row in rows:
            # Hand the writer a fresh list so the expected rows stay untouched.
            writer.writerow(list(row))

        reader = csvkit.CSVKitReader(six.StringIO(output.getvalue()))
        for expected in rows:
            self.assertEqual(next(reader), expected)
示例#5
0
    def test_utf8(self):
        """With an explicit utf-8 encoding, written rows read back equal.

        Also checks that the writer reports ``_eight_bit`` as ``True`` for
        this encoding before anything is written.
        """
        rows = (
            ['a', 'b', 'c'],
            ['1', '2', '3'],
            ['4', '5', u'ʤ'],
        )
        output = six.StringIO()
        writer = csvkit.CSVKitWriter(output, encoding='utf-8')
        self.assertEqual(writer._eight_bit, True)
        for row in rows:
            # Hand the writer a fresh list so the expected rows stay untouched.
            writer.writerow(list(row))

        written = six.StringIO(output.getvalue())
        reader = csvkit.CSVKitReader(written, encoding='utf-8')
        for expected in rows:
            self.assertEqual(next(reader), expected)
示例#6
0
            payload.append([
                current_state, row[4].strip(), row[16].strip(),
                state_hash[current_state.title()]
            ])

    writer.writerows(payload)


def get_stateface():
    """Return the stateface values re-keyed through the state hash.

    Loads ``data/state_hash.json`` and ``data/stateface.json`` (relative to
    the current working directory), drops the ``"US"`` entry from the
    stateface data, and returns a dict mapping ``state_hash[key]`` to the
    stateface value for every remaining key.

    Raises:
        KeyError: if ``"US"`` is absent from the stateface data, or a
            stateface key has no entry in the state hash.
    """
    with open("data/state_hash.json") as hash_file:
        state_hash = json.load(hash_file)
    with open("data/stateface.json") as face_file:
        faces = json.load(face_file)
    faces.pop("US")
    return {state_hash[code]: value for code, value in faces.items()}


if __name__ == "__main__":
    # Script entry point: set up the module-level names (reader, writer,
    # regions, current_state, payload) and then call main().
    # NOTE(review): main() presumably reads these globals - confirm against
    # its definition.
    #
    # Both files are opened in a ``with`` block so they are closed (and the
    # output flushed) when main() returns or raises, instead of relying on
    # interpreter teardown.
    with open("data/raw.csv") as raw_file, \
            open("data/data.csv", "wb") as out_file:
        reader = csvkit.CSVKitReader(raw_file)
        writer = csvkit.CSVKitWriter(out_file)

        regions = [
            "Middle Atlantic", "Midwest", "East North Central",
            "West North Central", "South", "South Atlantic",
            "East South Central", "West South Central", "West", "Mountain.",
            "Pacific"
        ]
        current_state = None
        payload = []

        main()
示例#7
0
  return sourcetype

def fix_direction(i, direction):
  """Return the replacement for ``direction`` from ``i``, or ``direction`` itself.

  ``i`` is looked up with ``.get``; a value with no entry is passed
  through unchanged.
  """
  corrected = i.get(direction, direction)
  return corrected

# Reading the directions: build a lookup from each 'original' value to its
# 'fixed' replacement.
with open(DIRECTIONS, 'r') as df:
  directions_rows = list(csvkit.DictReader(df, encoding='utf-8'))
directions_index = {row['original']: row['fixed'] for row in directions_rows}

# Reading the flows: the whole file is loaded into memory because TARGET is
# rewritten in place below. The first row holds the column names.
with open(TARGET, 'r') as tf:
  flows = list(csvkit.CSVKitReader(tf, encoding='utf-8'))
headers = flows[0]

# Resolve the column positions and fix every row BEFORE reopening TARGET for
# writing. Opening with 'w' truncates the file, so a missing column or an
# error raised while fixing a row must surface while the original data is
# still on disk.
si = headers.index('sourcetype')
di = headers.index('direction')

for row in flows[1:]:
  row[si] = fix_source_type(row[si])
  row[di] = fix_direction(directions_index, row[di])

# Fixing the flows: write the header followed by the corrected rows.
with open(TARGET, 'w') as of:
  writer = csvkit.CSVKitWriter(of, encoding='utf-8')
  writer.writerow(headers)

  for row in flows[1:]:
    writer.writerow(row)