def main(argv=None): """script main. parses command line options in sys.argv, unless *argv* is given. """ if argv is None: argv = sys.argv parser = E.OptionParser( version= "%prog version: $Id: csv_cut.py 2782 2009-09-10 11:40:29Z andreas $", usage=globals()["__doc__"]) parser.add_option("-r", "--remove", dest="remove", action="store_true", help="remove specified columns, keep all others.") parser.add_option("-u", "--unique", dest="unique", action="store_true", help="output rows are uniq.") parser.add_option( "-l", "--large", dest="large", action="store_true", help= "large columns. Do not use native python CSV module [default=%default]." ) parser.add_option("-f", "--filename-fields", dest="filename_fields", type="string", help="filename with field information.") parser.set_defaults( remove=False, unique=False, large=False, filename_fields=None, ) (options, args) = E.start(parser, add_csv_options=True, quiet=True) input_fields = args if options.filename_fields: input_fields = [ x[:-1].split("\t")[0] for x in [ x for x in IOTools.open_file(options.filename_fields, "r").readlines() if x[0] != "#" ] ] if options.unique: outfile = UniqueBuffer(options.stdout) else: outfile = options.stdout while 1: line = options.stdin.readline() if not line: E.stop() sys.exit(0) if line[0] == "#": continue first_line = line break old_fields = first_line[:-1].split("\t") fields = [] for f in input_fields: # do pattern search if f[0] == "%" and f[-1] == "%": pattern = re.compile(f[1:-1]) for o in old_fields: if pattern.search(o) and o not in fields: fields.append(o) else: if f in old_fields: fields.append(f) if options.remove: fields = set(fields) fields = [x for x in old_fields if x not in fields] if options.large: reader = CSV.DictReaderLarge(CSV.CommentStripper(options.stdin), fieldnames=old_fields, dialect=options.csv_dialect) else: reader = csv.DictReader(CSV.CommentStripper(options.stdin), fieldnames=old_fields, dialect=options.csv_dialect) writer = csv.DictWriter(outfile, fields, dialect=options.csv_dialect, lineterminator=options.csv_lineterminator, extrasaction='ignore') print("\t".join(fields)) first_row = True ninput, noutput, nerrors = 0, 0, 0 while 1: ninput += 1 try: row = six.next(reader) except _csv.Error as msg: options.stderr.write("# error while parsing: %s\n" % (msg)) nerrors += 1 continue except StopIteration: break if not row: break writer.writerow(row) noutput += 1 E.info("ninput=%i, noutput=%i, nerrors=%i" % (ninput, noutput, nerrors)) E.stop()
def main(argv=None): """script main. parses command line options in sys.argv, unless *argv* is given. """ if argv is None: argv = sys.argv parser = E.OptionParser( version= "%prog version: $Id: csv_cut.py 2782 2009-09-10 11:40:29Z andreas $", usage=globals()["__doc__"]) parser.add_option("-r", "--remove", dest="remove", action="store_true", help="remove specified columns, keep all others.") parser.add_option("-u", "--unique", dest="unique", action="store_true", help="output rows are uniq.") parser.add_option( "-l", "--large", dest="large", action="store_true", help= "large columns. Do not use native python CSV module [default=%default]." ) parser.add_option("-f", "--filename-fields", dest="filename_fields", type="string", help="filename with field information.") parser.set_defaults( remove=False, unique=False, filename_fields=None, ) (options, args) = E.start(parser, add_csv_options=True, quiet=True) statement = " ".join(args) if options.large: reader = CSV.DictReaderLarge(CSV.CommentStripper(sys.stdin), dialect=options.csv_dialect) else: reader = csv.DictReader(CSV.CommentStripper(sys.stdin), dialect=options.csv_dialect) exec("f = lambda r: %s" % statement, globals()) counter = E.Counter() writer = csv.DictWriter(options.stdout, reader.fieldnames, dialect=options.csv_dialect, lineterminator=options.csv_lineterminator) writer.writerow(dict((fn, fn) for fn in reader.fieldnames)) while 1: counter.input += 1 try: row = next(reader) except _csv.Error as msg: options.stderr.write("# error while parsing: %s\n" % (msg)) counter.errors += 1 continue except StopIteration: break if not row: break if f(row): writer.writerow(row) counter.output += 1 else: counter.filtered += 1 E.info("%s" % counter) E.stop()