def main(argv=None):
    """Script main: apply per-column methods to a tab-separated table.

    Reads a table from stdin.  Lines starting with ``#`` are ignored and the
    first remaining line is taken as the header.  The columns selected with
    ``--columns`` (1-based, comma separated) are converted to floats.  A new
    table is printed to stdout made of

    * the columns that were not selected, if ``--echo`` is given,
    * the selected columns themselves, unless ``--replace`` is given,
    * one extra column headed ``normalized`` per selected column for every
      ``--method normalize`` (each value divided by the column total).

    Parses command line options in sys.argv, unless *argv* is given.

    NOTE(review): ``parser`` is not created inside this function; it has to
    be supplied by the enclosing module.
    NOTE(review): *argv* is defaulted here but is not handed on to
    ``E.start``; confirm whether ``E.start`` should receive it.

    Raises:
        ValueError: if stdin contains no header line or no data rows.
        ZeroDivisionError: if a column to normalize sums to zero.
    """
    if argv is None:
        argv = sys.argv

    parser.add_option("-c", "--columns", dest="columns", type="string",
                      help="columns to take from table.")
    parser.add_option("-m", "--method", dest="methods", type="string",
                      help="methods to apply to columns.",
                      action="append")
    parser.add_option("-e", "--echo", dest="echo", action="store_true",
                      help="echo columns not taken.")
    parser.add_option("-r", "--replace", dest="replace", action="store_true",
                      help="replace orginial values.")

    parser.set_defaults(
        columns="1",
        echo=False,
        replace=False,
        format="%5.2f",
        methods=[])

    (options, args) = E.start(parser)

    # Columns are 1-based on the command line, 0-based internally.
    options.columns = [int(x) - 1 for x in options.columns.split(",")]

    print(E.GetHeader())
    print(E.GetParams())

    # Read the table, dropping comment lines.
    lines = [x for x in sys.stdin.readlines() if not x.startswith("#")]
    if not lines:
        raise ValueError("no data found")

    # rstrip("\n") instead of [:-1]: the last line of the input may lack a
    # trailing newline, in which case [:-1] would cut off a real character.
    headers = lines[0].rstrip("\n").split("\t")
    rows = lines[1:]
    if not rows:
        raise ValueError("no data found")

    selected = set(options.columns)
    notcolumns = [x for x in range(len(headers)) if x not in selected]

    # One list of values per input column: floats for selected columns,
    # the untouched strings for all others.
    data = [[] for _ in headers]
    for row in rows:
        fields = row.rstrip("\n").split("\t")
        for c in options.columns:
            data[c].append(float(fields[c]))
        for c in notcolumns:
            data[c].append(fields[c])

    totals = [0] * len(headers)
    for c in options.columns:
        totals[c] = sum(data[c])

    new_columns = []
    new_headers = []

    if options.echo:
        for c in notcolumns:
            new_headers.append(headers[c])
            new_columns.append(data[c])

    for c in options.columns:
        if not options.replace:
            new_columns.append(data[c])
            new_headers.append(headers[c])

        for method in options.methods:
            if method == "normalize":
                new_columns.append([d / totals[c] for d in data[c]])
                new_headers.append("normalized")

    print("\t".join(new_headers))

    # Transpose the column lists back into rows for output.
    for row in zip(*new_columns):
        print("\t".join(map(str, row)))

    E.stop()
def main(argv=None):
    """Script main: combine several histogram files into one table.

    Every positional argument names a tab-separated histogram file.  Lines
    starting with ``#`` are skipped, missing files are reported and skipped,
    and the remaining rows are parsed as floats.  The histograms are merged
    with ``Histogram.Combine``, optionally normalized with
    ``Histogram.Normalize`` and written with ``Histogram.Print``.

    Column headers come from, in order of precedence:

    * ``-h auto``: the base name of each file,
    * ``-h a,b,c``: the first name heads the bin column, the others are
      assigned to the files by position,
    * the title line of each file (unless ``--no-titles`` is given).

    ``-s`` reorders the histograms: ``numerical`` sorts the headers as
    integers, ``alphabetic``/``alphabetical`` sorts them as strings, and
    any other value is read as an explicit comma-separated header order.

    Parses command line options in sys.argv, unless *argv* is given.

    NOTE(review): the ``param_*`` settings and the getopt option strings are
    not defined in this function and are assumed to be module-level
    defaults - confirm against the top of the file.
    NOTE(review): the long option name ``--method=sort --sort-order`` can
    never be returned by getopt; the intended name is not visible here, so
    it is left untouched.
    """
    # The settings are assigned below.  Without this declaration Python
    # treats them as function locals, and reading any setting whose option
    # was not given on the command line raises UnboundLocalError.
    global param_headers, param_normalize, param_missing_value
    global param_titles, param_format, param_format_value
    global param_format_bin, param_sort

    if argv is None:
        argv = sys.argv

    try:
        # Parse *argv*, not sys.argv, so that a caller-supplied argument
        # list is honoured.
        optlist, args = getopt.getopt(
            argv[1:], param_short_options, param_long_options)
    except getopt.error as msg:
        print(globals()["__doc__"], msg)
        sys.exit(1)

    for o, a in optlist:
        if o in ("--help", ):
            print(globals()["__doc__"])
            sys.exit(0)
        elif o in ("--version", ):
            print("version=")
            sys.exit(0)
        elif o in ("-h", "--header-names"):
            # Keep the keyword "auto" as a plain string; splitting it into
            # ["auto"] would make the auto-header branch unreachable.
            param_headers = a if a == "auto" else a.split(",")
        elif o in ("-n", "--normalize"):
            param_normalize = 1
        elif o in ("-m", "--missing-value"):
            param_missing_value = a
        elif o == "--no-titles":
            param_titles = False
        elif o in ("-f", "--format"):
            param_format = a
        elif o == "--format-value":
            param_format_value = a
        elif o == "--bin-format":
            param_format_bin = a
        elif o in ("-s", "--method=sort --sort-order"):
            # Both spellings are keywords; anything else is an explicit
            # list of headers.
            if a in ("numerical", "alphabetic", "alphabetical"):
                param_sort = a
            else:
                param_sort = a.split(",")

    if len(args) < 1:
        print(globals()["__doc__"], "please specify at one histogram.")
        sys.exit(1)

    param_filenames = args

    print(E.GetHeader())
    print(E.GetParams())

    histograms = []

    # The first header names the bin column.
    headers = ['bin', ]
    if param_headers and param_headers != "auto":
        headers = [param_headers[0], ]
        # Rebind instead of deleting in place so a module-level default
        # list is never mutated.
        param_headers = param_headers[1:]

    for x, filename in enumerate(param_filenames):

        if not os.path.exists(filename):
            print("# skipped because file not present: %s" % filename)
            continue

        infile = IOTools.open_file(filename, "r")
        try:
            lines = [line for line in infile if not line.startswith("#")]
        finally:
            infile.close()

        if len(lines) == 0:
            continue

        if param_titles:
            # The first column of the title line heads the bins.
            h = lines[0].rstrip("\n").split("\t")[1:]
            del lines[0]

        if param_headers == "auto":
            headers.append(os.path.basename(filename))
        elif param_headers:
            headers.append(param_headers[x])
        elif param_titles:
            headers += h

        # rstrip("\n") rather than [:-1]: a final line without a newline
        # would otherwise silently lose its last digit.
        data = [list(map(float, line.rstrip("\n").split("\t")))
                for line in lines]

        # add empty data point for empty histograms
        if len(data) == 0:
            data = [(0, 0)]

        histograms.append(data)

    # sort the whole thing:
    if param_sort:
        if param_sort == "numerical":
            # sorted() is stable, so equal keys keep their input order.
            sort_order = sorted(headers[1:], key=int)
        elif param_sort in ("alphabetic", "alphabetical"):
            sort_order = sorted(headers[1:])
        else:
            sort_order = param_sort

        # map header to old position (position 0 is the bin column)
        map_header2pos = {
            name: pos for pos, name in enumerate(headers) if pos > 0}

        # Headers named in the sort order but absent from the data are
        # ignored.
        order = [map_header2pos[name]
                 for name in sort_order if name in map_header2pos]

        new_headers = [headers[0]] + [headers[pos] for pos in order]
        # Histogram i belongs to header i + 1.
        histograms = [histograms[pos - 1] for pos in order]
        headers = new_headers

    combined_histogram = Histogram.Combine(histograms, param_missing_value)

    if headers:
        print("\t".join(headers))

    if param_normalize:
        combined_histogram = Histogram.Normalize(combined_histogram)

    Histogram.Print(
        combined_histogram,
        format_bin=param_format_bin,
        format_value=param_format_value,
    )

    print(E.GetFooter())
def main(argv=None):
    """Script main: split the stream on stdin into several output files.

    Two modes exist:

    * Column mode (``-c/--column``): every non-comment line is written to
      the file obtained by substituting the value of the given column
      (1-based) for ``%s`` in the output pattern.  The key may first be
      reduced to the first group of ``--pattern-identifier`` and translated
      through the two-column ``--map`` file; keys missing from a non-empty
      map are dropped.  With ``-e/--header`` the first line is kept as the
      header and passed to ``CreateOpen``.  With ``-k/--remove-key`` the
      key column is removed from the output.
    * Chunk mode (default): lines are copied to numbered files; a new file
      is started each time ``--chunk-size`` lines matching
      ``-r/--split-regex`` have been seen.  ``-a/--after`` keeps the
      matching line in the old chunk, ``-s/--skip`` drops it.

    A summary line is written to stdout when the log level is at least 1.

    Parses command line options in sys.argv, unless *argv* is given.

    NOTE(review): ``CreateOpen``, ``USAGE``, ``E`` and ``IOTools`` are
    defined outside this function; how ``CreateOpen`` uses its *header*
    argument cannot be seen from here.
    """
    if argv is None:
        argv = sys.argv

    param_long_options = [
        "verbose=", "help", "split-regex=", "after", "pattern-output=",
        "skip", "column=", "map=", "dry-run", "header", "remove-key",
        "append", "pattern-identifier=", "version", "chunk-size="]

    param_short_options = "v:hr:ap:sc:dek"

    param_loglevel = 1
    param_split_at_regex = None
    param_after = None
    param_skip = None
    param_pattern_output = "%s.chunk"
    param_split_column = None
    param_filename_map = None
    param_dry_run = False
    param_header = False
    param_remove_key = False
    param_append = "w"
    param_pattern_identifier = None
    param_chunk_size = 1

    try:
        # Parse *argv*, not sys.argv, so that a caller-supplied argument
        # list is honoured.
        optlist, args = getopt.getopt(
            argv[1:], param_short_options, param_long_options)
    except getopt.error as msg:
        print(USAGE, msg)
        sys.exit(1)

    for o, a in optlist:
        if o in ("-v", "--verbose"):
            param_loglevel = int(a)
        elif o in ("--version", ):
            print("version=")
            sys.exit(0)
        elif o in ("-h", "--help"):
            print(USAGE)
            sys.exit(0)
        elif o in ("-r", "--split-regex"):
            param_split_at_regex = re.compile(a)
        elif o in ("-a", "--after"):
            param_after = 1
        elif o in ("-s", "--skip"):
            param_skip = 1
        elif o in ("-p", "--pattern-output"):
            param_pattern_output = a
        elif o in ("-c", "--column"):
            # 1-based on the command line, 0-based internally.
            param_split_column = int(a) - 1
        elif o in ("-m", "--map"):
            param_filename_map = a
        elif o in ("-d", "--dry-run"):
            param_dry_run = True
        elif o in ("-e", "--header", "--header-names"):
            # getopt reports the long option as "--header" (see
            # param_long_options); the old spelling is kept as well.
            param_header = True
        elif o in ("-k", "--remove-key"):
            # "-k" is the short flag declared in param_short_options;
            # "-r" is already taken by --split-regex above.
            param_remove_key = True
        elif o == "--append":
            param_append = "a"
        elif o == "--pattern-identifier":
            param_pattern_identifier = re.compile(a)
        elif o == "--chunk-size":
            param_chunk_size = int(a)

    print(E.GetHeader())
    print(E.GetParams())

    # Optional translation table: first column -> second column.
    mymap = {}
    if param_filename_map:
        infile = IOTools.open_file(param_filename_map, "r")
        try:
            for line in infile:
                if line[0] == "#":
                    continue
                data = line[:-1].split("\t")[:2]
                mymap[data[0]] = data[1]
        finally:
            infile.close()

    filenames = set()
    found = set()
    ninput, noutput = 0, 0

    if param_split_column is not None:

        header = None
        files = {}
        for line in sys.stdin:

            if line[0] == "#":
                continue

            ninput += 1

            if param_header:
                # The first data line is the header; remember it and move on.
                if not header:
                    header = line[:-1]
                    continue
            else:
                header = None

            data = line[:-1].split("\t")

            try:
                key = data[param_split_column]
            except IndexError:
                # Line has too few columns to contain the key.
                continue

            if param_pattern_identifier:
                key = param_pattern_identifier.search(key).groups()[0]

            if mymap:
                if key in mymap:
                    key = mymap[key]
                else:
                    continue

            found.add(key)

            # Plain substitution: the key is data, not a regex template.
            filename = param_pattern_output.replace("%s", key)
            filenames.add(filename)

            if filename not in files:

                # reset if too many files are open
                if len(files) > 1000:
                    if param_loglevel >= 1:
                        print("# resetting all files.")
                        sys.stdout.flush()

                    for f in files.values():
                        f.close()
                    files = {}

                # Opened in append mode so a file closed by a reset above
                # can be continued later.
                files[filename] = CreateOpen(
                    filename, "a", param_dry_run, header)

            if param_remove_key:
                del data[param_split_column]
                files[filename].write("\t".join(data) + "\n")
            else:
                files[filename].write(line)

            noutput += 1

        for f in files.values():
            f.close()

    else:
        file_id = 0

        filename = param_pattern_output.replace("%s", str(file_id))
        outfile = CreateOpen(filename, param_append, param_dry_run)
        nlines = 0

        header = param_header
        split = 0

        for line in sys.stdin:

            if param_split_at_regex and \
                    param_split_at_regex.search(line[:-1]):
                split += 1

            if split == param_chunk_size:
                if param_after:
                    # The matching line still belongs to the old chunk.
                    nlines += 1
                    outfile.write(line)
                if nlines > 0:
                    # Only start a new file if the current one has content.
                    outfile.close()
                    file_id += 1
                    filename = param_pattern_output.replace(
                        "%s", str(file_id))
                    outfile = CreateOpen(
                        filename, param_append, param_dry_run, header)
                    filenames.add(filename)
                split = 0
                nlines = 0
                if param_after or param_skip:
                    continue

            outfile.write(line)
            nlines += 1

        outfile.close()

    if param_loglevel >= 1:
        sys.stdout.write(
            "# ninput=%i, noutput=%i, nfound=%i, nnotfound=%i, nfiles=%i\n" % (
                ninput,
                noutput,
                len(found),
                len(set(mymap).difference(found)),
                len(filenames)))

    print(E.GetFooter())