def main(argv=None):
    """Compute histograms for selected columns of a tab-separated table.

    The table is read from stdin; lines starting with ``#`` are ignored.
    Each selected column is converted to floats (non-numeric cells are
    skipped), optionally scaled and clamped to the lower/upper limits, and
    handed to ``Histogram.Calculate``. A single histogram is printed
    directly; several are merged with ``Histogram.Combine`` first.

    Parameters
    ----------
    argv : list of str, optional
        Command line to parse, including the program name. Defaults to
        ``sys.argv``.
    """
    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-n", "--nonull", dest="nonull", action="store_true",
                      help="no null [default=%default]")

    parser.add_option("-e", "--show-empty", dest="empty_bins",
                      action="store_true",
                      help="show empty bins [default=%default]")

    parser.add_option("-o", "--normalize", dest="normalize",
                      action="store_true",
                      help="normalize histogram [default=%default]")

    parser.add_option("-i", "--titles", dest="titles", action="store_true",
                      help="use titles supplied in ... [default=%default]")

    parser.add_option("--cumulative", dest="cumulative", action="store_true",
                      help="compute cumulative histogram [default=%default]")

    parser.add_option(
        "--reverse-cumulative", dest="reverse_cumulative",
        action="store_true",
        help="compute reverse cumulative histogram [default=%default]")

    # The option used to be stored as an int in ``options.column`` while all
    # of the code below reads ``options.columns`` and splits it on ",".
    # Store it where it is read, as a string, so that "3", "1,2" and "all"
    # are all accepted.
    parser.add_option("-c", "--column", dest="columns", type="string",
                      help="comma-separated list of columns to take "
                      "(1-based) or 'all' [default=%default]")

    parser.add_option("-b", "--bin-size", dest="bin_size", type="float",
                      help="bin size to use [default=%default]")

    parser.add_option("-u", "--upper", dest="upper_limit", type="float",
                      help="upper limit to use [default=%default]")

    parser.add_option("-l", "--lower", dest="lower_limit", type="float",
                      help="lower limit to use [default=%default]")

    parser.add_option("-s", "--scale", dest="scale", type="float",
                      help="scale to use [default=%default]")

    parser.add_option("-a", "--append", dest="append", type="choice",
                      action="append", choices=("normalize", ),
                      help="append columns [default=%default]")

    parser.set_defaults(nonull=None,
                        columns="1",
                        empty_bins=True,
                        titles=False,
                        lower_limit=None,
                        upper_limit=None,
                        bin_size=None,
                        scale=None,
                        normalize=None,
                        append=[],
                        cumulative=False,
                        reverse_cumulative=False)

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    # translate the 1-based user selection into 0-based indices
    if not options.columns:
        options.columns = [0]
    elif options.columns != "all":
        options.columns = [int(x) - 1 for x in options.columns.split(",")]

    # retrieve data, dropping comment lines
    lines = [x for x in sys.stdin if not x.startswith("#")]

    if not lines:
        # nothing to compute; previously this crashed on lines[0]
        E.info("no data")
        E.Stop()
        return

    if options.columns == "all":
        ncols = len(lines[0].rstrip("\n").split("\t"))
        options.columns = list(range(ncols))

    # one list of values per selected column, created only once the
    # selection is final
    vals = [[] for _ in options.columns]

    if options.titles:
        data = lines[0].rstrip("\n").split("\t")
        del lines[0]
        options.titles = [data[x] for x in options.columns]

    for line in lines:
        # rstrip instead of [:-1] so that a final line without a trailing
        # newline does not lose its last character
        text = line.rstrip("\n")
        data = text.split("\t")

        for x, column in enumerate(options.columns):
            try:
                v = float(data[column])
            except IndexError:
                sys.stdout.write("# IndexError in line: %s\n" % text)
                continue
            except ValueError:
                # non-numeric cell: skip silently
                continue

            if options.scale:
                v *= options.scale

            # clamp to the requested limits
            if options.upper_limit is not None and v > options.upper_limit:
                v = options.upper_limit

            if options.lower_limit is not None and v < options.lower_limit:
                v = options.lower_limit

            vals[x].append(v)

    # release the raw input
    lines = None

    hists = []
    titles = []

    for x, column in enumerate(options.columns):
        E.info("column=%i, num_values=%i" % (column, len(vals[x])))

        if not vals[x]:
            continue

        # NOTE(review): ``empty_bins`` ("show empty bins") is passed as
        # ``no_empty_bins``; kept as in the original - confirm the intended
        # polarity against Histogram.Calculate.
        h = Histogram.Calculate(vals[x],
                                no_empty_bins=options.empty_bins,
                                increment=options.bin_size)

        if options.scale:
            # undo the scaling applied to the values above
            h = Histogram.Scale(h, 1.0 / options.scale)

        if options.normalize:
            h = Histogram.Normalize(h)
        if options.cumulative:
            h = Histogram.Cumulate(h)
        if options.reverse_cumulative:
            h = Histogram.Cumulate(h, direction=0)

        hists.append(h)

        for m in options.append:
            if m == "normalize":
                hists.append(Histogram.Normalize(h))

        if options.titles:
            titles.append(options.titles[x])

    if titles:
        options.stdout.write("bin\t" + "\t".join(titles) + "\n")

    if len(hists) == 1:
        Histogram.Print(hists[0], nonull=options.nonull)
    else:
        combined_histogram = Histogram.Combine(hists)
        Histogram.Print(combined_histogram, nonull=options.nonull)

    E.Stop()
def main(argv=None):
    """Compute histograms for columns of a tab-separated table.

    Two modes are supported:

    * ``--on-the-fly``: bins are fixed up front from min-value, max-value
      and bin-size, and the counts are filled by
      ``Histogram.fillHistograms`` while reading stdin.
    * default (in-situ): all values are read into memory first and one
      histogram per column is built with ``Histogram.Calculate``.

    Parameters
    ----------
    argv : list of str, optional
        Command line to parse, including the program name. Defaults to
        ``sys.argv``.

    Raises
    ------
    ValueError
        In on-the-fly mode, if min-value, max-value or bin-size are missing,
        or if the columns cannot be determined.
    IndexError
        If a requested column is not present in the title line.
    """
    if not argv:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: data2histogram.py 2782 2009-09-10 11:40:29Z andreas $")

    parser.add_option("-r", "--range", dest="range", type="string",
                      help="range to calculate histogram for.")

    parser.add_option("-b", "--bin-size", dest="bin_size", type="string",
                      help="bin size.")

    parser.add_option("-i", "--titles", dest="titles", action="store_true",
                      help="use supplied column titles.")

    parser.add_option("--no-null", dest="nonull", action="store_true",
                      help="do not output null values")

    parser.add_option("--no-titles", dest="titles", action="store_false",
                      help="no column titles given.")

    parser.add_option("-c", "--columns", dest="columns", type="string",
                      help="columns to take for calculating histograms.")

    parser.add_option(
        "--min-data", dest="min_data", type="int",
        help="minimum amount of data required, if less data, then the histogram will be empty [default=%default].")

    parser.add_option("--min-value", dest="min_value", type="float",
                      help="minimum value for histogram.")

    parser.add_option("--max-value", dest="max_value", type="float",
                      help="maximum value for histogram.")

    parser.add_option("--no-empty-bins", dest="no_empty_bins",
                      action="store_true",
                      help="do not display empty bins.")

    parser.add_option("--with-empty-bins", dest="no_empty_bins",
                      action="store_false",
                      help="display empty bins.")

    parser.add_option("--normalize", dest="normalize", action="store_true",
                      help="normalize histogram.")

    parser.add_option("--cumulative", dest="cumulative", action="store_true",
                      help="calculate cumulative histogram.")

    parser.add_option("--reverse-cumulative", dest="reverse_cumulative",
                      action="store_true",
                      help="calculate reverse cumulative histogram.")

    parser.add_option("--header-names", dest="headers", type="string",
                      help="use the following headers.")

    parser.add_option(
        "--ignore-out-of-range", dest="ignore_out_of_range",
        action="store_true",
        help="ignore values that are out of range (as opposed to truncating them to range border.")

    parser.add_option("--missing-value", dest="missing_value", type="string",
                      help="entry for missing values [%default].")

    parser.add_option("--use-dynamic-bins", dest="dynamic_bins",
                      action="store_true",
                      help="each value constitutes its own bin.")

    parser.add_option(
        "--on-the-fly", dest="on_the_fly", action="store_true",
        help="on the fly computation of histograms. Requires setting of min-value, max-value and bin_size.")

    parser.set_defaults(
        bin_size=None,
        range=None,
        titles=True,
        columns="all",
        append=(),
        no_empty_bins=True,
        min_value=None,
        max_value=None,
        normalize=False,
        cumulative=False,
        reverse_cumulative=False,
        nonull=None,
        ignore_out_of_range=False,
        min_data=1,
        headers=None,
        missing_value="na",
        dynamic_bins=False,
        on_the_fly=False,
        bin_format="%.2f",
        value_format="%6.4f",
    )

    # hand the resolved argv to the parser; it was previously computed but
    # never used, so an explicit *argv* argument was silently ignored
    (options, args) = E.Start(parser, argv=argv)

    # translate the 1-based user selection into 0-based indices
    if options.columns != "all":
        options.columns = [int(x) - 1 for x in options.columns.split(",")]

    if options.range:
        options.min_value, options.max_value = list(
            map(float, options.range.split(",")))

    if options.headers:
        options.headers = options.headers.split(",")

    if options.on_the_fly:
        if options.min_value is None or options.max_value is None or \
                options.bin_size is None:
            raise ValueError("please supply columns, min-value, max-value and "
                             "bin-size for on-the-fly computation.")

        # try to glean titles from table:
        if options.titles:
            data = None
            # readline() rather than iteration: the remaining stream is
            # handed to fillHistograms below
            while True:
                line = sys.stdin.readline()
                if not line:
                    break
                if line[0] == "#":
                    continue
                data = line.rstrip("\n").split("\t")
                break

            if data is None:
                # no title line at all: same exit path as the in-situ branch
                if options.loglevel >= 1:
                    options.stdlog.write("# no data\n")
                E.Stop()
                sys.exit(0)

            if options.columns == "all":
                options.titles = data
                options.columns = list(range(len(data)))
            else:
                options.titles = [data[x] for x in options.columns]

        if options.columns == "all":
            # without a title line the number of columns is unknown
            raise ValueError("please supply columns for on-the-fly "
                             "computation if no column titles are given.")

        ncolumns = len(options.columns)

        bins = numpy.arange(options.min_value, options.max_value,
                            float(options.bin_size))
        hh = Histogram.fillHistograms(
            sys.stdin, options.columns, [bins for _ in range(ncolumns)])
        n = len(hh)

        # one title per selected column; the original indexed headers/titles
        # with an unbound ``x`` here
        titles = ['bin']
        if options.headers:
            titles.extend(options.headers[i] for i in range(ncolumns))
        elif options.titles:
            titles.extend(options.titles[i] for i in range(ncolumns))
        else:
            titles.extend("col%i" % (x + 1) for x in options.columns)

        if len(titles) > 1:
            options.stdout.write("\t".join(titles) + "\n")

        for x in range(len(bins)):
            v = [options.bin_format % bins[x]]
            for c in range(n):
                v.append(options.value_format % hh[c][x])
            options.stdout.write("\t".join(v) + "\n")

    else:
        # in-situ computation of histograms
        # retrieve data
        first = True
        vals = []

        # parse data, convert to floats
        for line in options.stdin:

            if line[0] == "#":
                continue

            text = line.rstrip("\n")
            data = text.split("\t")

            if first:
                # the first data line fixes the column selection and, if
                # titles are expected, supplies them
                first = False
                ncols = len(data)
                if options.columns == "all":
                    options.columns = list(range(ncols))

                vals = [[] for _ in options.columns]

                if options.titles:
                    try:
                        options.titles = [data[x] for x in options.columns]
                    except IndexError:
                        raise IndexError(
                            "not all columns %s found in data %s" %
                            (str(options.columns), str(data)))
                    continue

            for x, column in enumerate(options.columns):
                try:
                    v = float(data[column])
                except IndexError:
                    print("# IndexError in line:", text)
                    continue
                except ValueError:
                    # non-numeric cell: skip silently
                    continue

                vals[x].append(v)

        hists = []
        titles = []

        if not vals:
            if options.loglevel >= 1:
                options.stdlog.write("# no data\n")
            E.Stop()
            sys.exit(0)

        for x, column in enumerate(options.columns):
            if options.loglevel >= 1:
                options.stdlog.write("# column=%i, num_values=%i\n" %
                                     (column, len(vals[x])))

            if len(vals[x]) < options.min_data:
                continue

            h = Histogram.Calculate(
                vals[x],
                no_empty_bins=options.no_empty_bins,
                increment=options.bin_size,
                min_value=options.min_value,
                max_value=options.max_value,
                dynamic_bins=options.dynamic_bins,
                ignore_out_of_range=options.ignore_out_of_range)

            if options.normalize:
                h = Histogram.Normalize(h)
            if options.cumulative:
                h = Histogram.Cumulate(h)
            if options.reverse_cumulative:
                h = Histogram.Cumulate(h, direction=0)

            hists.append(h)

            for m in options.append:
                if m == "normalize":
                    hists.append(Histogram.Normalize(h))

            if options.headers:
                titles.append(options.headers[x])
            elif options.titles:
                titles.append(options.titles[x])
            else:
                # NOTE(review): 0-based here but 1-based in the on-the-fly
                # branch; left unchanged to keep existing output stable.
                titles.append("col%i" % column)

        if titles:
            options.stdout.write("bin\t" + "\t".join(titles) + "\n")

        if len(hists) == 1:
            Histogram.Print(hists[0],
                            nonull=options.nonull,
                            format_bin=options.bin_format)
        else:
            combined_histogram = Histogram.Combine(
                hists, missing_value=options.missing_value)
            Histogram.Print(combined_histogram,
                            nonull=options.nonull,
                            format_bin=options.bin_format)

    E.Stop()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Reads a tab-separated edge list from stdin (lines starting with ``#``
    are ignored) and, depending on ``--method``:

    * ``histograms``: collects the numeric third column per
      (first, second) column pair, turns each collection into a histogram
      and, for output format ``semi``, writes one file per first-column
      value using ``--output-pattern``.
    * ``counts``: counts how often each (first, second) pair occurs and
      writes ``first<TAB>second<TAB>count`` lines to the output stream.

    Lines that cannot be parsed are skipped and counted.
    """
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: graph2stats.py 2782 2009-09-10 11:40:29Z andreas $")

    parser.add_option("-r", "--range", dest="range", type="string",
                      help="range to calculate histogram for.")

    parser.add_option("-b", "--bin-size", dest="bin_size", type="string",
                      help="bin size.")

    parser.add_option("-i", "--titles", dest="titles", action="store_true",
                      help="use supplied column titles.")

    parser.add_option("-s", "--make-symmetric", dest="make_symmetric",
                      action="store_true",
                      help="symmetrize graph.")

    parser.add_option("-c", "--columns", dest="columns", type="string",
                      help="columns to take for calculating histograms.")

    parser.add_option("-p", "--output-pattern", dest="output_pattern",
                      type="string",
                      help="pattern for output files.")

    parser.add_option("-m", "--method", dest="method", type="string",
                      help="method.")

    parser.add_option("-o", "--output-format", dest="output_format",
                      type="string",
                      help="output format.")

    parser.add_option("--min-value", dest="min_value", type="float",
                      help="minimum value for histogram.")

    parser.add_option("--max-value", dest="max_value", type="float",
                      help="maximum value for histogram.")

    parser.set_defaults(bin_size=None,
                        range=None,
                        titles=False,
                        columns="all",
                        append=(),
                        empty_bins=False,
                        min_value=None,
                        max_value=None,
                        normalize=False,
                        cumulative=False,
                        reverse_cumulative=False,
                        nonull=None,
                        make_symmetric=False,
                        output_pattern="%s.hist",
                        method="histograms",
                        output_format="semi")

    # pass argv on so that an explicitly given *argv* is honoured, as the
    # docstring promises
    (options, args) = E.Start(parser, argv=argv)

    if options.columns != "all":
        options.columns = [int(x) - 1 for x in options.columns.split(",")]

    if options.range:
        # was ``options.range(split(","))``, which calls a string
        options.min_value, options.max_value = map(
            float, options.range.split(","))

    # retrieve data, dropping comment lines
    lines = [x for x in sys.stdin if not x.startswith("#")]

    vals = {}
    nerrors = 0

    if options.method == "histograms":

        # read data
        for line in lines:
            fields = line.rstrip("\n").split("\t")
            try:
                v1, v2, w = fields[:3]
                # the weight is the third field itself; the original indexed
                # into it (``w[3]``) instead of converting it
                w = float(w)
            except ValueError:
                # too few fields or a non-numeric weight
                nerrors += 1
                continue

            vals.setdefault(v1, {}).setdefault(v2, []).append(w)

            if options.make_symmetric:
                vals.setdefault(v2, {}).setdefault(v1, []).append(w)

        # convert to histograms, replacing each list of weights in place
        for k1, vv in vals.items():
            for k2 in list(vv):
                if not vv[k2]:
                    continue

                h = Histogram.Calculate(vv[k2],
                                        no_empty_bins=options.empty_bins,
                                        increment=options.bin_size,
                                        min_value=options.min_value,
                                        max_value=options.max_value)

                if options.normalize:
                    h = Histogram.Normalize(h)
                if options.cumulative:
                    h = Histogram.Cumulate(h)
                if options.reverse_cumulative:
                    h = Histogram.Cumulate(h, direction=0)

                vv[k2] = h

        # write output; the option is stored as ``output_format``
        # (``options.output`` never existed)
        if options.output_format == "semi":
            for k1, vv in vals.items():
                kk2 = sorted(vv)
                hists = [vv[k2] for k2 in kk2]
                # open for writing: the file receives the histograms
                with open(options.output_pattern % k1, "w") as outfile:
                    PrintHistograms(outfile, kk2, hists, options)

    elif options.method == "counts":

        # read data
        for line in lines:
            fields = line.rstrip("\n").split("\t")
            if len(fields) < 2:
                nerrors += 1
                continue
            v1, v2 = fields[:2]

            row = vals.setdefault(v1, {})
            row[v2] = row.get(v2, 0) + 1

            if options.make_symmetric:
                row = vals.setdefault(v2, {})
                row[v1] = row.get(v1, 0) + 1

        # write counts
        for k1, vv in vals.items():
            for k2, count in vv.items():
                options.stdout.write("%s\t%s\t%i\n" % (k1, k2, count))

    if nerrors:
        E.info("skipped %i lines that could not be parsed" % nerrors)

    E.Stop()