示例#1
0
def main(argv=None):
    """Combine several histogram files into one table printed on stdout.

    Parses command line options from *argv* (``sys.argv`` when *argv* is
    None).  Every positional argument is the name of a tab-separated
    histogram file; lines starting with ``#`` are ignored.  The histograms
    are optionally re-ordered, merged with ``Histogram.Combine``,
    optionally normalized and printed with ``Histogram.Print``.

    Exits with status 1 on an option error or when no file name is given,
    and with status 0 after ``--help`` / ``--version``.
    """
    # Every param_* name is assigned somewhere in this function.  Without
    # the declaration below they would all be function locals and reading
    # any option that was not given on the command line would raise
    # UnboundLocalError.  The defaults are presumably defined at module
    # level (outside this view).
    global param_headers, param_normalize, param_missing_value
    global param_titles, param_format, param_format_value
    global param_format_bin, param_sort

    if argv is None:
        argv = sys.argv

    try:
        # parse the argument vector handed in, not unconditionally sys.argv
        optlist, args = getopt.getopt(argv[1:], param_short_options,
                                      param_long_options)

    except getopt.error as msg:
        print(globals()["__doc__"], msg)
        sys.exit(1)

    for o, a in optlist:
        if o in ("--help", ):
            print(globals()["__doc__"])
            sys.exit(0)
        elif o in ("--version", ):
            print("version=")
            sys.exit(0)
        elif o in ("-h", "--header-names"):
            # keep the special value "auto" as a string so that the
            # comparisons against "auto" further down can match
            param_headers = "auto" if a == "auto" else a.split(",")
        elif o in ("-n", "--normalize"):
            param_normalize = 1
        elif o in ("-m", "--missing-value"):
            param_missing_value = a
        elif o == "--no-titles":
            param_titles = False
        elif o in ("-f", "--format"):
            param_format = a
        elif o == "--format-value":
            param_format_value = a
        elif o == "--bin-format":
            param_format_bin = a
        # NOTE(review): the long option string below looks like a mangled
        # rename -- confirm it against param_long_options.
        elif o in ("-s", "--method=sort --sort-order"):
            if a in ("numerical", "alphabetic", "alphabetical"):
                param_sort = a
            else:
                # explicit, comma-separated list of headers
                param_sort = a.split(",")

    if len(args) < 1:
        print(globals()["__doc__"], "please specify at least one histogram.")
        sys.exit(1)

    param_filenames = args

    print(E.GetHeader())
    print(E.GetParams())

    histograms = []

    # the first output column holds the bins
    headers = [
        'bin',
    ]
    if param_headers and param_headers != "auto":
        # the first user-supplied header names the bin column
        headers = [
            param_headers[0],
        ]
        del param_headers[0]

    for x, filename in enumerate(param_filenames):

        if not os.path.exists(filename):
            print("# skipped because file not present: %s" % filename)
            continue

        infile = IOTools.open_file(filename, "r")
        try:
            lines = [line for line in infile if line[0] != "#"]
        finally:
            infile.close()

        if len(lines) == 0:
            continue

        if param_titles:
            # first line is a title row; drop the bin column title
            h = lines[0][:-1].split("\t")[1:]
            del lines[0]

        if param_headers == "auto":
            headers.append(os.path.basename(filename))
        elif param_headers:
            headers.append(param_headers[x])
        elif param_titles:
            headers += h

        data = [list(map(float, line[:-1].split("\t"))) for line in lines]

        # add empty data point for empty histograms
        if len(data) == 0:
            data = [(0, 0)]

        histograms.append(data)

    # sort the whole thing:
    if param_sort:

        if param_sort == "numerical":
            # order columns by the integer value of their header
            t = sorted(zip(map(int, headers[1:]), range(1, len(headers))))
            sort_order = [headers[pos] for _, pos in t]

        elif param_sort in ("alphabetic", "alphabetical"):
            t = sorted(zip(headers[1:], range(1, len(headers))))
            sort_order = [headers[pos] for _, pos in t]

        else:
            sort_order = param_sort

        # map header to old position
        map_header2pos = {}
        for pos in range(1, len(headers)):
            map_header2pos[headers[pos]] = pos

        # headers that are requested but not present are silently dropped
        order = [map_header2pos[name] for name in sort_order
                 if name in map_header2pos]

        new_headers = [headers[0]]
        new_histograms = []

        for pos in order:
            new_headers.append(headers[pos])
            # histograms has no entry for the bin column, hence the offset
            new_histograms.append(histograms[pos - 1])

        histograms = new_histograms
        headers = new_headers

    combined_histogram = Histogram.Combine(histograms, param_missing_value)

    if headers:
        print("\t".join(headers))

    if param_normalize:
        combined_histogram = Histogram.Normalize(combined_histogram)

    Histogram.Print(
        combined_histogram,
        format_bin=param_format_bin,
        format_value=param_format_value,
    )

    print(E.GetFooter())
示例#2
0
                order.append(map_header2pos[x])

        new_headers = [headers[0]]
        new_histograms = []

        for x in order:
            new_headers.append(headers[x])
            new_histograms.append(histograms[x - 1])

        histograms = new_histograms
        headers = new_headers

    combined_histogram = Histogram.Combine(histograms, param_missing_value)

    if headers:
        print "\t".join(headers)

    if param_normalize:
        combined_histogram = Histogram.Normalize(combined_histogram)

    Histogram.Print(combined_histogram,
                    format_bin=param_format_bin,
                    format_value=param_format_value,
                    )

    print E.GetFooter()


# Script entry point: run main() on the process arguments and use its
# return value as the exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv)
    sys.exit(exit_status)
示例#3
0
def main(argv=None):
    """Compute histograms for selected columns of a table read from stdin.

    Parses command line options from *argv* (``sys.argv`` when *argv* is
    None).  Lines starting with ``#`` are ignored; the remaining lines are
    split on tabs and the selected columns are converted to floats.  One
    histogram per column is computed with ``Histogram.Calculate`` and the
    result is printed with ``Histogram.Print``.

    Columns are given as a comma-separated list of 1-based indices or as
    ``all``.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-n",
                      "--nonull",
                      dest="nonull",
                      action="store_true",
                      help="no null [default=%default]")

    parser.add_option("-e",
                      "--show-empty",
                      dest="empty_bins",
                      action="store_true",
                      help="show empty bins [default=%default]")

    parser.add_option("-o",
                      "--normalize",
                      dest="normalize",
                      action="store_true",
                      help="normalize histogram [default=%default]")

    parser.add_option("-i",
                      "--titles",
                      dest="titles",
                      action="store_true",
                      help="use titles supplied in ... [default=%default]")

    parser.add_option("--cumulative",
                      dest="cumulative",
                      action="store_true",
                      help="compute cumulative histogram [default=%default]")

    parser.add_option(
        "--reverse-cumulative",
        dest="reverse_cumulative",
        action="store_true",
        help="compute reverse cumulative histogram [default=%default]")

    # The value is stored in ``columns`` as a string because the code below
    # reads ``options.columns`` and splits it on commas.  ``--column`` is
    # kept as an alias so existing command lines continue to parse.
    parser.add_option("-c",
                      "--column",
                      "--columns",
                      dest="columns",
                      type="string",
                      help="columns to take [default=%default]")

    parser.add_option("-b",
                      "--bin-size",
                      dest="bin_size",
                      type="float",
                      help="bin size to use [default=%default]")

    parser.add_option("-u",
                      "--upper",
                      dest="upper_limit",
                      type="float",
                      help="upper limit to use [default=%default]")

    parser.add_option("-l",
                      "--lower",
                      dest="lower_limit",
                      type="float",
                      help="lower limit to use [default=%default]")

    parser.add_option("-s",
                      "--scale",
                      dest="scale",
                      type="float",
                      help="scale to use [default=%default]")

    parser.add_option("-a",
                      "--append",
                      dest="append",
                      type="choice",
                      action="append",
                      choices=("normalize", ),
                      help="append columns [default=%default]")

    # the default "1" selects the first column (indices are 1-based here)
    parser.set_defaults(nonull=None,
                        columns="1",
                        empty_bins=True,
                        titles=False,
                        lower_limit=None,
                        upper_limit=None,
                        bin_size=None,
                        scale=None,
                        normalize=None,
                        append=[],
                        cumulative=False,
                        reverse_cumulative=False)

    ## add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    if not options.columns:
        options.columns = [0]
    elif options.columns != "all":
        # convert 1-based user indices to 0-based list indices
        options.columns = [int(x) - 1 for x in options.columns.split(",")]

    # retrieve histogram data, skipping comment lines
    lines = [l for l in sys.stdin if l[0] != "#"]

    if not lines:
        # nothing to compute; avoid failing on lines[0] below
        E.info("no data")
        E.Stop()
        return

    ncols = len(lines[0][:-1].split("\t"))
    if options.columns == "all":
        options.columns = list(range(ncols))

    # one list of values per selected column
    vals = [[] for x in options.columns]

    if options.titles:
        data = lines[0][:-1].split("\t")
        del lines[0]
        options.titles = [data[x] for x in options.columns]

    for l in lines:
        data = l[:-1].split("\t")

        for x in range(len(options.columns)):
            try:
                v = float(data[options.columns[x]])
            except IndexError:
                print("# IndexError in line: %s" % l[:-1])
                continue
            except ValueError:
                # non-numeric fields are skipped silently
                continue

            if options.scale:
                v *= options.scale

            # clamp values to the requested limits
            if options.upper_limit is not None and v > options.upper_limit:
                v = options.upper_limit

            if options.lower_limit is not None and v < options.lower_limit:
                v = options.lower_limit

            vals[x].append(v)

    # release the raw input lines
    lines = None

    hists = []
    titles = []

    for x in range(len(options.columns)):
        E.info("column=%i, num_values=%i" % (options.columns[x], len(vals[x])))

        if not vals[x]:
            continue

        # NOTE(review): ``empty_bins`` ("show empty bins") is passed as
        # ``no_empty_bins``; the polarity looks inverted -- confirm against
        # Histogram.Calculate before changing.
        h = Histogram.Calculate(vals[x],
                                no_empty_bins=options.empty_bins,
                                increment=options.bin_size)
        if options.scale:
            # undo the scaling that was applied to the values above
            h = Histogram.Scale(h, 1.0 / options.scale)

        if options.normalize:
            h = Histogram.Normalize(h)
        if options.cumulative:
            h = Histogram.Cumulate(h)
        if options.reverse_cumulative:
            h = Histogram.Cumulate(h, direction=0)

        hists.append(h)

        for m in options.append:
            if m == "normalize":
                hists.append(Histogram.Normalize(h))

        if options.titles:
            titles.append(options.titles[x])

    if titles:
        options.stdout.write("bin\t" + "\t".join(titles) + "\n")

    if len(hists) == 1:
        Histogram.Print(hists[0], nonull=options.nonull)
    else:
        combined_histogram = Histogram.Combine(hists)
        Histogram.Print(combined_histogram, nonull=options.nonull)

    E.Stop()
示例#4
0
def main(argv=None):
    """Compute histograms for columns of a tab-separated table on stdin.

    Parses command line options from *argv* (``sys.argv`` when *argv* is
    empty or None).  Two modes are available:

    * ``--on-the-fly``: bins are fixed up front from ``--min-value``,
      ``--max-value`` and ``--bin-size`` and the input is passed to
      ``Histogram.fillHistograms``.
    * default: all values are read into memory and one histogram per
      column is computed with ``Histogram.Calculate``.

    Raises:
        ValueError: in on-the-fly mode when min-value, max-value or
            bin-size is missing, or when columns is ``all`` and no title
            row is available to determine the number of columns.
    """

    if not argv:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: data2histogram.py 2782 2009-09-10 11:40:29Z andreas $"
    )

    parser.add_option("-r",
                      "--range",
                      dest="range",
                      type="string",
                      help="range to calculate histogram for.")
    parser.add_option("-b",
                      "--bin-size",
                      dest="bin_size",
                      type="string",
                      help="bin size.")
    parser.add_option("-i",
                      "--titles",
                      dest="titles",
                      action="store_true",
                      help="use supplied column titles.")
    parser.add_option("--no-null",
                      dest="nonull",
                      action="store_true",
                      help="do not output null values")
    parser.add_option("--no-titles",
                      dest="titles",
                      action="store_false",
                      help="no column titles given.")
    parser.add_option("-c",
                      "--columns",
                      dest="columns",
                      type="string",
                      help="columns to take for calculating histograms.")
    parser.add_option(
        "--min-data",
        dest="min_data",
        type="int",
        help=
        "minimum amount of data required, if less data, then the histogram will be empty [default=%default]."
    )
    parser.add_option("--min-value",
                      dest="min_value",
                      type="float",
                      help="minimum value for histogram.")
    parser.add_option("--max-value",
                      dest="max_value",
                      type="float",
                      help="maximum value for histogram.")
    parser.add_option("--no-empty-bins",
                      dest="no_empty_bins",
                      action="store_true",
                      help="do not display empty bins.")
    parser.add_option("--with-empty-bins",
                      dest="no_empty_bins",
                      action="store_false",
                      help="display empty bins.")
    parser.add_option("--normalize",
                      dest="normalize",
                      action="store_true",
                      help="normalize histogram.")
    parser.add_option("--cumulative",
                      dest="cumulative",
                      action="store_true",
                      help="calculate cumulative histogram.")
    parser.add_option("--reverse-cumulative",
                      dest="reverse_cumulative",
                      action="store_true",
                      help="calculate reverse cumulative histogram.")
    parser.add_option("--header-names",
                      dest="headers",
                      type="string",
                      help="use the following headers.")
    parser.add_option(
        "--ignore-out-of-range",
        dest="ignore_out_of_range",
        action="store_true",
        help=
        "ignore values that are out of range (as opposed to truncating them to range border."
    )
    parser.add_option("--missing-value",
                      dest="missing_value",
                      type="string",
                      help="entry for missing values [%default].")
    parser.add_option("--use-dynamic-bins",
                      dest="dynamic_bins",
                      action="store_true",
                      help="each value constitutes its own bin.")
    parser.add_option(
        "--on-the-fly",
        dest="on_the_fly",
        action="store_true",
        help=
        "on the fly computation of histograms. Requires setting of min-value, max-value and bin_size."
    )

    parser.set_defaults(
        bin_size=None,
        range=None,
        titles=True,
        columns="all",
        append=(),
        no_empty_bins=True,
        min_value=None,
        max_value=None,
        normalize=False,
        cumulative=False,
        reverse_cumulative=False,
        nonull=None,
        ignore_out_of_range=False,
        min_data=1,
        headers=None,
        missing_value="na",
        dynamic_bins=False,
        on_the_fly=False,
        bin_format="%.2f",
        value_format="%6.4f",
    )

    # hand the caller-supplied argument vector to the option parser
    (options, args) = E.Start(parser, argv=argv)

    if options.columns != "all":
        # convert 1-based user indices to 0-based list indices
        options.columns = [int(x) - 1 for x in options.columns.split(",")]

    if options.range:
        options.min_value, options.max_value = list(
            map(float, options.range.split(",")))

    if options.headers:
        options.headers = options.headers.split(",")

    if options.on_the_fly:
        if options.min_value is None or options.max_value is None or \
           options.bin_size is None:
            raise ValueError("please supply columns, min-value, max-value and "
                             "bin-size for on-the-fly computation.")

        # try to glean titles from table:
        if options.titles:
            header = None
            # readline() is used instead of iterating so that the rest of
            # the stream can be handed on to fillHistograms afterwards
            while True:
                line = sys.stdin.readline()
                if not line:
                    break
                if line[0] == "#":
                    continue
                header = line[:-1].split("\t")
                break

            if header is None:
                # input contained no title row: same handling as the
                # "no data" case of the in-memory branch below
                if options.loglevel >= 1:
                    options.stdlog.write("# no data\n")
                E.Stop()
                sys.exit(0)

            if options.columns == "all":
                options.titles = header
                options.columns = list(range(len(header)))
            else:
                options.titles = [header[x] for x in options.columns]

        elif options.columns == "all":
            # without a title row the number of columns is unknown
            raise ValueError("please supply columns for on-the-fly "
                             "computation without titles.")

        bins = numpy.arange(options.min_value, options.max_value,
                            float(options.bin_size))
        hh = Histogram.fillHistograms(
            sys.stdin, options.columns,
            [bins for x in range(len(options.columns))])
        n = len(hh)

        # one title per selected column, preferring explicit headers over
        # titles read from the table over generated names
        titles = ['bin']
        for i, column in enumerate(options.columns):
            if options.headers:
                titles.append(options.headers[i])
            elif options.titles:
                titles.append(options.titles[i])
            else:
                titles.append("col%i" % (column + 1))

        if len(titles) > 1:
            options.stdout.write("\t".join(titles) + "\n")

        for x in range(len(bins)):
            v = []
            v.append(options.bin_format % bins[x])
            for c in range(n):
                v.append(options.value_format % hh[c][x])

            options.stdout.write("\t".join(v) + "\n")

    else:
        # in-situ computation of histograms
        # retrieve data
        first = True
        vals = []

        # parse data, convert to floats
        for l in options.stdin:

            if l[0] == "#":
                continue

            data = l[:-1].split("\t")

            if first:
                # the first data line fixes the number of columns and,
                # if requested, supplies the titles
                first = False
                ncols = len(data)
                if options.columns == "all":
                    options.columns = list(range(ncols))

                vals = [[] for x in options.columns]

                if options.titles:
                    try:
                        options.titles = [data[x] for x in options.columns]
                    except IndexError:
                        raise IndexError(
                            "not all columns %s found in data %s" %
                            (str(options.columns), str(data)))
                    continue

            for x in range(len(options.columns)):

                try:
                    v = float(data[options.columns[x]])
                except IndexError:
                    print("# IndexError in line:", l[:-1])
                    continue
                except ValueError:
                    # non-numeric fields are skipped silently
                    continue

                vals[x].append(v)

        hists = []
        titles = []

        if not vals:
            if options.loglevel >= 1:
                options.stdlog.write("# no data\n")
            E.Stop()
            sys.exit(0)

        for x in range(len(options.columns)):

            if options.loglevel >= 1:
                options.stdlog.write("# column=%i, num_values=%i\n" %
                                     (options.columns[x], len(vals[x])))

            if len(vals[x]) < options.min_data:
                continue

            h = Histogram.Calculate(
                vals[x],
                no_empty_bins=options.no_empty_bins,
                increment=options.bin_size,
                min_value=options.min_value,
                max_value=options.max_value,
                dynamic_bins=options.dynamic_bins,
                ignore_out_of_range=options.ignore_out_of_range)

            if options.normalize:
                h = Histogram.Normalize(h)
            if options.cumulative:
                h = Histogram.Cumulate(h)
            if options.reverse_cumulative:
                h = Histogram.Cumulate(h, direction=0)

            hists.append(h)

            for m in options.append:
                if m == "normalize":
                    hists.append(Histogram.Normalize(h))

            if options.headers:
                titles.append(options.headers[x])
            elif options.titles:
                titles.append(options.titles[x])
            else:
                # NOTE(review): this uses the 0-based index while the
                # on-the-fly branch prints the 1-based one -- confirm
                # which is intended before unifying.
                titles.append("col%i" % options.columns[x])

        if titles:
            options.stdout.write("bin\t" + "\t".join(titles) + "\n")

        if len(hists) == 1:
            Histogram.Print(hists[0],
                            nonull=options.nonull,
                            format_bin=options.bin_format)
        else:
            combined_histogram = Histogram.Combine(
                hists, missing_value=options.missing_value)
            Histogram.Print(combined_histogram,
                            nonull=options.nonull,
                            format_bin=options.bin_format)

    E.Stop()
示例#5
0
def main(argv=None):
    """Summarize a weighted edge list read from stdin.

    Parses command line options from *argv* (``sys.argv`` when *argv* is
    None).  Input lines are tab-separated; lines starting with ``#`` are
    ignored.  Depending on ``--method``:

    * ``histograms``: the first three fields are read as
      ``vertex1, vertex2, weight``.  The weights of every vertex pair are
      turned into a histogram with ``Histogram.Calculate`` and, for
      output format ``semi``, written to one file per first vertex named
      after ``--output-pattern``.
    * ``counts``: the first two fields are read as a vertex pair and the
      number of occurrences of every pair is written to the output
      stream.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: graph2stats.py 2782 2009-09-10 11:40:29Z andreas $"
    )

    parser.add_option("-r",
                      "--range",
                      dest="range",
                      type="string",
                      help="range to calculate histogram for.")
    parser.add_option("-b",
                      "--bin-size",
                      dest="bin_size",
                      type="string",
                      help="bin size.")
    parser.add_option("-i",
                      "--titles",
                      dest="titles",
                      action="store_true",
                      help="use supplied column titles.")
    parser.add_option("-s",
                      "--make-symmetric",
                      dest="make_symmetric",
                      action="store_true",
                      help="symmetrize graph.")
    parser.add_option("-c",
                      "--columns",
                      dest="columns",
                      type="string",
                      help="columns to take for calculating histograms.")
    parser.add_option("-p",
                      "--output-pattern",
                      dest="output_pattern",
                      type="string",
                      help="pattern for output files.")
    parser.add_option("-m",
                      "--method",
                      dest="method",
                      type="string",
                      help="method.")
    parser.add_option("-o",
                      "--output-format",
                      dest="output_format",
                      type="string",
                      help="output format.")

    parser.add_option("--min-value",
                      dest="min_value",
                      type="float",
                      help="minimum value for histogram.")
    parser.add_option("--max-value",
                      dest="max_value",
                      type="float",
                      help="maximum value for histogram.")

    parser.set_defaults(bin_size=None,
                        range=None,
                        titles=False,
                        columns="all",
                        append=(),
                        empty_bins=False,
                        min_value=None,
                        max_value=None,
                        normalize=False,
                        cumulative=False,
                        reverse_cumulative=False,
                        nonull=None,
                        make_symmetric=False,
                        output_pattern="%s.hist",
                        method="histograms",
                        output_format="semi")

    # hand the caller-supplied argument vector to the option parser
    (options, args) = E.Start(parser, argv=argv)

    if options.columns != "all":
        # convert 1-based user indices to 0-based list indices
        options.columns = [int(x) - 1 for x in options.columns.split(",")]

    if options.range:
        options.min_value, options.max_value = [
            float(x) for x in options.range.split(",")
        ]

    # retrieve data, skipping comment lines
    lines = [line for line in sys.stdin if line[0] != "#"]

    vals = {}

    if options.method == "histograms":

        # number of input lines that could not be parsed
        nerrors = 0

        ## read data
        for line in lines:

            try:
                # too few fields and a non-numeric weight both raise
                # ValueError and are counted as malformed lines
                v1, v2, w = line[:-1].split("\t")[:3]
                w = float(w)
            except ValueError:
                nerrors += 1
                continue

            vals.setdefault(v1, {}).setdefault(v2, []).append(w)
            if options.make_symmetric:
                vals.setdefault(v2, {}).setdefault(v1, []).append(w)

        if nerrors:
            E.info("skipped %i malformed lines" % nerrors)

        ## convert to histograms
        for vv in vals.values():
            # iterate over a snapshot of the keys as values are replaced
            for k2 in list(vv):
                if not vv[k2]:
                    continue

                h = Histogram.Calculate(vv[k2],
                                        no_empty_bins=options.empty_bins,
                                        increment=options.bin_size,
                                        min_value=options.min_value,
                                        max_value=options.max_value)

                if options.normalize:
                    h = Histogram.Normalize(h)
                if options.cumulative:
                    h = Histogram.Cumulate(h)
                if options.reverse_cumulative:
                    h = Histogram.Cumulate(h, direction=0)

                vv[k2] = h

        ## write output
        if options.output_format == "semi":
            for k1, vv in vals.items():

                kk2 = sorted(vv)
                hists = [vv[k2] for k2 in kk2]

                # one output file per first vertex, opened for writing
                with open(options.output_pattern % k1, "w") as outfile:
                    PrintHistograms(outfile, kk2, hists, options)

    elif options.method == "counts":

        ## read data
        for line in lines:

            v1, v2 = line[:-1].split("\t")[:2]

            counts = vals.setdefault(v1, {})
            counts[v2] = counts.get(v2, 0) + 1
            if options.make_symmetric:
                counts = vals.setdefault(v2, {})
                counts[v1] = counts.get(v1, 0) + 1

        ## write one line per vertex pair
        for k1, vv in vals.items():
            for k2 in vv:
                options.stdout.write("%s\t%s\t%i\n" % (k1, k2, vv[k2]))

    E.Stop()