Exemplo n.º 1
0
def main():
    """Merge pickled HRG split histograms into one consensus dendrogram.

    Positional command-line arguments (parsed from ``sys.argv``):
    GRAPH_EDGELIST_FILE, two or more PICKLED_HISTOGRAM files, OUTPUT_FILE.
    Each pickled histogram must be a mapping containing a ``'num_samples'``
    entry; all histograms must agree on that value.

    Returns:
        0 on success, or 1 (after printing the usage text) when fewer than
        four positional arguments are supplied.

    Raises:
        Exception: if OUTPUT_FILE exists and ``--force`` was not given, if a
            pickled object is not a mapping, or if the histograms disagree
            on ``'num_samples'``.
    """
    # collections.Mapping was removed in Python 3.10; fall back for Python 2.
    try:
        from collections.abc import Mapping
    except ImportError:
        from collections import Mapping

    parser = OptionParser(
        description="Merges the split histograms of two or more HRG consensus "
                    "dendrograms.  Creates a new consensus dendrogram from the merged "
                    "histograms.  Saves the new consensus dendrogram to a graph markup "
                    "language (GML) file.",
        prog='hrg-merge-histograms.py',
        usage='%prog [options] GRAPH_EDGELIST_FILE PICKLED_HISTOGRAM ... PICKLED_HISTOGRAM OUTPUT_FILE')

    # --force is a boolean flag.  It used to be declared as an integer option
    # defaulting to 10000, which made it always truthy and silently disabled
    # the overwrite check below.
    parser.add_option('-f', '--force', action='store_true', default=False,
        help='Allow overwriting of existing GML dendrogram files')

    (options, args) = parser.parse_args()

    if len(args) < 4:
        parser.print_help()
        return 1

    graph_edgelist = args[0]
    outfile = args[-1]
    histfiles = args[1:-1]

    # Fail fast, before the graph and the histograms are loaded.
    if os.path.exists(outfile) and not options.force:
        raise Exception("Output file " + outfile +
            " exists.  Won't overwrite without --force option.")

    G = nx.read_edgelist(graph_edgelist, nodetype=int)
    filename = os.path.basename(graph_edgelist)
    G.name = os.path.splitext(filename)[0]

    # Sample count shared by every histogram; None until the first is read.
    n = None

    histograms = []
    for histfile in histfiles:
        # NOTE: pickle.load can execute arbitrary code contained in the file;
        # only merge histogram files that come from a trusted source.
        with open(histfile, 'rb') as f:
            histogram = pickle.load(f)

        if not isinstance(histogram, Mapping):
            raise Exception('Object in ' + histfile +
                ' is not a dictionary: ' + str(type(histogram)))

        num_samples = histogram['num_samples']
        if n is None:
            n = num_samples
        elif num_samples != n:
            raise Exception('inconsistent number of samples, '
                'expected ' + str(n) + ', actual ' +
                str(num_samples))

        # The sample count is bookkeeping, not a split; drop it before merging.
        del histogram['num_samples']

        histograms.append(histogram)

    # sorted() works whether G.nodes() returns a list or a node view.
    nodes = sorted(G.nodes())

    builder = ConsensusDendrogramBuilder()
    C = builder.build(nodes, histograms, n)

    # Save the consensus dendrogram to a GML file.
    nx.write_gml(C, outfile)
    print("Saved merged consensus dendrogram to " + outfile + ".")

    return 0
Exemplo n.º 2
0
def main():
    """Merge pickled HRG split histograms into one consensus dendrogram.

    Positional command-line arguments (parsed from ``sys.argv``):
    GRAPH_EDGELIST_FILE, two or more PICKLED_HISTOGRAM files, OUTPUT_FILE.
    Each pickled histogram must be a mapping containing a ``'num_samples'``
    entry; all histograms must agree on that value.

    Returns:
        0 on success, or 1 (after printing the usage text) when fewer than
        four positional arguments are supplied.

    Raises:
        Exception: if OUTPUT_FILE exists and ``--force`` was not given, if a
            pickled object is not a mapping, or if the histograms disagree
            on ``'num_samples'``.
    """
    # collections.Mapping was removed in Python 3.10; fall back for Python 2.
    try:
        from collections.abc import Mapping
    except ImportError:
        from collections import Mapping

    parser = OptionParser(
        description="Merges the split histograms of two or more hrg consensus "
                    "dendrograms.  Creates a new consensus dendrogram from the merged "
                    "histograms.  Saves the new consensus dendrogram to a graph markup "
                    "language (GML) file.",
        prog='hrg-merge-histograms.py',
        usage='%prog [options] GRAPH_EDGELIST_FILE PICKLED_HISTOGRAM ... PICKLED_HISTOGRAM OUTPUT_FILE')

    # --force is a boolean flag.  It used to be declared as an integer option
    # defaulting to 10000, which made it always truthy and silently disabled
    # the overwrite check below.
    parser.add_option('-f', '--force', action='store_true', default=False,
        help='Allow overwriting of existing GML dendrogram files')

    (options, args) = parser.parse_args()

    if len(args) < 4:
        parser.print_help()
        return 1

    graph_edgelist = args[0]
    outfile = args[-1]
    histfiles = args[1:-1]

    # Fail fast, before the graph and the histograms are loaded.
    if os.path.exists(outfile) and not options.force:
        raise Exception("Output file " + outfile +
            " exists.  Won't overwrite without --force option.")

    G = nx.read_edgelist(graph_edgelist, nodetype=int)
    filename = os.path.basename(graph_edgelist)
    G.name = os.path.splitext(filename)[0]

    # Sample count shared by every histogram; None until the first is read.
    n = None

    histograms = []
    for histfile in histfiles:
        # NOTE: pickle.load can execute arbitrary code contained in the file;
        # only merge histogram files that come from a trusted source.
        with open(histfile, 'rb') as f:
            histogram = pickle.load(f)

        if not isinstance(histogram, Mapping):
            raise Exception('Object in ' + histfile +
                ' is not a dictionary: ' + str(type(histogram)))

        num_samples = histogram['num_samples']
        if n is None:
            n = num_samples
        elif num_samples != n:
            raise Exception('inconsistent number of samples, '
                'expected ' + str(n) + ', actual ' +
                str(num_samples))

        # The sample count is bookkeeping, not a split; drop it before merging.
        del histogram['num_samples']

        histograms.append(histogram)

    # sorted() works whether G.nodes() returns a list or a node view.
    nodes = sorted(G.nodes())

    builder = ConsensusDendrogramBuilder()
    C = builder.build(nodes, histograms, n)

    # Save the consensus dendrogram to a GML file.
    nx.write_gml(C, outfile)
    print("Saved merged consensus dendrogram to " + outfile + ".")

    return 0
Exemplo n.º 3
0
def main():
    """Sample dendrogram splits by Monte Carlo and save a consensus dendrogram.

    Positional command-line arguments (parsed from ``sys.argv``):
    GRAPH_EDGELIST_FILE and DENDROGRAM_GML_FILE.  Output paths are derived
    from the edge-list path and the temperature:
    ``<edgelist-stem>-consensus-temp-<T>-dendrogram.gml`` and
    ``<edgelist-stem>-consensus-temp-<T>-histogram.dat``.

    The loop runs until ``D.num_samples`` reaches ``--num-samples``.  After a
    burn-in of 200 * (number of nodes) steps, each step calls
    ``D.sample_splits()`` with probability 1 / (50 * number of nodes).

    Returns:
        0 on success, or 1 (after printing the usage text) unless exactly
        two positional arguments are supplied.
    """
    parser = OptionParser(
        description=
        "Finds a consensus dendrogram from an HRG model of a network.  "
        "Saves the consensus dendrogram to a graph markup language (GML) "
        "file.  Saves the histogram of splits in the consensus dendrogram "
        "to a file in Python's pickle format.",
        prog='hrg-consensus.py',
        usage='%prog [options] GRAPH_EDGELIST_FILE DENDROGRAM_GML_FILE')

    parser.add_option(
        '-s',
        '--num-samples',
        action='store',
        type=int,
        default=10000,
        help=
        "The number of times to sample the dendrogram's splits (default=10000)."
    )

    parser.add_option('-t',
                      '--temperature',
                      action='store',
                      type=float,
                      default=2.0,
                      help='The temperature at which to run (default=2.0).')

    (options, args) = parser.parse_args()

    if len(args) != 2:
        parser.print_help()
        return 1

    graph_edgelist = args[0]
    G = nx.read_edgelist(graph_edgelist, nodetype=int)
    filename = os.path.basename(graph_edgelist)
    G.name = os.path.splitext(filename)[0]

    gml_file = args[1]
    D = Dendrogram.from_gml_file(gml_file, G)

    # Highest value of D.graph['L'] seen so far (reported in the status line).
    bestL = D.graph['L']

    print_status("step", "L", "best L", "% complete", "consensus size")

    num_nodes = G.number_of_nodes()
    threshold = 1 / (50.0 * num_nodes)
    burnin = 200 * num_nodes
    i = 1

    out = os.path.splitext(graph_edgelist)[0]
    out += '-consensus-temp-%0.2f' % options.temperature
    dendro_file = out + '-dendrogram.gml'
    hist_file = out + '-histogram.dat'
    print("HRG consensus dendrogram will be saved as " + dendro_file)
    print("Split histogram will be saved as " + hist_file)

    while D.num_samples < options.num_samples:
        D.monte_carlo_move(T=options.temperature, debug=False)

        # Only sample once the chain is past burn-in, and then only rarely.
        if i > burnin and random.random() < threshold:
            D.sample_splits()

        if D.graph['L'] > bestL:
            bestL = D.graph['L']

        if i % 4096 == 0:
            nsplits = D.num_samples
            pct_complete = 100 * D.num_samples / float(options.num_samples)
            print_status("[" + str(i) + "]", "%.3f" % D.graph['L'],
                         "%.3f" % bestL, "%8.2f" % pct_complete,
                         "%10d" % nsplits)

        if i % 10 == 0:
            sys.stdout.flush()

        i += 1

    # Save the histogram to a file.  The sample count travels inside the
    # pickled mapping under 'num_samples' and is removed again afterwards,
    # even if writing fails, so the in-memory histogram holds only splits.
    D.split_histogram['num_samples'] = D.num_samples
    try:
        with open(hist_file, mode='wb') as hist_out:
            pickle.dump(D.split_histogram, hist_out)
    finally:
        del D.split_histogram['num_samples']
    print("Saved split histogram to " + hist_file)

    # Build the consensus dendrogram, save it to a file.
    builder = ConsensusDendrogramBuilder()
    C = builder.build(D.graph_nodes_list, D.split_histogram, D.num_samples)
    nx.write_gml(C, dendro_file)
    print("Saved consensus dendrogram to " + dendro_file)

    return 0
Exemplo n.º 4
0
def main():
    """Sample dendrogram splits by Monte Carlo and save a consensus dendrogram.

    Positional command-line arguments (parsed from ``sys.argv``):
    GRAPH_EDGELIST_FILE and DENDROGRAM_GML_FILE.  Output paths are derived
    from the edge-list path and the temperature:
    ``<edgelist-stem>-consensus-temp-<T>-dendrogram.gml`` and
    ``<edgelist-stem>-consensus-temp-<T>-histogram.dat``.

    The loop runs until ``D.num_samples`` reaches ``--num-samples``.  After a
    burn-in of 200 * (number of nodes) steps, each step calls
    ``D.sample_splits()`` with probability 1 / (50 * number of nodes).

    Returns:
        0 on success, or 1 (after printing the usage text) unless exactly
        two positional arguments are supplied.
    """
    parser = OptionParser(
        description="Finds a consensus dendrogram from an HRG model of a network.  "
                    "Saves the consensus dendrogram to a graph markup language (GML) "
                    "file.  Saves the histogram of splits in the consensus dendrogram "
                    "to a file in Python's pickle format.",
        prog='hrg-consensus.py',
        usage='%prog [options] GRAPH_EDGELIST_FILE DENDROGRAM_GML_FILE')

    parser.add_option('-s', '--num-samples', action='store', type=int,
        default=10000,
        help="The number of times to sample the dendrogram's splits (default=10000).")

    parser.add_option('-t', '--temperature', action='store', type=float,
        default=2.0, help='The temperature at which to run (default=2.0).')

    (options, args) = parser.parse_args()

    if len(args) != 2:
        parser.print_help()
        return 1

    graph_edgelist = args[0]
    G = nx.read_edgelist(graph_edgelist, nodetype=int)
    filename = os.path.basename(graph_edgelist)
    G.name = os.path.splitext(filename)[0]

    gml_file = args[1]
    D = Dendrogram.from_gml_file(gml_file, G)

    # Highest value of D.graph['L'] seen so far (reported in the status line).
    bestL = D.graph['L']

    print_status("step", "L", "best L", "% complete", "consensus size")

    num_nodes = G.number_of_nodes()
    threshold = 1 / (50.0 * num_nodes)
    burnin = 200 * num_nodes
    i = 1

    out = os.path.splitext(graph_edgelist)[0]
    out += '-consensus-temp-%0.2f' % options.temperature
    dendro_file = out + '-dendrogram.gml'
    hist_file = out + '-histogram.dat'
    print("HRG consensus dendrogram will be saved as " + dendro_file)
    print("Split histogram will be saved as " + hist_file)

    while D.num_samples < options.num_samples:
        D.monte_carlo_move(T=options.temperature, debug=False)

        # Only sample once the chain is past burn-in, and then only rarely.
        if i > burnin and random.random() < threshold:
            D.sample_splits()

        if D.graph['L'] > bestL:
            bestL = D.graph['L']

        if i % 4096 == 0:
            nsplits = D.num_samples
            pct_complete = 100 * D.num_samples / float(options.num_samples)
            print_status(
                "[" + str(i) + "]",
                "%.3f" % D.graph['L'],
                "%.3f" % bestL,
                "%8.2f" % pct_complete,
                "%10d" % nsplits)

        if i % 10 == 0:
            sys.stdout.flush()

        i += 1

    # Save the histogram to a file.  The sample count travels inside the
    # pickled mapping under 'num_samples' and is removed again afterwards,
    # even if writing fails, so the in-memory histogram holds only splits.
    D.split_histogram['num_samples'] = D.num_samples
    try:
        with open(hist_file, mode='wb') as hist_out:
            pickle.dump(D.split_histogram, hist_out)
    finally:
        del D.split_histogram['num_samples']
    print("Saved split histogram to " + hist_file)

    # Build the consensus dendrogram, save it to a file.
    builder = ConsensusDendrogramBuilder()
    C = builder.build(D.graph_nodes_list, D.split_histogram, D.num_samples)
    nx.write_gml(C, dendro_file)
    print("Saved consensus dendrogram to " + dendro_file)

    return 0