Python Timeseries.treeCutting示例

def main(argv=None):
    """Run the clustering task selected on the command line.

    Parses command line options in sys.argv, unless *argv* is given,
    dispatches to the matching ``TS`` function and writes the object it
    returns to ``options.stdout`` as a tab-separated table whose index
    column is labelled ``gene_id``.

    Supported ``--task`` values are ``cluster``, ``clustagree`` (which
    additionally needs ``--method``), ``consensus-cluster`` and ``pca``.

    Parameters
    ----------
    argv : list of str, optional
        Command line arguments; defaults to ``sys.argv``. The last
        element is used as the input file (or comma-separated files).

    Raises
    ------
    ValueError
        If ``--task`` is missing or not recognised, or if ``--method``
        is not supplied for the ``clustagree`` task.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t",
                      "--test",
                      dest="test",
                      type="string",
                      help="supply help")

    parser.add_option("--task",
                      dest="task",
                      type="string",
                      help="analysis task to be executed")

    # NOTE(review): --infile is accepted but never read below; the input
    # path is taken from the last command line argument instead.
    parser.add_option("--infile",
                      dest="infile",
                      type="string",
                      help="input file path")

    parser.add_option("--method",
                      dest="method",
                      type="choice",
                      choices=("replicate", "resample"),
                      help="whether to use replicate or resample "
                      "for consensus clustering.")

    parser.add_option("--cluster-algorithm",
                      dest="cluster",
                      type="string",
                      help="hierarchical clustering algorithm")

    parser.add_option("--expression-file",
                      dest="express",
                      type="string",
                      help="matching expression data from input"
                      " distance matrix")

    parser.add_option("--cluster-file",
                      dest="clustfile",
                      type="string",
                      help="file to output cluster labels to")

    parser.add_option("--output-file",
                      dest="outfile",
                      type="string",
                      help="output file to write to")

    parser.add_option("--cut-height",
                      dest="cutHeight",
                      type="string",
                      help="threshold at which to define consensus clusters"
                      "as valid")

    parser.add_option("--split-clusters",
                      dest="split",
                      action="store_true",
                      help="switch for using deepSplit in tree cutting")

    parser.add_option("--cluster-size",
                      dest="cluster_size",
                      type="int",
                      help="minimum cluster size for tree cutting. Clusters "
                      "with fewer than this many objects will be merged with "
                      "nearest cluster. Default=30")

    parser.add_option("--image-dir",
                      dest="images_dir",
                      type="string",
                      help="directory to write plots/figures to")

    # Defaults must be registered BEFORE the command line is parsed:
    # setting them afterwards cannot change the already-returned
    # ``options`` object, which left cutHeight/cluster_size as None and
    # made float()/int() below fail when the flags were omitted.
    parser.set_defaults(cutHeight=0,
                        conditions=None,
                        split=False,
                        cluster_size=30)

    (options, args) = E.Start(parser, argv=argv)

    # the input file (or comma-separated list of files) is the last argument
    infile = argv[-1]

    if options.task == "cluster":

        data_frame = TS.treeCutting(infile=infile,
                                    expression_file=options.express,
                                    cluster_file=options.clustfile,
                                    cluster_algorithm=options.cluster,
                                    deepsplit=options.split)

    elif options.task == "clustagree":
        if options.method == "resample":
            data_frame = TS.clusterAgreement(infile)
        elif options.method == "replicate":
            file_list = infile.split(",")
            data_frame = TS.clusterAverage(file_list)
        else:
            # --method was not given; fail with a clear message instead of
            # an unbound ``data_frame`` further down.
            raise ValueError("task 'clustagree' requires --method to be "
                             "'replicate' or 'resample'")

    elif options.task == "consensus-cluster":
        min_size = int(options.cluster_size)
        data_frame = TS.consensusClustering(infile=infile,
                                            cutHeight=float(options.cutHeight),
                                            cluster_algorithm=options.cluster,
                                            min_size=min_size,
                                            deepsplit=options.split)

    elif options.task == "pca":
        # expects "<cluster_file>,<infile>" as the last argument
        files = infile.split(",")
        infile = files[1]
        cluster_file = files[0]
        data_frame = TS.clusterPCA(infile=infile,
                                   cluster_file=cluster_file,
                                   image_dir=options.images_dir)

    else:
        raise ValueError("unknown task %r: expected one of 'cluster', "
                         "'clustagree', 'consensus-cluster' or 'pca'"
                         % (options.task,))

    data_frame.to_csv(options.stdout,
                      sep="\t",
                      header=True,
                      index_label="gene_id")

    # Write footer and output benchmark information.
    E.Stop()

示例#2

显示文件

文件： distance2clusters.py 项目： SCV/cgat

def main(argv=None):
    """Script entry point: execute one clustering task and print its table.

    Parses command line options in sys.argv, unless *argv* is given.
    Depending on ``--task`` one of ``TS.treeCutting``,
    ``TS.clusterAgreement`` / ``TS.clusterAverage``,
    ``TS.consensusClustering`` or ``TS.clusterPCA`` is called, and the
    object it returns is written to ``options.stdout`` via ``to_csv``
    (tab-separated, with header, index labelled ``gene_id``).

    Parameters
    ----------
    argv : list of str, optional
        Command line arguments; ``sys.argv`` is used when omitted. The
        final element is interpreted as the input file, or as a
        comma-separated list of files for the ``clustagree``/``replicate``
        and ``pca`` tasks.

    Raises
    ------
    ValueError
        When ``--task`` is absent or unrecognised, or when the
        ``clustagree`` task is requested without ``--method``.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t", "--test", dest="test", type="string",
                      help="supply help")

    parser.add_option("--task", dest="task", type="string",
                      help="analysis task to be executed")

    # NOTE(review): options.infile is never consulted in this function;
    # the input path comes from the last command line argument.
    parser.add_option("--infile", dest="infile", type="string",
                      help="input file path")

    parser.add_option("--method", dest="method", type="choice",
                      choices=("replicate", "resample"),
                      help="whether to use replicate or resample "
                      "for consensus clustering.")

    parser.add_option("--cluster-algorithm", dest="cluster", type="string",
                      help="hierarchical clustering algorithm")

    parser.add_option("--expression-file", dest="express", type="string",
                      help="matching expression data from input"
                      " distance matrix")

    parser.add_option("--cluster-file", dest="clustfile", type="string",
                      help="file to output cluster labels to")

    parser.add_option("--output-file", dest="outfile", type="string",
                      help="output file to write to")

    parser.add_option("--cut-height", dest="cutHeight", type="string",
                      help="threshold at which to define consensus clusters"
                      "as valid")

    parser.add_option("--split-clusters", dest="split", action="store_true",
                      help="switch for using deepSplit in tree cutting")

    parser.add_option("--cluster-size", dest="cluster_size", type="int",
                      help="minimum cluster size for tree cutting. Clusters "
                      "with fewer than this many objects will be merged with "
                      "nearest cluster. Default=30")

    parser.add_option("--image-dir", dest="images_dir", type="string",
                      help="directory to write plots/figures to")

    # Register defaults before parsing. Previously this call came after
    # E.Start(), so the parsed ``options`` never received the defaults and
    # float(options.cutHeight) / int(options.cluster_size) were handed
    # None whenever the corresponding flag was left out.
    parser.set_defaults(cutHeight=0,
                        conditions=None,
                        split=False,
                        cluster_size=30)

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    # last argument is the input file (or comma-separated files)
    infile = argv[-1]

    task = options.task

    if task == "cluster":
        data_frame = TS.treeCutting(infile=infile,
                                    expression_file=options.express,
                                    cluster_file=options.clustfile,
                                    cluster_algorithm=options.cluster,
                                    deepsplit=options.split)

    elif task == "clustagree":
        if options.method == "resample":
            data_frame = TS.clusterAgreement(infile)
        elif options.method == "replicate":
            data_frame = TS.clusterAverage(infile.split(","))
        else:
            # Without --method no branch would assign data_frame; report
            # the actual problem rather than failing later on to_csv.
            raise ValueError("task 'clustagree' requires --method to be "
                             "'replicate' or 'resample'")

    elif task == "consensus-cluster":
        data_frame = TS.consensusClustering(
            infile=infile,
            cutHeight=float(options.cutHeight),
            cluster_algorithm=options.cluster,
            min_size=int(options.cluster_size),
            deepsplit=options.split)

    elif task == "pca":
        # the last argument is "<cluster_file>,<infile>"
        files = infile.split(",")
        cluster_file = files[0]
        infile = files[1]
        data_frame = TS.clusterPCA(infile=infile,
                                   cluster_file=cluster_file,
                                   image_dir=options.images_dir)

    else:
        raise ValueError("unknown task %r: expected one of 'cluster', "
                         "'clustagree', 'consensus-cluster' or 'pca'"
                         % (task,))

    data_frame.to_csv(options.stdout,
                      sep="\t",
                      header=True,
                      index_label="gene_id")

    # Write footer and output benchmark information.
    E.Stop()