示例#1
0
def main(argv=None):
    """Compute a pairwise time-series distance matrix and write it as TSV.

    Parses command line options in sys.argv, unless *argv* is given.

    The last positional argument is the input file.  Without ``--parallel``
    it is read as the tab-separated expression table.  With ``--parallel``
    the table is read from ``--expression-file`` instead, and the input
    file's *name* selects the slice of genes used as matrix columns: the
    fourth ``-``-separated field of its base name must start with
    ``<start>_<end>``.

    Rows labelled ``times`` and ``replicates`` are removed from the table
    before the distances are computed, and all remaining values are coerced
    to numbers (unparseable values become NaN).

    The resulting frame is written to ``--out`` when given, otherwise to
    ``options.stdout``.

    Raises
    ------
    ValueError
        If no input file is given, if ``--parallel`` is used without
        ``--expression-file``, or if ``--distance-metric`` is not one of
        ``dtw``, ``cross-correlate`` or ``temporal-correlate``.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t",
                      "--test",
                      dest="test",
                      type="string",
                      help="supply help")

    parser.add_option("--k",
                      dest="k",
                      type="int",
                      default=0,
                      help="value of k to adjust adaptive tuning function")

    parser.add_option("--out",
                      dest="outfile",
                      type="string",
                      help="output file name")

    parser.add_option("--expression-file",
                      dest="expr",
                      type="string",
                      help="file containing expression data")

    parser.add_option("--parallel",
                      dest="parallel",
                      action="store_true",
                      default=False,
                      help="switches on parallel, will"
                      " split distance matrix into relevant number of"
                      " slices. Start-end positions are defined by"
                      " the file name.")

    # the help text has always advertised dtw as the default; make it so,
    # otherwise omitting the option leaves no result to write
    parser.add_option("--distance-metric",
                      dest="dist_metric",
                      type="string",
                      default="dtw",
                      help="distance metric to use for dissimilarity of time "
                      "series objects.  Choices: dtw, cross-correlate, "
                      "temporal-correlate. Default=dtw")

    parser.add_option("--lag",
                      dest="lag",
                      type="string",
                      help="cross correlation lag to report")

    # defaults only take effect when registered *before* parsing
    parser.set_defaults(lag=0, k=0)

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.start(parser, argv=argv)

    # fail fast on bad input rather than after loading the data
    metric = options.dist_metric
    known_metrics = ("dtw", "cross-correlate", "temporal-correlate")
    if metric not in known_metrics:
        raise ValueError("unknown distance metric %r, choose from: %s"
                         % (metric, ", ".join(known_metrics)))

    if not args:
        raise ValueError("no input file given")
    infile = args[-1]

    if options.parallel:
        if not options.expr:
            raise ValueError("--parallel requires --expression-file")
        datfile = options.expr
    else:
        datfile = infile

    data = pd.read_table(datfile, sep="\t", index_col=0, header=0)

    # data should already be sorted in time-series order
    # the time and replicate rows need to be dropped to ensure only the
    # gene data is passed into the distance functions

    # drop each header row independently: a missing 'times' row must not
    # prevent 'replicates' from being removed
    header_rows = [label for label in ("times", "replicates")
                   if label in data.index]
    data = data.drop(header_rows, axis=0)
    genes = data.index

    # DataFrame.convert_objects was removed from pandas; coerce column-wise
    data = data.apply(pd.to_numeric, errors="coerce")

    if options.parallel:
        # slice bounds are encoded in the base name of the input file;
        # strip the directory first so a "-" in the path cannot shift fields
        bounds = infile.split("/")[-1].split("-")[3].split("_")
        start_idx = int(bounds[0])
        end_idx = int(bounds[1])
        columns = genes[start_idx:end_idx]
    else:
        # all pair-wise combinations
        columns = genes

    if metric == "dtw":
        df_ = TS.dtwWrapper(data=data,
                            rows=genes,
                            columns=columns,
                            k=options.k)
    elif metric == "cross-correlate":
        df_ = TS.correlateDistanceMetric(data=data,
                                         rows=genes,
                                         columns=columns,
                                         method=metric,
                                         lag=int(options.lag))
    else:
        # temporal-correlate (metric was validated above)
        df_ = TS.correlateDistanceMetric(data=data,
                                         rows=genes,
                                         columns=columns,
                                         method=metric)

    destination = options.outfile if options.outfile else options.stdout
    df_.to_csv(destination, sep="\t")

    # write footer and output benchmark information
    E.stop()
示例#2
0
def main(argv=None):
    """Compute a pairwise time-series distance matrix and write it as TSV.

    Parses command line options in sys.argv, unless *argv* is given.

    The last positional argument is the input file.  Without ``--parallel``
    it is read as the tab-separated expression table.  With ``--parallel``
    the table is read from ``--expression-file`` instead, and the input
    file's *name* selects the slice of genes used as matrix columns: the
    fourth ``-``-separated field of its base name must start with
    ``<start>_<end>``.

    Rows labelled ``times`` and ``replicates`` are removed from the table
    before the distances are computed, and all remaining values are coerced
    to numbers (unparseable values become NaN).

    The resulting frame is written to ``--out`` when given, otherwise to
    ``options.stdout``.

    Raises
    ------
    ValueError
        If no input file is given, if ``--parallel`` is used without
        ``--expression-file``, or if ``--distance-metric`` is not one of
        ``dtw``, ``cross-correlate`` or ``temporal-correlate``.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t", "--test", dest="test", type="string",
                      help="supply help")

    parser.add_option("--k", dest="k", type="int", default=0,
                      help="value of k to adjust adaptive tuning function")

    parser.add_option("--out", dest="outfile", type="string",
                      help="output file name")

    parser.add_option("--expression-file", dest="expr", type="string",
                      help="file containing expression data")

    parser.add_option("--parallel", dest="parallel", action="store_true",
                      default=False, help="switches on parallel, will"
                      " split distance matrix into relevant number of"
                      " slices. Start-end positions are defined by"
                      " the file name.")

    # the help text has always advertised dtw as the default; make it so,
    # otherwise omitting the option leaves no result to write
    parser.add_option("--distance-metric", dest="dist_metric", type="string",
                      default="dtw",
                      help="distance metric to use for dissimilarity of time "
                      "series objects.  Choices: dtw, cross-correlate, "
                      "temporal-correlate. Default=dtw")

    parser.add_option("--lag", dest="lag", type="string",
                      help="cross correlation lag to report")

    # defaults only take effect when registered *before* parsing
    parser.set_defaults(lag=0,
                        k=0)

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    # fail fast on bad input rather than after loading the data
    metric = options.dist_metric
    known_metrics = ("dtw", "cross-correlate", "temporal-correlate")
    if metric not in known_metrics:
        raise ValueError("unknown distance metric %r, choose from: %s"
                         % (metric, ", ".join(known_metrics)))

    if not args:
        raise ValueError("no input file given")
    infile = args[-1]

    if options.parallel:
        if not options.expr:
            raise ValueError("--parallel requires --expression-file")
        datfile = options.expr
    else:
        datfile = infile

    data = pd.read_table(datfile,
                         sep="\t",
                         index_col=0,
                         header=0)

    # data should already be sorted in time-series order
    # the time and replicate rows need to be dropped to ensure only the
    # gene data is passed into the distance functions

    # drop each header row independently: a missing 'times' row must not
    # prevent 'replicates' from being removed
    header_rows = [label for label in ("times", "replicates")
                   if label in data.index]
    data = data.drop(header_rows, axis=0)
    genes = data.index

    # DataFrame.convert_objects was removed from pandas; coerce column-wise
    data = data.apply(pd.to_numeric, errors="coerce")

    if options.parallel:
        # slice bounds are encoded in the base name of the input file;
        # strip the directory first so a "-" in the path cannot shift fields
        bounds = infile.split("/")[-1].split("-")[3].split("_")
        start_idx = int(bounds[0])
        end_idx = int(bounds[1])
        columns = genes[start_idx:end_idx]
    else:
        # all pair-wise combinations
        columns = genes

    if metric == "dtw":
        df_ = TS.dtwWrapper(data=data,
                            rows=genes,
                            columns=columns,
                            k=options.k)
    elif metric == "cross-correlate":
        df_ = TS.correlateDistanceMetric(data=data,
                                         rows=genes,
                                         columns=columns,
                                         method=metric,
                                         lag=int(options.lag))
    else:
        # temporal-correlate (metric was validated above)
        df_ = TS.correlateDistanceMetric(data=data,
                                         rows=genes,
                                         columns=columns,
                                         method=metric)

    destination = options.outfile if options.outfile else options.stdout
    df_.to_csv(destination, sep="\t")

    # write footer and output benchmark information
    E.Stop()