def main(argv=None):
    """Script main: compute a pairwise time-series distance matrix.

    Parses command line options in sys.argv, unless *argv* is given.

    The last positional argument is the input file.  Without
    ``--parallel`` it is the tab-separated expression table itself.  With
    ``--parallel`` the table is read from ``--expression-file`` and the
    input file *name* selects the slice of columns to compute: the 4th
    "-"-separated field of its base name must look like
    ``<start>_<end>``.

    The resulting table is written tab-separated to ``--out`` or, if that
    is not given, to ``options.stdout``.

    Raises ValueError if ``--distance-metric`` is not one of the
    supported choices.
    """
    # local import: only needed for the file name parsing below
    import os

    if not argv:
        argv = sys.argv

    metrics = ("dtw", "cross-correlate", "temporal-correlate")

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t", "--test", dest="test", type="string",
                      help="supply help")

    parser.add_option("--k", dest="k", type="int", default=0,
                      help="value of k to adjust adaptive tuning function")

    parser.add_option("--out", dest="outfile", type="string",
                      help="output file name")

    parser.add_option("--expression-file", dest="expr", type="string",
                      help="file containing expression data")

    parser.add_option("--parallel", dest="parallel", action="store_true",
                      default=False,
                      help="switches on parallel, will"
                      " split distance matrix into relevant number of"
                      " slices.  Start-end positions are defined by"
                      " the file name.")

    # the default is set here so that it matches the help text
    parser.add_option("--distance-metric", dest="dist_metric", type="string",
                      default="dtw",
                      help="distance metric to use for dissimilarity of time "
                      "series objects.  Choices: dtw, cross-correlate, "
                      "temporal-correlate.  Default=dtw")

    # defaults have to be known to the parser *before* parsing; a
    # set_defaults() call after parsing has no effect on `options`
    parser.add_option("--lag", dest="lag", type="int", default=0,
                      help="cross correlation lag to report")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.start(parser, argv=argv)

    # fail early with a clear message instead of hitting an unbound
    # result variable after the data has been loaded
    if options.dist_metric not in metrics:
        raise ValueError(
            "unknown distance metric '%s', expected one of: %s" %
            (options.dist_metric, ", ".join(metrics)))

    infile = args[-1]

    if options.parallel:
        datfile = options.expr
    else:
        datfile = infile

    data = pd.read_table(datfile, sep="\t", index_col=0, header=0)

    # data should already be sorted in time-series order.
    # The 'times' and 'replicates' rows need to be dropped to ensure only
    # the gene data is passed into the distance functions.  Each label is
    # handled on its own so that a missing 'times' row does not prevent
    # 'replicates' from being removed, and no exception type (which
    # differs between pandas versions) has to be caught.
    for label in ("times", "replicates"):
        if label in data.index:
            data.drop([label], inplace=True, axis=0)

    genes = data.index

    # coerce everything to numbers.  DataFrame.convert_objects was removed
    # in pandas 1.0; it is still used where available so results on old
    # installations do not change.
    if hasattr(data, "convert_objects"):
        data = data.convert_objects(convert_numeric=True)
    else:
        data = data.apply(pd.to_numeric, errors="coerce")

    # select the columns to compute against all genes
    if options.parallel:
        # always parse the base name, so that a "-" in a directory name
        # cannot shift the field that holds "<start>_<end>"
        bounds = os.path.basename(infile).split("-")[3].split("_")
        start_idx = int(bounds[0])
        end_idx = int(bounds[1])
        columns = genes[start_idx:end_idx]
    else:
        columns = genes

    # compute all pair-wise combinations of rows x columns
    if options.dist_metric == "dtw":
        df_ = TS.dtwWrapper(data=data,
                            rows=genes,
                            columns=columns,
                            k=options.k)
    elif options.dist_metric == "cross-correlate":
        df_ = TS.correlateDistanceMetric(data=data,
                                         rows=genes,
                                         columns=columns,
                                         method=options.dist_metric,
                                         lag=options.lag)
    else:
        # temporal-correlate: no lag is passed
        df_ = TS.correlateDistanceMetric(data=data,
                                         rows=genes,
                                         columns=columns,
                                         method=options.dist_metric)

    if not options.outfile:
        df_.to_csv(options.stdout, sep="\t")
    else:
        df_.to_csv(options.outfile, sep="\t")

    # write footer and output benchmark information
    E.stop()
def main(argv=None):
    """Script main: compute a pairwise time-series distance matrix.

    Parses command line options in sys.argv, unless *argv* is given.

    The last positional argument is the input file.  Without
    ``--parallel`` it is the tab-separated expression table itself.  With
    ``--parallel`` the table is read from ``--expression-file`` and the
    input file *name* selects the slice of columns to compute: the 4th
    "-"-separated field of its base name must look like
    ``<start>_<end>``.

    The resulting table is written tab-separated to ``--out`` or, if that
    is not given, to ``options.stdout``.

    Raises ValueError if ``--distance-metric`` is not one of the
    supported choices.
    """
    # local import: only needed for the file name parsing below
    import os

    if not argv:
        argv = sys.argv

    metrics = ("dtw", "cross-correlate", "temporal-correlate")

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t", "--test", dest="test", type="string",
                      help="supply help")

    parser.add_option("--k", dest="k", type="int", default=0,
                      help="value of k to adjust adaptive tuning function")

    parser.add_option("--out", dest="outfile", type="string",
                      help="output file name")

    parser.add_option("--expression-file", dest="expr", type="string",
                      help="file containing expression data")

    parser.add_option("--parallel", dest="parallel", action="store_true",
                      default=False,
                      help="switches on parallel, will"
                      " split distance matrix into relevant number of"
                      " slices.  Start-end positions are defined by"
                      " the file name.")

    # the default is set here so that it matches the help text
    parser.add_option("--distance-metric", dest="dist_metric", type="string",
                      default="dtw",
                      help="distance metric to use for dissimilarity of time "
                      "series objects.  Choices: dtw, cross-correlate, "
                      "temporal-correlate.  Default=dtw")

    # defaults have to be known to the parser *before* parsing; a
    # set_defaults() call after parsing has no effect on `options`
    parser.add_option("--lag", dest="lag", type="int", default=0,
                      help="cross correlation lag to report")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    # fail early with a clear message instead of hitting an unbound
    # result variable after the data has been loaded
    if options.dist_metric not in metrics:
        raise ValueError(
            "unknown distance metric '%s', expected one of: %s" %
            (options.dist_metric, ", ".join(metrics)))

    infile = args[-1]

    if options.parallel:
        datfile = options.expr
    else:
        datfile = infile

    data = pd.read_table(datfile, sep="\t", index_col=0, header=0)

    # data should already be sorted in time-series order.
    # The 'times' and 'replicates' rows need to be dropped to ensure only
    # the gene data is passed into the distance functions.  Each label is
    # handled on its own so that a missing 'times' row does not prevent
    # 'replicates' from being removed, and no exception type (which
    # differs between pandas versions) has to be caught.
    for label in ("times", "replicates"):
        if label in data.index:
            data.drop([label], inplace=True, axis=0)

    genes = data.index

    # coerce everything to numbers.  DataFrame.convert_objects was removed
    # in pandas 1.0; it is still used where available so results on old
    # installations do not change.
    if hasattr(data, "convert_objects"):
        data = data.convert_objects(convert_numeric=True)
    else:
        data = data.apply(pd.to_numeric, errors="coerce")

    # select the columns to compute against all genes
    if options.parallel:
        # always parse the base name, so that a "-" in a directory name
        # cannot shift the field that holds "<start>_<end>"
        bounds = os.path.basename(infile).split("-")[3].split("_")
        start_idx = int(bounds[0])
        end_idx = int(bounds[1])
        columns = genes[start_idx:end_idx]
    else:
        columns = genes

    # compute all pair-wise combinations of rows x columns
    if options.dist_metric == "dtw":
        df_ = TS.dtwWrapper(data=data,
                            rows=genes,
                            columns=columns,
                            k=options.k)
    elif options.dist_metric == "cross-correlate":
        df_ = TS.correlateDistanceMetric(data=data,
                                         rows=genes,
                                         columns=columns,
                                         method=options.dist_metric,
                                         lag=options.lag)
    else:
        # temporal-correlate: no lag is passed
        df_ = TS.correlateDistanceMetric(data=data,
                                         rows=genes,
                                         columns=columns,
                                         method=options.dist_metric)

    if not options.outfile:
        df_.to_csv(options.stdout, sep="\t")
    else:
        df_.to_csv(options.outfile, sep="\t")

    # write footer and output benchmark information
    E.Stop()