def _read_data_lines(filename):
    """Return all lines of *filename* that are not ``#`` comments."""
    infile = IOTools.openFile(filename, "r")
    try:
        return [x for x in infile.readlines() if not x.startswith("#")]
    finally:
        infile.close()


def _rotate_long_labels(plot_options, headers):
    """Switch to vertical axis labels if the column names are too long.

    Returns the (possibly modified) R option string. As a side effect the
    bottom margin of the current R device is enlarged so that the rotated
    names fit below the plot.
    """
    if max([len(x) for x in headers]) > 40 / len(headers):
        # remove xlabel, it would overlap with the rotated names
        plot_options = re.sub(", xlab='[^']+'", "", plot_options)
        plot_options += ", names.arg=headers, las=2"
        # the large bottom margin leaves room for the names below the plot
        R("""op <- par(mar=c(11,4,4,2))""")
    return plot_options


def main(argv=None):
    """Script main: compute statistics and draw R plots for a table.

    Reads a tab-separated table (from ``--file``, a single positional
    argument or stdin), loads it into R as ``matrix``, optionally prints
    statistics (``--stats``) and creates plots (``--plot``).  Without
    ``--hardcopy`` and with a named input file an interactive python
    console is started after plotting.

    Parses command line options in sys.argv, unless *argv* is given.

    Raises IOError if the input is empty and ``--fail-on-empty`` is in
    effect, or if the ``count`` statistic is requested for stdin input.
    """
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: r_table2scatter.py 2782 2009-09-10 11:40:29Z andreas $")

    parser.add_option(
        "-c", "--columns", dest="columns", type="string",
        help="columns to take from table. Choices are 'all', 'all-but-first' or a ','-separated list of columns.")

    parser.add_option(
        "--logscale", dest="logscale", type="string",
        help="log-transform one or both axes [default=%default].")

    parser.add_option(
        "-a", "--hardcopy", dest="hardcopy", type="string",
        help="write hardcopy to file [default=%default].",
        metavar="FILE")

    parser.add_option(
        "-f", "--file", dest="input_filename", type="string",
        help="filename with table data [default=%default].",
        metavar="FILE")

    parser.add_option(
        "-2", "--file2", dest="input_filename2", type="string",
        help="additional data file [default=%default].",
        metavar="FILE")

    parser.add_option(
        "-s", "--stats", dest="statistics", type="choice",
        choices=("correlation", "spearman", "pearson", "count"),
        help="statistical quantities to compute [default=%default]",
        action="append")

    parser.add_option(
        "-p", "--plot", dest="plot", type="choice",
        choices=("scatter", "pairs", "panel", "bar", "bar-stacked",
                 "bar-besides", "1_vs_x", "matched", "boxplot",
                 "scatter+marginal", "scatter-regression"),
        help="plots to plot [default=%default]",
        action="append")

    parser.add_option(
        "-t", "--threshold", dest="threshold", type="float",
        help="min threshold to use for counting method [default=%default].")

    parser.add_option(
        "-o", "--colours", dest="colours", type="int",
        help="column with colour information [default=%default].")

    parser.add_option(
        "-l", "--plot-labels", dest="labels", type="string",
        help="column labels for x and y in matched plots [default=%default].")

    parser.add_option(
        "-d", "--add-diagonal", dest="add_diagonal", action="store_true",
        help="add diagonal to plot [default=%default].")

    parser.add_option(
        "-e", "--plot-legend", dest="legend", type="int",
        help="column with legend [default=%default].")

    parser.add_option(
        "-r", "--options", dest="r_options", type="string",
        help="R plotting options [default=%default].")

    parser.add_option(
        "--format", dest="format", type="choice",
        choices=("full", "sparse"),
        help="output format [default=%default].")

    parser.add_option(
        "--title", dest="title", type="string",
        help="plot title [default=%default].")

    parser.add_option(
        "--xrange", dest="xrange", type="string",
        help="x viewing range of plot [default=%default].")

    parser.add_option(
        "--yrange", dest="yrange", type="string",
        help="y viewing range of plot[default=%default].")

    parser.add_option(
        "--allow-empty-file", dest="fail_on_empty", action="store_false",
        help="do not fail on empty input [default=%default].")

    parser.add_option(
        "--fail-on-empty", dest="fail_on_empty", action="store_true",
        help="fail on empty input [default=%default].")

    parser.set_defaults(
        hardcopy=None,
        input_filename="",
        input_filename2=None,
        columns="all",
        logscale=None,
        statistics=[],
        plot=[],
        threshold=0.0,
        labels="x,y",
        colours=None,
        # the option's dest is ``add_diagonal``
        add_diagonal=False,
        legend=None,
        title=None,
        xrange=None,
        yrange=None,
        r_options="",
        fail_on_empty=True,
        format="full")

    (options, args) = E.Start(parser)

    if len(args) == 1 and not options.input_filename:
        options.input_filename = args[0]

    # column numbers on the command line are 1-based, convert to 0-based
    if options.columns not in ("all", "all-but-first"):
        options.columns = [int(x) - 1 for x in options.columns.split(",")]

    if options.colours:
        options.colours -= 1
    if options.legend:
        options.legend -= 1

    # read data matrix
    if options.input_filename:
        lines = _read_data_lines(options.input_filename)
    else:
        # note: this will not work for interactive viewing, but
        # creating hardcopy plots works.
        lines = [x for x in sys.stdin.readlines() if not x.startswith("#")]

    if len(lines) == 0:
        if options.fail_on_empty:
            raise IOError("no input")
        E.warn("empty input")
        E.Stop()
        return

    matrix, headers, colours, legend = readTable(
        lines, "matrix",
        take_columns=options.columns,
        headers=True,
        colours=options.colours,
        row_names=options.legend)

    if options.input_filename2:
        # read another matrix (should be of the same format). It is read
        # from the second file and made available in R as >matrix2<.
        readTable(
            _read_data_lines(options.input_filename2), "matrix2",
            take_columns=options.columns,
            headers=True,
            colours=options.colours,
            row_names=options.legend)

    R.assign("headers", headers)

    ndata = R("""length( matrix[,1] )""")[0]

    if options.loglevel >= 1:
        options.stdlog.write("# read matrix: %ix%i\n" % (len(headers), ndata))

    if colours:
        R.assign("colours", colours)

    for method in options.statistics:

        if method == "correlation":
            cor = R.cor(matrix, use="pairwise.complete.obs")
            writeMatrix(options.stdout, cor, headers=headers, format="%5.2f")

        elif method == "pearson":
            options.stdout.write("\t".join(
                ("var1", "var2", "coeff", "passed", "pvalue",
                 "n", "method", "alternative")) + "\n")
            for x in range(len(headers) - 1):
                for y in range(x + 1, len(headers)):
                    try:
                        result = R("""cor.test( matrix[,%i], matrix[,%i] )""" %
                                   (x + 1, y + 1))
                    except rpy.RPyException as msg:
                        E.warn(
                            "correlation not computed for columns %i(%s) and %i(%s): %s" %
                            (x, headers[x], y, headers[y], msg))
                        options.stdout.write(
                            "%s\t%s\t%s\t%s\t%s\t%i\t%s\t%s\n" %
                            (headers[x], headers[y],
                             "na", "na", "na", 0, "na", "na"))
                    else:
                        options.stdout.write(
                            "%s\t%s\t%6.4f\t%s\t%e\t%i\t%s\t%s\n" %
                            (headers[x], headers[y],
                             result.rx2('estimate').rx2('cor')[0],
                             Stats.getSignificance(
                                 float(result.rx2('p.value')[0])),
                             result.rx2('p.value')[0],
                             result.rx2('parameter').rx2('df')[0],
                             result.rx2('method')[0],
                             result.rx2('alternative')[0]))

        elif method == "spearman":
            options.stdout.write("\t".join(
                ("var1", "var2", "coeff", "passed", "pvalue",
                 "method", "alternative")) + "\n")
            for x in range(len(headers) - 1):
                for y in range(x + 1, len(headers)):
                    result = R(
                        """cor.test( matrix[,%i], matrix[,%i], method='spearman')""" %
                        (x + 1, y + 1))
                    # Seven fields, matching the header above. R reports
                    # no degrees of freedom for Spearman's test, so there
                    # is no "n" column here.
                    pvalue = result.rx2('p.value')[0]
                    options.stdout.write(
                        "%s\t%s\t%6.4f\t%s\t%e\t%s\t%s\n" %
                        (headers[x], headers[y],
                         result.rx2('estimate').rx2('rho')[0],
                         Stats.getSignificance(float(pvalue)),
                         pvalue,
                         result.rx2('method')[0],
                         result.rx2('alternative')[0]))

        elif method == "count":
            # number of shared elements > threshold
            if not options.input_filename:
                raise IOError(
                    "method 'count' requires an input file, "
                    "input from stdin is not supported")
            with open(options.input_filename, "r") as infile:
                m, _, c = MatlabTools.ReadMatrix(infile,
                                                 take=options.columns,
                                                 headers=True)
            mask = numpy.greater(m, options.threshold)
            counts = numpy.dot(numpy.transpose(mask), mask)
            writeMatrix(options.stdout, counts, headers=c, format="%i")

    if options.plot:

        # remove columns that are completely empty
        if "pairs" in options.plot:
            colsums = R('''colSums( is.na(matrix ))''')
            ncolumns = len(colsums)
            take = [x for x in range(ncolumns) if colsums[x] != ndata]
            if take:
                removed = [x for x in range(ncolumns) if colsums[x] == ndata]
                if removed:
                    E.warn("removing empty columns %s before plotting" %
                           str(removed))
                matrix = R.subset(matrix, select=[x + 1 for x in take])
                R.assign("""matrix""", matrix)
                headers = [headers[x] for x in take]
                # keep the R-side copy in sync, it is used as names.arg
                R.assign("headers", headers)
                if legend:
                    # NOTE(review): the legend is replaced by the retained
                    # column headers here, as in the previous code - confirm
                    # that this is intended as legend was read as row names.
                    legend = list(headers)

        # the colour column is 0-based at this point, so test against None
        # instead of relying on truthiness (column 0 is a valid choice).
        has_colours = options.colours is not None

        if options.r_options:
            extra_options = ", %s" % options.r_options
        else:
            extra_options = ""

        if options.legend is not None and len(legend):
            extra_options += ", legend=c('%s')" % "','".join(legend)

        if options.labels:
            xlabel, ylabel = options.labels.split(",")
            extra_options += ", xlab='%s', ylab='%s'" % (xlabel, ylabel)
        else:
            xlabel, ylabel = "", ""

        if has_colours:
            extra_options += ", col=colours"

        if options.logscale:
            extra_options += ", log='%s'" % options.logscale

        if options.xrange:
            extra_options += ", xlim=c(%f,%f)" % tuple(
                map(float, options.xrange.split(",")))

        if options.yrange:
            extra_options += ", ylim=c(%f,%f)" % tuple(
                map(float, options.yrange.split(",")))

        if options.hardcopy:
            if options.hardcopy.endswith(".eps"):
                R.postscript(options.hardcopy)
            elif options.hardcopy.endswith(".png"):
                R.png(options.hardcopy, width=1024, height=768, type="cairo")
            elif options.hardcopy.endswith((".jpg", ".jpeg")):
                # the R graphics device is called jpeg, there is no jpg()
                R.jpeg(options.hardcopy, width=1024, height=768, type="cairo")
            else:
                E.warn("unknown hardcopy format for %s, "
                       "plotting to the default R device" % options.hardcopy)

        # use small dots for large data sets
        if ndata < 1000:
            point_size, pch = "1", "o"
        else:
            point_size, pch = "0.5", "."

        for method in options.plot:

            # Each plot type starts from the common options. Type specific
            # additions must not leak into the next plot.
            plot_options = extra_options

            if method == "scatter":
                R("""plot( matrix[,1], matrix[,2], cex=%s, pch="o" %s)""" %
                  (point_size, plot_options))

            elif method == "scatter-regression":
                R("""plot( matrix[,1], matrix[,2], cex=%s, pch="o" %s)""" %
                  (point_size, plot_options))
                R("""dat <- data.frame(x = matrix[,1], y = matrix[,2])""")
                R("""new <- data.frame(x = seq( min(matrix[,1]), max(matrix[,1]), (max(matrix[,1]) - min(matrix[,1])) / 100))""")
                mod = R("""mod <- lm( y ~ x, dat)""")
                R("""predict(mod, new, se.fit = TRUE)""")
                R("""pred.w.plim <- predict(mod, new, interval="prediction")""")
                R("""pred.w.clim <- predict(mod, new, interval="confidence")""")
                R("""matpoints(new$x,cbind(pred.w.clim, pred.w.plim[,-1]), lty=c(1,2,2,3,3), type="l")""")
                coefficients = mod.rx2("coefficients")
                R.mtext(
                    "y = %f * x + %f, r=%6.4f, n=%i" %
                    (coefficients.rx2("x")[0],
                     coefficients.rx2("(Intercept)")[0],
                     R("""cor( dat )[2]""")[0],
                     ndata),
                    3, cex=1.0)

            elif method == "pairs":
                if options.add_diagonal:
                    R("""panel.hist <- function( x,y,... ) { points(x,y,...); abline(0,1); }""")
                else:
                    R("""panel.hist <- function( x,y,... ) { points(x,y,...); }""")

                # There used to be a argument na_action="na.omit", but
                # removed this as there appeared error messages saying
                # "na.action is not a graphical parameter" and the
                # plots showed occasionally the wrong scale.
                # cex=point_size also caused trouble (error message:
                # "X11 used font size 8 when 2 was requested" or
                # similar)
                if has_colours:
                    R.pairs(matrix,
                            pch=pch,
                            col=colours,
                            main=options.title,
                            panel="panel.hist",
                            labels=headers,
                            cex_labels=2.0)
                else:
                    R.pairs(matrix,
                            pch=pch,
                            panel="panel.hist",
                            main=options.title,
                            labels=headers,
                            cex_labels=2.0)

            elif method == "boxplot":
                if options.title:
                    plot_options += ",main='%s'" % options.title
                # set vertical orientation
                plot_options = _rotate_long_labels(plot_options, headers)
                R("""boxplot( matrix %s)""" % plot_options)

            elif method == "bar" or method == "bar-stacked":
                if not has_colours:
                    plot_options += ", col=rainbow(5)"
                # set vertical orientation
                plot_options = _rotate_long_labels(plot_options, headers)
                # plot_options carries its own leading comma
                R("""barplot(as.matrix(matrix) %s)""" % plot_options)

            elif method == "bar-besides":
                if not has_colours:
                    plot_options += ", col=rainbow(%i)" % ndata
                # set vertical orientation
                plot_options = _rotate_long_labels(plot_options, headers)
                R("""barplot(as.matrix(matrix), beside=TRUE %s)""" %
                  plot_options)

            elif method == "scatter+marginal":
                if options.title:
                    # set the size of the outer margins - the title needs
                    # to be added at the end after plots have been created
                    R.par(oma=R.c(0, 0, 4, 0))

                R("""
                x <- matrix[,1];
                y <- matrix[,2];
                xhist <- hist(x, breaks=20, plot=FALSE);
                yhist <- hist(y, breaks=20, plot=FALSE);
                top <- max(c(xhist$counts, yhist$counts));
                nf <- layout(matrix(c(2,0,1,3),2,2,byrow=TRUE), c(3,1), c(1,3), respect=TRUE );
                par(mar=c(3,3,1,1)) ;
                plot(x, y, cex=%s, pch="o" %s) ;
                par(mar=c(0,3,1,1)) ;
                barplot(xhist$counts, axes=FALSE, ylim=c(0, top), space=0 ) ;
                par(mar=c(3,0,1,1)) ;
                title(main='%s');
                barplot(yhist$counts, axes=FALSE, xlim=c(0, top), space=0, horiz=TRUE ) ;
                title(main='%s');
                """ % (point_size, plot_options, xlabel, ylabel))

                if options.title:
                    R.mtext(options.title, 3, outer=True, line=1, cex=1.5)

            elif method in ("panel", "1_vs_x", "matched"):

                if method == "panel":
                    # all against all
                    pairs = [(x, y)
                             for x in range(len(headers) - 1)
                             for y in range(x + 1, len(headers))]

                elif method == "1_vs_x":
                    # first column against all others
                    pairs = [(0, x) for x in range(1, len(headers))]

                else:
                    # "matched": pair each column with the next column
                    # carrying the same header
                    pairs = []
                    for x in range(len(headers) - 1):
                        for y in range(x + 1, len(headers)):
                            if headers[x] == headers[y]:
                                pairs.append((x, y))
                                break

                if not pairs:
                    E.warn("no column pairs to plot for method %s" % method)
                    continue

                # arrange the plots in a grid that is as square as possible
                w = int(math.ceil(math.sqrt(len(pairs))))
                h = int(math.ceil(float(len(pairs)) / w))

                R("""layout(matrix(seq(1,%i), %i, %i, byrow = TRUE))""" %
                  (w * h, w, h))

                for a, b in pairs:
                    try:
                        R("""plot(matrix[,%i], matrix[,%i], main='%s versus %s', cex=0.5, pch=".", xlab='%s', ylab='%s' )""" %
                          (a + 1, b + 1, headers[b], headers[a],
                           xlabel, ylabel))
                    except rpy.RException as msg:
                        E.warn("could not plot %s versus %s: %s" %
                               (headers[b], headers[a], msg))

        if options.hardcopy:
            R['dev.off']()

    E.info("matrix added as >matrix< in R.")

    if not options.hardcopy:
        if options.input_filename:
            interpreter = code.InteractiveConsole(globals())
            interpreter.interact()
        else:
            E.info(
                "can not start new interactive session as input has come from stdin.")

    E.Stop()
def _get_aggregate(method):
    """Return the aggregate function selected by the suffix of *method*.

    *method* is a name such as ``normalize-by-column-max``; the part after
    the last dash selects the function.  Raises ValueError for an unknown
    suffix.
    """
    aggregates = {
        "max": max,
        "min": min,
        "median": numpy.median,
        "mean": numpy.mean,
        "total": sum,
    }
    suffix = method.rsplit("-", 1)[-1]
    try:
        return aggregates[suffix]
    except KeyError:
        raise ValueError("unknown normalization method %s" % method)


def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Reads one or more matrices from stdin (several matrices are separated
    by lines starting with ``>``), applies the transformations given by
    ``--method`` in command line order and writes each result to stdout.
    Methods that need extra arguments consume them in order from the
    comma-separated ``--parameters`` list.

    Raises IOError if there is no input and ValueError if a method is
    applied to a matrix it does not support.
    """
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: matrix2matrix.py 2782 2009-09-10 11:40:29Z andreas $")

    parser.add_option(
        "-m", "--method", dest="methods", type="choice", action="append",
        choices=(
            "normalize-by-min-diagonal",
            "normalize-by-column",
            "log",
            "ln",
            "negzero2value",
            "set-diagonal",
            "subtract-matrix",
            "mix-matrix",
            "normalize-by-matrix",
            "normalize-by-column-max",
            "normalize-by-row-max",
            "normalize-by-column-min",
            "normalize-by-row-min",
            "normalize-by-column-median",
            "normalize-by-row-median",
            "normalize-by-column-mean",
            "normalize-by-row-mean",
            "normalize-by-column-total",
            "normalize-by-row-total",
            "correspondence-analysis",
            "normalize-by-value",
            "add-value",
            "sort-rows",
            "sort-columns",
            "transpose",
            "upper-bound",
            "lower-bound",
            "subtract-first-col",
            "multiply-by-value",
            "divide-by-value",
            "mask-rows",
            "mask-columns",
            "mask-rows-and-columns",
            "symmetrize-mean",
            "symmetrize-max",
            "symmetrize-min",
        ),
        help="""method to use [default=%default]""")

    parser.add_option(
        "-s", "--scale", dest="scale", type="float",
        help="factor to scale matrix by [default=%default].")

    parser.add_option(
        "-f", "--format", dest="format", type="string",
        help="output number format [default=%default].")

    parser.add_option(
        "--filename-rows", dest="filename_rows", type="string",
        help="filename with rows to mask [default=%default].")

    parser.add_option(
        "--filename-columns", dest="filename_columns", type="string",
        help="filename with columns to mask [default=%default].")

    parser.add_option(
        "-p", "--parameters", dest="parameters", type="string",
        help="Parameters for various functions.")

    parser.add_option(
        "-t", "--headers", dest="headers", action="store_true",
        help="matrix has row/column headers.")

    parser.add_option(
        "--no-headers", dest="headers", action="store_false",
        help="matrix has no row/column headers.")

    parser.add_option(
        "-a", "--value", dest="value", type="float",
        help="value to use for various algorithms.")

    parser.add_option(
        "-i", "--input-format", dest="input_format", type="choice",
        choices=("full", "sparse", "phylip"),
        help="""input format for matrix.""")

    parser.add_option(
        "-o", "--output-format", dest="output_format", type="choice",
        choices=("full", "sparse", "phylip"),
        help="""output format for matrix.""")

    parser.add_option(
        "--missing", dest="missing", type="float",
        help="value to use for missing values. If not set, missing values will cause the script to fail [default=%default].")

    parser.set_defaults(
        methods=[],
        scale=1.0,
        headers=True,
        format="%6.4f",
        output_format="full",
        input_format="full",
        value=0.0,
        parameters="",
        write_separators=True,
        filename_rows=None,
        filename_columns=None,
        missing=None,
    )

    (options, args) = E.Start(parser)

    options.parameters = options.parameters.split(",")

    # real lists are needed below (len(), append(), indexing)
    lines = [x for x in sys.stdin.readlines() if x[0] != "#"]

    if len(lines) == 0:
        raise IOError("no input")

    # positions of the separator lines between matrices
    chunks = [x for x, line in enumerate(lines) if line[0] == ">"]

    if not chunks:
        # a single matrix without separator line
        options.write_separators = False
        chunks = [-1]

    chunks.append(len(lines))

    row_names = None
    column_names = None

    if options.filename_rows:
        with open(options.filename_rows, "r") as infile:
            row_names, _ = IOTools.ReadList(infile)
    if options.filename_columns:
        with open(options.filename_columns, "r") as infile:
            column_names, _ = IOTools.ReadList(infile)

    for chunk in range(len(chunks) - 1):

        try:
            raw_matrix, row_headers, col_headers = MatlabTools.readMatrix(
                StringIO.StringIO(
                    "".join(lines[chunks[chunk] + 1:chunks[chunk + 1]])),
                format=options.input_format,
                headers=options.headers,
                missing=options.missing)
        except ValueError as msg:
            E.warn("matrix could not be read: %s" % msg)
            continue

        nrows, ncols = raw_matrix.shape

        E.debug("read matrix: %i x %i, %i row titles, %i colum titles" %
                (nrows, ncols, len(row_headers), len(col_headers)))

        # index of the next unused entry in options.parameters
        parameter = 0

        for method in options.methods:

            # work on a copy, raw_matrix holds the input of this step
            matrix = numpy.reshape(numpy.array(raw_matrix), raw_matrix.shape)

            # Methods that combine two matrices: load the second matrix
            # first. This must not be part of the if/elif chain below,
            # otherwise the methods themselves are never applied.
            if method in ("normalize-by-matrix", "subtract-matrix",
                          "mix-matrix", "add-matrix"):

                with open(options.parameters[parameter], "r") as infile:
                    other_matrix, other_row_headers, other_col_headers = \
                        MatlabTools.ReadMatrix(infile,
                                               headers=options.headers)

                other_nrows, other_ncols = other_matrix.shape

                if options.loglevel >= 2:
                    options.stdlog.write(
                        "# read second matrix from %s: %i x %i, %i row titles, %i colum titles.\n" %
                        (options.parameters[parameter],
                         other_nrows, other_ncols,
                         len(other_row_headers), len(other_col_headers)))

                parameter += 1

            if method == "normalize-by-min-diagonal":
                for x in range(nrows):
                    for y in range(ncols):
                        m = min(raw_matrix[x, x], raw_matrix[y, y])
                        if m > 0:
                            matrix[x, y] = raw_matrix[x, y] / m

            elif method == "normalize-by-column":
                if nrows != ncols:
                    raise ValueError(
                        "only supported for symmetric matrices.")

                for x in range(nrows):
                    for y in range(ncols):
                        if raw_matrix[y, y] > 0:
                            matrix[x, y] = raw_matrix[x, y] / raw_matrix[y, y]

            elif method == "normalize-by-value":
                matrix = raw_matrix / float(options.parameters[parameter])
                parameter += 1

            elif method == "normalize-by-row":
                if nrows != ncols:
                    raise ValueError(
                        "only supported for symmetric matrices.")

                for x in range(nrows):
                    # guard the element that is actually used as divisor
                    if raw_matrix[x, x] > 0:
                        for y in range(ncols):
                            matrix[x, y] = raw_matrix[x, y] / raw_matrix[x, x]

            elif method == "subtract-first-col":
                for x in range(nrows):
                    for y in range(ncols):
                        matrix[x, y] -= raw_matrix[x, 0]

            elif method.startswith("normalize-by-column"):
                f = _get_aggregate(method)
                for y in range(ncols):
                    m = f(matrix[:, y])
                    if m != 0:
                        for x in range(nrows):
                            matrix[x, y] = matrix[x, y] / m

            elif method.startswith("normalize-by-row"):
                f = _get_aggregate(method)
                for x in range(nrows):
                    m = f(matrix[x, :])
                    if m != 0:
                        for y in range(ncols):
                            matrix[x, y] = raw_matrix[x, y] / m

            elif method == "negzero2value":
                # set zero/negative values to a value
                for x in range(nrows):
                    for y in range(ncols):
                        if matrix[x, y] <= 0:
                            matrix[x, y] = options.value

            elif method == "minmax":
                # Redistribute each pair of mirrored cells into its
                # minimum and maximum. Every pair is visited twice as the
                # loops cover the full matrix.
                for x in range(nrows):
                    for y in range(ncols):
                        matrix[x, y], matrix[y, x] = \
                            min(matrix[x, y], matrix[y, x]), \
                            max(matrix[x, y], matrix[y, x])

            elif method == "log":
                # apply log10 to all positive values
                for x in range(nrows):
                    for y in range(ncols):
                        if matrix[x, y] > 0:
                            matrix[x, y] = math.log10(matrix[x, y])

            elif method == "ln":
                # apply the natural logarithm to all positive values
                for x in range(nrows):
                    for y in range(ncols):
                        if matrix[x, y] > 0:
                            matrix[x, y] = math.log(matrix[x, y])

            elif method == "transpose":
                matrix = numpy.transpose(matrix)
                row_headers, col_headers = col_headers, row_headers
                nrows, ncols = ncols, nrows

            elif method == "mul":
                # the product with the transpose is nrows x nrows
                matrix = numpy.dot(matrix, numpy.transpose(matrix))
                col_headers = row_headers
                ncols = nrows

            elif method == "multiply-by-value":
                matrix *= options.value

            elif method == "divide-by-value":
                matrix /= options.value

            elif method == "add-value":
                matrix += options.value

            elif method == "angle":
                # write angles between col vectors
                v1 = numpy.sqrt(numpy.sum(numpy.power(matrix, 2), 0))
                matrix = numpy.dot(numpy.transpose(matrix), matrix)
                row_headers = col_headers
                nrows = ncols
                for x in range(nrows):
                    for y in range(ncols):
                        matrix[x, y] /= v1[x] * v1[y]

            elif method == "euclid":
                # convert to euclidean distance matrix
                matrix = numpy.zeros((ncols, ncols), float)
                for c1 in range(0, ncols - 1):
                    for c2 in range(c1 + 1, ncols):
                        for r in range(0, nrows):
                            d = raw_matrix[r][c1] - raw_matrix[r][c2]
                            matrix[c1, c2] += (d * d)
                        matrix[c2, c1] = matrix[c1, c2]
                matrix = numpy.sqrt(matrix)
                row_headers = col_headers
                nrows = ncols

            elif method.startswith("symmetrize"):
                f = method.split("-")[1]
                if f == "max":
                    f = max
                elif f == "min":
                    f = min
                elif f == "mean":
                    f = lambda x, y: float(x + y) / 2

                if nrows != ncols:
                    raise ValueError(
                        "symmetrize only available for symmetric matrices")
                if row_headers != col_headers:
                    raise ValueError(
                        "symmetrize not available for permuted matrices")
                for x in range(nrows):
                    for y in range(ncols):
                        matrix[x, y] = matrix[y, x] = \
                            f(matrix[x, y], matrix[y, x])

            elif method == "sub":
                matrix = options.value - matrix

            elif method in ("lower-bound", "upper-bound"):
                # takes two parameters: the boundary and its replacement
                boundary = float(options.parameters[parameter])
                new_value = float(options.parameters[parameter + 1])
                parameter += 2
                if method == "upper-bound":
                    for x in range(nrows):
                        for y in range(ncols):
                            if matrix[x, y] > boundary:
                                matrix[x, y] = new_value
                else:
                    for x in range(nrows):
                        for y in range(ncols):
                            if matrix[x, y] < boundary:
                                matrix[x, y] = new_value

            elif method == "subtract-matrix":
                matrix = matrix - other_matrix

            elif method == "add-matrix":
                matrix = matrix + other_matrix

            elif method == "normalize-by-matrix":
                # set 0s to 1 in the other matrix
                for x in range(nrows):
                    for y in range(ncols):
                        if other_matrix[x, y] == 0:
                            other_matrix[x, y] = 1.0

                matrix = matrix / other_matrix

            elif method == "mix-matrix":
                # copy the upper triangle of the other matrix
                for x in range(len(other_row_headers) - 1):
                    for y in range(x + 1, len(other_col_headers)):
                        matrix[x, y] = other_matrix[x, y]

            elif method == "set-diagonal":
                value = float(options.parameters[parameter])
                for x in range(min(nrows, ncols)):
                    matrix[x, x] = value
                parameter += 1

            elif method == "correspondence-analysis":
                row_indices, col_indices = \
                    CorrespondenceAnalysis.GetIndices(raw_matrix)
                map_row_new2old = numpy.argsort(row_indices)
                map_col_new2old = numpy.argsort(col_indices)

                matrix, row_headers, col_headers = \
                    CorrespondenceAnalysis.GetPermutatedMatrix(
                        raw_matrix,
                        map_row_new2old,
                        map_col_new2old,
                        row_headers=row_headers,
                        col_headers=col_headers)

            elif method == "mask-rows":
                if row_names is None:
                    raise ValueError(
                        "method mask-rows requires --filename-rows")
                r = set(row_names)
                for x in range(len(row_headers)):
                    if row_headers[x] in r:
                        matrix[x, :] = options.value

            elif method == "mask-columns":
                if column_names is None:
                    raise ValueError(
                        "method mask-columns requires --filename-columns")
                r = set(column_names)
                for x in range(len(col_headers)):
                    if col_headers[x] in r:
                        matrix[:, x] = options.value

            elif method == "mask-rows-and-columns":
                if row_names is None or column_names is None:
                    raise ValueError(
                        "method mask-rows-and-columns requires "
                        "--filename-rows and --filename-columns")
                r = set(row_names)
                c = set(column_names)
                for x in range(len(row_headers)):
                    for y in range(len(col_headers)):
                        if row_headers[x] in r and col_headers[y] in c:
                            matrix[x, y] = options.value

            # make sure the next method starts from a copy of this result
            raw_matrix = numpy.reshape(numpy.array(matrix), matrix.shape)

        # also covers simple re-formatting jobs without any method
        matrix = raw_matrix

        if options.write_separators:
            options.stdout.write(lines[chunks[chunk]])

        MatlabTools.writeMatrix(options.stdout, matrix,
                                value_format=options.format,
                                format=options.output_format,
                                row_headers=row_headers,
                                col_headers=col_headers)

    E.Stop()
for x in range(len(headers) - 1): for y in range(x + 1, len(headers)): result = R( """cor.test( matrix[,%i], matrix[,%i], method='spearman')""" % (x + 1, y + 1)) options.stdout.write( "%s\t%s\t%6.4f\t%s\t%e\t%i\t%s\t%s\n" % (headers[x], headers[y], result['estimate']['rho'], Stats.getSignificance(float(result['p.value'])), result['p.value'], result['parameter']['df'], result['method'], result['alternative'])) elif method == "count": # number of shared elements > threshold m, r, c = MatlabTools.ReadMatrix(open(options.input_filename, "r"), take=options.columns, headers=True) mask = numpy.greater(m, options.threshold) counts = numpy.dot(numpy.transpose(mask), mask) writeMatrix(options.stdout, counts, headers=c, format="%i") if options.plot: # remove columns that are completely empty if "pairs" in options.plot: colsums = R('''colSums( is.na(matrix ))''') take = [x for x in range(len(colsums)) if colsums[x] != ndata] if take: E.warn("removing empty columns %s before plotting" % str(take)) matrix = R.subset(matrix, select=[x + 1 for x in take]) R.assign("""matrix""", matrix)