Python MatlabTools.ReadMatrix примеры использования

Язык программирования: Python

Пространство имен/Пакет: CGAT

Класс/Тип: MatlabTools

Метод/Функция: ReadMatrix

Примеров на hotexamples.com: 3

Python MatlabTools.ReadMatrix - 3 примера найдено. Это лучшие примеры Python кода для CGAT.MatlabTools.ReadMatrix, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

readMatrix(5)

ReadMatrix(3)

WriteMatrix(1)

writeMatrix(1)

Пример #1

Показать файл

Файл: r_table2scatter.py Проект: wangdi2014/cgat

def main(argv=None):

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: r_table2scatter.py 2782 2009-09-10 11:40:29Z andreas $"
    )

    parser.add_option(
        "-c",
        "--columns",
        dest="columns",
        type="string",
        help=
        "columns to take from table. Choices are 'all', 'all-but-first' or a ','-separated list of columns."
    )

    parser.add_option(
        "--logscale",
        dest="logscale",
        type="string",
        help="log-transform one or both axes [default=%Default].")

    parser.add_option("-a",
                      "--hardcopy",
                      dest="hardcopy",
                      type="string",
                      help="write hardcopy to file [default=%default].",
                      metavar="FILE")

    parser.add_option("-f",
                      "--file",
                      dest="input_filename",
                      type="string",
                      help="filename with table data [default=%default].",
                      metavar="FILE")

    parser.add_option("-2",
                      "--file2",
                      dest="input_filename2",
                      type="string",
                      help="additional data file [default=%default].",
                      metavar="FILE")

    parser.add_option(
        "-s",
        "--stats",
        dest="statistics",
        type="choice",
        choices=("correlation", "spearman", "pearson", "count"),
        help="statistical quantities to compute [default=%default]",
        action="append")

    parser.add_option("-p",
                      "--plot",
                      dest="plot",
                      type="choice",
                      choices=("scatter", "pairs", "panel", "bar",
                               "bar-stacked", "bar-besides", "1_vs_x",
                               "matched", "boxplot", "scatter+marginal",
                               "scatter-regression"),
                      help="plots to plot [default=%default]",
                      action="append")

    parser.add_option(
        "-t",
        "--threshold",
        dest="threshold",
        type="float",
        help="min threshold to use for counting method [default=%default].")

    parser.add_option(
        "-o",
        "--colours",
        dest="colours",
        type="int",
        help="column with colour information [default=%default].")

    parser.add_option(
        "-l",
        "--plot-labels",
        dest="labels",
        type="string",
        help="column labels for x and y in matched plots [default=%default].")

    parser.add_option("-d",
                      "--add-diagonal",
                      dest="add_diagonal",
                      action="store_true",
                      help="add diagonal to plot [default=%default].")

    parser.add_option("-e",
                      "--plot-legend",
                      dest="legend",
                      type="int",
                      help="column with legend [default=%default].")

    parser.add_option("-r",
                      "--options",
                      dest="r_options",
                      type="string",
                      help="R plotting options [default=%default].")

    parser.add_option("--format",
                      dest="format",
                      type="choice",
                      choices=("full", "sparse"),
                      help="output format [default=%default].")

    parser.add_option("--title",
                      dest="title",
                      type="string",
                      help="""plot title [default=%default].""")

    parser.add_option("",
                      "--xrange",
                      dest="xrange",
                      type="string",
                      help="x viewing range of plot [default=%default].")

    parser.add_option("",
                      "--yrange",
                      dest="yrange",
                      type="string",
                      help="y viewing range of plot[default=%default].")

    parser.add_option("--allow-empty-file",
                      dest="fail_on_empty",
                      action="store_false",
                      help="do not fail on empty input [default=%default].")

    parser.add_option("--fail-on-empty",
                      dest="fail_on_empty",
                      action="store_true",
                      help="fail on empty input [default=%default].")

    parser.set_defaults(hardcopy=None,
                        input_filename="",
                        input_filename2=None,
                        columns="all",
                        logscale=None,
                        statistics=[],
                        plot=[],
                        threshold=0.0,
                        labels="x,y",
                        colours=None,
                        diagonal=False,
                        legend=None,
                        title=None,
                        xrange=None,
                        yrange=None,
                        r_options="",
                        fail_on_empty=True,
                        format="full")

    (options, args) = E.Start(parser)

    if len(args) == 1 and not options.input_filename:
        options.input_filename = args[0]

    if options.columns not in ("all", "all-but-first"):
        options.columns = [int(x) - 1 for x in options.columns.split(",")]

    if options.colours:
        options.colours -= 1
    if options.legend:
        options.legend -= 1

    table = {}
    headers = []

    # read data matrix
    if options.input_filename:
        lines = IOTools.openFile(options.input_filename, "r").readlines()
    else:
        # note: this will not work for interactive viewing, but
        # creating hardcopy plots works.
        lines = sys.stdin.readlines()

    lines = [x for x in lines if x[0] != "#"]

    if len(lines) == 0:
        if options.fail_on_empty:
            raise IOError("no input")
        E.warn("empty input")
        E.Stop()
        return

    matrix, headers, colours, legend = readTable(lines,
                                                 "matrix",
                                                 take_columns=options.columns,
                                                 headers=True,
                                                 colours=options.colours,
                                                 row_names=options.legend)

    if options.input_filename2:
        # read another matrix (should be of the same format.
        matrix2, headers2, colours2, legend2 = readTable(
            lines,
            "matrix2",
            take_columns=options.columns,
            headers=True,
            colours=options.colours,
            row_names=options.legend)

    R.assign("headers", headers)

    ndata = R("""length( matrix[,1] )""")[0]

    if options.loglevel >= 1:
        options.stdlog.write("# read matrix: %ix%i\n" % (len(headers), ndata))

    if colours:
        R.assign("colours", colours)

    for method in options.statistics:

        if method == "correlation":
            cor = R.cor(matrix, use="pairwise.complete.obs")
            writeMatrix(sys.stdout, cor, headers=headers, format="%5.2f")

        elif method == "pearson":
            options.stdout.write("\t".join(("var1", "var2", "coeff", "passed",
                                            "pvalue", "n", "method",
                                            "alternative")) + "\n")
            for x in range(len(headers) - 1):
                for y in range(x + 1, len(headers)):
                    try:
                        result = R("""cor.test( matrix[,%i], matrix[,%i] )""" %
                                   (x + 1, y + 1))
                    except rpy.RPyException as msg:
                        E.warn(
                            "correlation not computed for columns %i(%s) and %i(%s): %s"
                            % (x, headers[x], y, headers[y], msg))
                        options.stdout.write(
                            "%s\t%s\t%s\t%s\t%s\t%i\t%s\t%s\n" %
                            (headers[x], headers[y], "na", "na", "na", 0, "na",
                             "na"))

                    else:
                        options.stdout.write(
                            "%s\t%s\t%6.4f\t%s\t%e\t%i\t%s\t%s\n" %
                            (headers[x], headers[y],
                             result.rx2('estimate').rx2('cor')[0],
                             Stats.getSignificance(
                                 float(result.rx2('p.value')[0])),
                             result.rx2('p.value')[0],
                             result.rx2('parameter').rx2('df')[0],
                             result.rx2('method')[0],
                             result.rx2('alternative')[0]))

        elif method == "spearman":
            options.stdout.write("\t".join(("var1", "var2", "coeff", "passed",
                                            "pvalue", "method",
                                            "alternative")) + "\n")
            for x in range(len(headers) - 1):
                for y in range(x + 1, len(headers)):
                    result = R(
                        """cor.test( matrix[,%i], matrix[,%i], method='spearman')"""
                        % (x + 1, y + 1))
                    options.stdout.write(
                        "%s\t%s\t%6.4f\t%s\t%e\t%i\t%s\t%s\n" %
                        (headers[x], headers[y], result['estimate']['rho'],
                         Stats.getSignificance(float(result['p.value'])),
                         result['p.value'], result['parameter']['df'],
                         result['method'], result['alternative']))

        elif method == "count":
            # number of shared elements > threshold
            m, r, c = MatlabTools.ReadMatrix(open(options.input_filename, "r"),
                                             take=options.columns,
                                             headers=True)
            mask = numpy.greater(m, options.threshold)
            counts = numpy.dot(numpy.transpose(mask), mask)
            writeMatrix(options.stdout, counts, headers=c, format="%i")

    if options.plot:

        # remove columns that are completely empty
        if "pairs" in options.plot:
            colsums = R('''colSums( is.na(matrix ))''')
            take = [x for x in range(len(colsums)) if colsums[x] != ndata]
            if take:
                E.warn("removing empty columns %s before plotting" % str(take))
                matrix = R.subset(matrix, select=[x + 1 for x in take])
                R.assign("""matrix""", matrix)
                headers = [headers[x] for x in take]
                if legend:
                    legend = [headers[x] for x in take]

        if options.r_options:
            extra_options = ", %s" % options.r_options
        else:
            extra_options = ""

        if options.legend is not None and len(legend):
            extra_options += ", legend=c('%s')" % "','".join(legend)

        if options.labels:
            xlabel, ylabel = options.labels.split(",")
            extra_options += ", xlab='%s', ylab='%s'" % (xlabel, ylabel)
        else:
            xlabel, ylabel = "", ""

        if options.colours:
            extra_options += ", col=colours"

        if options.logscale:
            extra_options += ", log='%s'" % options.logscale

        if options.xrange:
            extra_options += ", xlim=c(%f,%f)" % tuple(
                map(float, options.xrange.split(",")))

        if options.yrange:
            extra_options += ", ylim=c(%f,%f)" % tuple(
                map(float, options.yrange.split(",")))

        if options.hardcopy:
            if options.hardcopy.endswith(".eps"):
                R.postscript(options.hardcopy)
            elif options.hardcopy.endswith(".png"):
                R.png(options.hardcopy, width=1024, height=768, type="cairo")
            elif options.hardcopy.endswith(".jpg"):
                R.jpg(options.hardcopy, width=1024, height=768, type="cairo")

        for method in options.plot:

            if ndata < 100:
                point_size = "1"
                pch = "o"
            elif ndata < 1000:
                point_size = "1"
                pch = "o"
            else:
                point_size = "0.5"
                pch = "."

            if method == "scatter":
                R("""plot( matrix[,1], matrix[,2], cex=%s, pch="o" %s)""" %
                  (point_size, extra_options))

            if method == "scatter-regression":
                R("""plot( matrix[,1], matrix[,2], cex=%s, pch="o" %s)""" %
                  (point_size, extra_options))
                dat = R(
                    """dat <- data.frame(x = matrix[,1], y = matrix[,2])""")
                R("""new <- data.frame(x = seq( min(matrix[,1]), max(matrix[,1]), (max(matrix[,1]) - min(matrix[,1])) / 100))"""
                  )
                mod = R("""mod <- lm( y ~ x, dat)""")
                R("""predict(mod, new, se.fit = TRUE)""")
                R("""pred.w.plim <- predict(mod, new, interval="prediction")"""
                  )
                R("""pred.w.clim <- predict(mod, new, interval="confidence")"""
                  )
                R("""matpoints(new$x,cbind(pred.w.clim, pred.w.plim[,-1]), lty=c(1,2,2,3,3), type="l")"""
                  )
                R.mtext("y = %f * x + %f, r=%6.4f, n=%i" %
                        (mod["coefficients"]["x"],
                         mod["coefficients"]["(Intercept)"],
                         R("""cor( dat )[2]"""), ndata),
                        3,
                        cex=1.0)

            elif method == "pairs":
                if options.add_diagonal:
                    R("""panel.hist <- function( x,y,...  ) { points(x,y,...); abline(0,1); }"""
                      )
                else:
                    R("""panel.hist <- function( x,y,...  ) { points(x,y,...); }"""
                      )

                # There used to be a argument na_action="na.omit", but
                # removed this as there appeared error messages saying
                # "na.action is not a graphical parameter" and the
                # plots showed occasionally the wrong scale.
                # cex=point_size also caused trouble (error message:
                # "X11 used font size 8 when 2 was requested" or
                # similar)
                if options.colours:
                    R.pairs(matrix,
                            pch=pch,
                            col=colours,
                            main=options.title,
                            panel="panel.hist",
                            labels=headers,
                            cex_labels=2.0)
                else:
                    R.pairs(matrix,
                            pch=pch,
                            panel="panel.hist",
                            main=options.title,
                            labels=headers,
                            cex_labels=2.0)

            elif method == "boxplot":
                extra_options += ",main='%s'" % options.title

                # set vertical orientation
                if max([len(x) for x in headers]) > 40 / len(headers):
                    # remove xlabel:
                    extra_options = re.sub(", xlab='[^']+'", "", extra_options)
                    extra_options += ", names.arg=headers, las=2"
                    R("""op <- par(mar=c(11,4,4,2))"""
                      )  # the 10 allows the names.arg below the barplot

                R("""boxplot( matrix %s)""" % extra_options)

            elif method == "bar" or method == "bar-stacked":
                if not options.colours:
                    extra_options += ", col=rainbow(5)"

                # set vertical orientation
                if max([len(x) for x in headers]) > 40 / len(headers):
                    # remove xlabel:
                    extra_options = re.sub(", xlab='[^']+'", "", extra_options)
                    extra_options += ", names.arg=headers, las=2"
                    R("""op <- par(mar=c(11,4,4,2))"""
                      )  # the 10 allows the names.arg below the barplot

                R("""barplot(as.matrix(matrix), %s)""" % extra_options)

            elif method == "bar-besides":
                if not options.colours:
                    extra_options += ", col=rainbow(%i)" % ndata

                # set vertical orientation
                if max([len(x) for x in headers]) > 40 / len(headers):
                    # remove xlabel:
                    extra_options = re.sub(", xlab='[^']+'", "", extra_options)
                    extra_options += ", names.arg=headers, las=2"
                    R("""op <- par(mar=c(11,4,4,2))"""
                      )  # the 10 allows the names.arg below the barplot

                R("""barplot(as.matrix(matrix), beside=TRUE %s)""" %
                  extra_options)

            elif method == "scatter+marginal":

                if options.title:
                    # set the size of the outer margins - the title needs to be added at the end
                    # after plots have been created
                    R.par(oma=R.c(0, 0, 4, 0))

                R("""matrix""")
                R("""
x <- matrix[,1];
y <- matrix[,2];
xhist <- hist(x, breaks=20, plot=FALSE);
yhist <- hist(y, breaks=20, plot=FALSE);
top <- max(c(xhist$counts, yhist$counts));
nf <- layout(matrix(c(2,0,1,3),2,2,byrow=TRUE), c(3,1), c(1,3), respect=TRUE );
par(mar=c(3,3,1,1)) ;
plot(x, y, cex=%s, pch="o" %s) ;
par(mar=c(0,3,1,1)) ;
barplot(xhist$counts, axes=FALSE, ylim=c(0, top), space=0 ) ;
par(mar=c(3,0,1,1)) ;
title(main='%s');
barplot(yhist$counts, axes=FALSE, xlim=c(0, top), space=0, horiz=TRUE ) ;
title(main='%s');
""" % (point_size, extra_options, xlabel, ylabel))

                if options.title:
                    R.mtext(options.title, 3, outer=True, line=1, cex=1.5)

            elif method in ("panel", "1_vs_x", "matched"):

                if method == "panel":
                    pairs = []
                    for x in range(len(headers) - 1):
                        for y in range(x + 1, len(headers)):
                            pairs.append((x, y))

                elif method == "1_vs_x":
                    pairs = []
                    for x in range(1, len(headers)):
                        pairs.append((0, x))

                # print matching columns
                elif method == "matched":
                    pairs = []
                    for x in range(len(headers) - 1):
                        for y in range(x + 1, len(headers)):
                            if headers[x] == headers[y]:
                                pairs.append((x, y))
                                break

                w = int(math.ceil(math.sqrt(len(pairs))))
                h = int(math.ceil(float(len(pairs)) / w))

                PosInf = 1e300000
                NegInf = -1e300000

                xlabel, ylabel = options.labels.split(",")

                R("""layout(matrix(seq(1,%i), %i, %i, byrow = TRUE))""" %
                  (w * h, w, h))
                for a, b in pairs:
                    new_matrix = [
                        x for x in zip(
                            list(matrix[a].values())[0],
                            list(matrix[b].values())[0])
                        if x[0] not in (float("nan"), PosInf, NegInf)
                        and x[1] not in (float("nan"), PosInf, NegInf)
                    ]
                    try:
                        R("""plot(matrix[,%i], matrix[,%i], main='%s versus %s', cex=0.5, pch=".", xlab='%s', ylab='%s' )"""
                          % (a + 1, b + 1, headers[b], headers[a], xlabel,
                             ylabel))
                    except rpy.RException as msg:
                        print("could not plot %s versus %s: %s" %
                              (headers[b], headers[a], msg))

        if options.hardcopy:
            R['dev.off']()

    E.info("matrix added as >matrix< in R.")

    if not options.hardcopy:
        if options.input_filename:
            interpreter = code.InteractiveConsole(globals())
            interpreter.interact()
        else:
            E.info(
                "can not start new interactive session as input has come from stdin."
            )

    E.Stop()

Пример #2

Показать файл

Файл: matrix2matrix.py Проект: santayana/cgat

def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: matrix2matrix.py 2782 2009-09-10 11:40:29Z andreas $"
    )

    parser.add_option("-m",
                      "--method",
                      dest="methods",
                      type="choice",
                      action="append",
                      choices=(
                          "normalize-by-min-diagonal",
                          "normalize-by-column",
                          "log",
                          "ln",
                          "negzero2value",
                          "set-diagonal",
                          "subtract-matrix",
                          "mix-matrix",
                          "normalize-by-matrix",
                          "normalize-by-column-max",
                          "normalize-by-row-max",
                          "normalize-by-column-min",
                          "normalize-by-row-min",
                          "normalize-by-column-median",
                          "normalize-by-row-median",
                          "normalize-by-column-mean",
                          "normalize-by-row-mean",
                          "normalize-by-column-total",
                          "normalize-by-row-total",
                          "correspondence-analysis",
                          "normalize-by-value",
                          "add-value",
                          "sort-rows",
                          "sort-columns",
                          "transpose",
                          "upper-bound",
                          "lower-bound",
                          "subtract-first-col",
                          "multiply-by-value",
                          "divide-by-value",
                          "mask-rows",
                          "mask-columns",
                          "mask-rows-and-columns",
                          "symmetrize-mean",
                          "symmetrize-max",
                          "symmetrize-min",
                      ),
                      help="""method to use [default=%default]""")

    parser.add_option("-s",
                      "--scale",
                      dest="scale",
                      type="float",
                      help="factor to scale matrix by [default=%default].")

    parser.add_option("-f",
                      "--format",
                      dest="format",
                      type="string",
                      help="output number format [default=%default].")

    parser.add_option("--filename-rows",
                      dest="filename_rows",
                      type="string",
                      help="filename with rows to mask [default=%default].")

    parser.add_option("--filename-columns",
                      dest="filename_columns",
                      type="string",
                      help="filename with columns to mask [default=%default].")

    parser.add_option("-p",
                      "--parameters",
                      dest="parameters",
                      type="string",
                      help="Parameters for various functions.")

    parser.add_option("-t",
                      "--headers",
                      dest="headers",
                      action="store_true",
                      help="matrix has row/column headers.")

    parser.add_option("--no-headers",
                      dest="headers",
                      action="store_false",
                      help="matrix has no row/column headers.")

    parser.add_option("-a",
                      "--value",
                      dest="value",
                      type="float",
                      help="value to use for various algorithms.")

    parser.add_option("-i",
                      "--input-format",
                      dest="input_format",
                      type="choice",
                      choices=("full", "sparse", "phylip"),
                      help="""input format for matrix.""")

    parser.add_option("-o",
                      "--output-format",
                      dest="output_format",
                      type="choice",
                      choices=("full", "sparse", "phylip"),
                      help="""output format for matrix.""")

    parser.add_option(
        "--missing",
        dest="missing",
        type="float",
        help=
        "value to use for missing values. If not set, missing values will cause the script to fail [default=%default]."
    )

    parser.set_defaults(
        methods=[],
        scale=1.0,
        headers=True,
        format="%6.4f",
        output_format="full",
        input_format="full",
        value=0.0,
        parameters="",
        write_separators=True,
        filename_rows=None,
        filename_columns=None,
        missing=None,
    )

    (options, args) = E.Start(parser)

    options.parameters = options.parameters.split(",")

    lines = filter(lambda x: x[0] != "#", sys.stdin.readlines())

    if len(lines) == 0:
        raise IOError("no input")

    chunks = filter(lambda x: lines[x][0] == ">", range(len(lines)))

    if not chunks:
        options.write_separators = False
        chunks = [-1]

    chunks.append(len(lines))

    if options.filename_rows:
        row_names, n = IOTools.ReadList(open(options.filename_rows, "r"))
    if options.filename_columns:
        column_names, n = IOTools.ReadList(open(options.filename_columns, "r"))

    for chunk in range(len(chunks) - 1):

        try:
            raw_matrix, row_headers, col_headers = MatlabTools.readMatrix(
                StringIO.StringIO("".join(lines[chunks[chunk] +
                                                1:chunks[chunk + 1]])),
                format=options.input_format,
                headers=options.headers,
                missing=options.missing)
        except ValueError, msg:
            E.warn("matrix could not be read: %s" % msg)
            continue

        nrows, ncols = raw_matrix.shape

        E.debug("read matrix: %i x %i, %i row titles, %i colum titles" %
                (nrows, ncols, len(row_headers), len(col_headers)))

        parameter = 0

        for method in options.methods:

            matrix = numpy.reshape(numpy.array(raw_matrix), raw_matrix.shape)

            if method in ("normalize-by-matrix", "subtract-matrix",
                          "mix-matrix", "add-matrix"):

                other_matrix, other_row_headers, other_col_headers = MatlabTools.ReadMatrix(
                    open(options.parameters[parameter], "r"),
                    headers=options.headers)

                other_nrows, other_ncols = other_matrix.shape

                if options.loglevel >= 2:
                    options.stdlog.write(
                        "# read second matrix from %s: %i x %i, %i row titles, %i colum titles.\n"
                        % (options.parameters[parameter], other_nrows,
                           other_ncols, len(other_row_headers),
                           len(other_col_headers)))

                parameter += 1

            elif method == "normalize-by-min-diagonal":
                for x in range(nrows):
                    for y in range(ncols):
                        m = min(raw_matrix[x, x], raw_matrix[y, y])
                        if m > 0:
                            matrix[x, y] = raw_matrix[x, y] / m

            elif method == "normalize-by-column":
                if nrows != ncols:
                    raise "only supported for symmeric matrices."

                for x in range(nrows):
                    for y in range(ncols):
                        if raw_matrix[y, y] > 0:
                            matrix[x, y] = raw_matrix[x, y] / raw_matrix[y, y]

            elif method == "normalize-by-value":
                matrix = raw_matrix / float(options.parameters[parameter])
                parameter += 1

            elif method == "normalize-by-row":
                if nrows != ncols:
                    raise "only supported for symmeric matrices."

                for x in range(nrows):
                    for y in range(ncols):
                        if raw_matrix[y, y] > 0:
                            matrix[x, y] = raw_matrix[x, y] / raw_matrix[x, x]

            elif method == "subtract-first-col":
                for x in range(nrows):
                    for y in range(ncols):
                        matrix[x, y] -= raw_matrix[x, 0]

            elif method.startswith("normalize-by-column"):
                if method.endswith("max"):
                    f = max
                elif method.endswith("min"):
                    f = min
                elif method.endswith("median"):
                    f = scipy.median
                elif method.endswith("mean"):
                    f = scipy.mean
                elif method.endswith("total"):
                    f = sum

                for y in range(ncols):
                    m = f(matrix[:, y])
                    if m != 0:
                        for x in range(nrows):
                            matrix[x, y] = matrix[x, y] / m

            elif method.startswith("normalize-by-row"):
                if method.endswith("max"):
                    f = max
                elif method.endswith("min"):
                    f = min
                elif method.endswith("median"):
                    f = scipy.median
                elif method.endswith("mean"):
                    f = scipy.mean
                elif method.endswith("total"):
                    f = sum

                for x in range(nrows):
                    m = f(matrix[x, :])
                    if m != 0:
                        for y in range(ncols):
                            matrix[x, y] = raw_matrix[x, y] / m

            elif method == "negzero2value":
                # set zero/negative values to a value
                for x in range(nrows):
                    for y in range(ncols):
                        if matrix[x, y] <= 0:
                            matrix[x, y] = options.value

            elif method == "minmax":
                # set zero/negative values to a value
                for x in range(nrows):
                    for y in range(ncols):
                        matrix[x, y], matrix[y, x] = \
                            min(matrix[x, y], matrix[y, x]), \
                            max(matrix[x, y], matrix[y, x])

            elif method == "log":
                # apply log to all values.
                for x in range(nrows):
                    for y in range(ncols):
                        if matrix[x, y] > 0:
                            matrix[x, y] = math.log10(matrix[x, y])

            elif method == "ln":
                for x in range(nrows):
                    for y in range(ncols):
                        if matrix[x, y] > 0:
                            matrix[x, y] = math.log(matrix[x, y])

            elif method == "transpose":
                matrix = numpy.transpose(matrix)
                row_headers, col_headers = col_headers, row_headers
                nrows, ncols = ncols, nrows

            elif method == "mul":
                matrix = numpy.dot(matrix, numpy.transpose(matrix))
                col_headers = row_headers

            elif method == "multiply-by-value":
                matrix *= options.value

            elif method == "divide-by-value":
                matrix /= options.value

            elif method == "add-value":
                matrix += options.value

            elif method == "angle":
                # write angles between col vectors
                v1 = numpy.sqrt(numpy.sum(numpy.power(matrix, 2), 0))
                matrix = numpy.dot(numpy.transpose(matrix), matrix)
                row_headers = col_headers
                nrows = ncols
                for x in range(nrows):
                    for y in range(ncols):
                        matrix[x, y] /= v1[x] * v1[y]

            elif method == "euclid":
                # convert to euclidean distance matrix
                matrix = numpy.zeros((ncols, ncols), numpy.float)
                for c1 in range(0, ncols - 1):
                    for c2 in range(c1 + 1, ncols):
                        for r in range(0, nrows):
                            d = raw_matrix[r][c1] - raw_matrix[r][c2]
                            matrix[c1, c2] += (d * d)
                        matrix[c2, c1] = matrix[c1, c2]
                matrix = numpy.sqrt(matrix)
                row_headers = col_headers
                nrows = ncols

            elif method.startswith("symmetrize"):
                f = method.split("-")[1]
                if f == "max":
                    f = max
                elif f == "min":
                    f = min
                elif f == "mean":
                    f = lambda x, y: float(x + y) / 2

                if nrows != ncols:
                    raise ValueError(
                        "symmetrize only available for symmetric matrices")
                if row_headers != col_headers:
                    raise ValueError(
                        "symmetrize not available for permuted matrices")
                for x in range(nrows):
                    for y in range(ncols):
                        matrix[x, y] = matrix[y,
                                              x] = f(matrix[x, y], matrix[y,
                                                                          x])
            elif method == "sub":
                matrix = options.value - matrix

            elif method in ("lower-bound", "upper-bound"):

                boundary = float(options.parameters[parameter])
                new_value = float(options.parameters[parameter + 1])
                parameter += 2
                if method == "upper-bound":
                    for x in range(nrows):
                        for y in range(ncols):
                            if matrix[x, y] > boundary:
                                matrix[x, y] = new_value
                else:
                    for x in range(nrows):
                        for y in range(ncols):
                            if matrix[x, y] < boundary:
                                matrix[x, y] = new_value

            elif method == "subtract-matrix":
                matrix = matrix - other_matrix

            elif method == "add-matrix":
                matrix = matrix + other_matrix

            elif method == "normalize-by-matrix":

                # set 0s to 1 in the other matrix
                for x in range(nrows):
                    for y in range(ncols):
                        if other_matrix[x, y] == 0:
                            other_matrix[x, y] = 1.0

                matrix = matrix / other_matrix

            elif method == "mix-matrix":
                for x in range(len(other_row_headers) - 1):
                    for y in range(x + 1, len(other_col_headers)):
                        matrix[x, y] = other_matrix[x, y]

            elif method == "set-diagonal":
                value = float(options.parameters[parameter])
                for x in range(min(nrows, ncols)):
                    matrix[x, x] = value
                parameter += 1

            elif method == "transpose":
                matrix = numpy.transpose(raw_matrix)
                row_headers, col_headers = col_headers, row_headers

            elif method == "correspondence-analysis":
                row_indices, col_indices = CorrespondenceAnalysis.GetIndices(
                    raw_matrix)
                map_row_new2old = numpy.argsort(row_indices)
                map_col_new2old = numpy.argsort(col_indices)

                matrix, row_headers, col_headers = CorrespondenceAnalysis.GetPermutatedMatrix(
                    raw_matrix,
                    map_row_new2old,
                    map_col_new2old,
                    row_headers=row_headers,
                    col_headers=col_headers)

            elif method == "mask-rows":
                r = set(row_names)
                for x in range(len(row_headers)):
                    if row_headers[x] in r:
                        matrix[x, :] = options.value

            elif method == "mask-columns":
                r = set(column_names)
                for x in range(len(col_headers)):
                    if col_headers[x] in r:
                        matrix[:, x] = options.value

            elif method == "mask-rows-and-columns":

                r = set(row_names)
                c = set(column_names)
                for x in range(len(row_headers)):
                    for y in range(len(col_headers)):
                        if row_headers[x] in r and col_headers[y] in c:
                            matrix[x, y] = options.value

            raw_matrix = numpy.reshape(numpy.array(matrix), matrix.shape)

        else:
            # for simple re-formatting jobs
            matrix = raw_matrix

        if options.write_separators:
            options.stdout.write(lines[chunks[chunk]])

        MatlabTools.writeMatrix(sys.stdout,
                                matrix,
                                value_format=options.format,
                                format=options.output_format,
                                row_headers=row_headers,
                                col_headers=col_headers)

Пример #3

Показать файл

Файл: r_table2scatter.py Проект: zpeng1989/cgat

            for x in range(len(headers) - 1):
                for y in range(x + 1, len(headers)):
                    result = R(
                        """cor.test( matrix[,%i], matrix[,%i], method='spearman')"""
                        % (x + 1, y + 1))
                    options.stdout.write(
                        "%s\t%s\t%6.4f\t%s\t%e\t%i\t%s\t%s\n" %
                        (headers[x], headers[y], result['estimate']['rho'],
                         Stats.getSignificance(float(result['p.value'])),
                         result['p.value'], result['parameter']['df'],
                         result['method'], result['alternative']))

        elif method == "count":
            # number of shared elements > threshold
            m, r, c = MatlabTools.ReadMatrix(open(options.input_filename, "r"),
                                             take=options.columns,
                                             headers=True)
            mask = numpy.greater(m, options.threshold)
            counts = numpy.dot(numpy.transpose(mask), mask)
            writeMatrix(options.stdout, counts, headers=c, format="%i")

    if options.plot:

        # remove columns that are completely empty
        if "pairs" in options.plot:
            colsums = R('''colSums( is.na(matrix ))''')
            take = [x for x in range(len(colsums)) if colsums[x] != ndata]
            if take:
                E.warn("removing empty columns %s before plotting" % str(take))
                matrix = R.subset(matrix, select=[x + 1 for x in take])
                R.assign("""matrix""", matrix)