Python AnnotationMatrix.read примеры использования

Язык программирования: Python

Пространство имен/Пакет: genomicode

Класс/Тип: AnnotationMatrix

Метод/Функция: read

Примеров на hotexamples.com: 5

Python AnnotationMatrix.read - 5 примеров найдено. Это лучшие примеры Python кода для genomicode.AnnotationMatrix.read, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

create_from_annotations(11)

rowslice(10)

read(5)

uniquify_headers(2)

AnnotationMatrix(1)

write(1)

Пример #1

Показать файл

    def run(self, network, in_data, out_attributes, user_options, num_cores,
            out_filename):
        """Write the unique (#CHROM, POS) pairs of an annotation file.

        The file named by ``in_data.identifier`` is read with
        ``AnnotationMatrix.read``.  Its first two headers must be
        ``#CHROM`` and ``POS``.  Each distinct (chromosome, position)
        pair, with surrounding whitespace stripped, is written once to
        ``out_filename`` as a tab-separated line, in order of first
        appearance.

        If the input file has size zero, an empty ``out_filename`` is
        created and nothing else is done.

        ``network``, ``out_attributes``, ``user_options`` and
        ``num_cores`` are not used by this method.

        Raises:
            AssertionError: if the matrix has no headers, or its first
                two headers are not ``#CHROM`` and ``POS``.
        """
        import os
        import stat
        from genomicode import AnnotationMatrix

        # If the file is empty, then just create an empty positions file.
        if os.stat(in_data.identifier)[stat.ST_SIZE] == 0:
            # Use a context manager so the handle is closed right away
            # instead of waiting for garbage collection.
            with open(out_filename, 'w'):
                pass
            return

        # NOTE(review): header_char="##" presumably marks the leading
        # meta-information lines to skip -- confirm against
        # AnnotationMatrix.read.
        M = AnnotationMatrix.read(in_data.identifier, header_char="##")

        # Headers are:
        # #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT [Samples...]
        # Pull out the #CHROM and POS columns.
        assert M.num_headers()
        assert M.headers[0] == "#CHROM"
        assert M.headers[1] == "POS"
        chrom_annots = M["#CHROM"]
        pos_annots = M["POS"]

        lines = []
        seen = set()
        for chrom, pos in zip(chrom_annots, pos_annots):
            key = chrom.strip(), pos.strip()
            # Keep only the first occurrence of each position.
            if key in seen:
                continue
            seen.add(key)
            lines.append("\t".join(key) + "\n")

        # Close the output file deterministically so the data is flushed
        # before this method returns.
        with open(out_filename, 'w') as handle:
            handle.writelines(lines)

Пример #2

Показать файл

def main():
    """Command-line entry point: draw a histogram of one column of a file.

    Parses the command-line arguments, reads the data file with
    AnnotationMatrix.read, converts the values under the requested
    header to floats, and draws a histogram through R (via the jmath
    helpers) into args.plot_file.  If --prism_file is given, the result
    of the R hist() call is also passed to write_prism_file.

    Argument validation is done with assert statements, so invalid
    combinations surface as AssertionError.
    """
    import os
    import argparse
    
    from genomicode import jmath
    from genomicode import AnnotationMatrix

    parser = argparse.ArgumentParser(description="")
    parser.add_argument("datafile", help="Tab-delimited data file.")
    parser.add_argument("header", help="Which column contains data to plot.")
    parser.add_argument(
        "plot_file", help="Name of image file, e.g. outfile.png.  "
        "Will generate PNG format by default.  If this file name ends with "
        ".pdf, will generate a PDF file instead.")
    parser.add_argument(
        "--prism_file", help="Write Prism-formatted results to this file.")
    parser.add_argument(
        "--ignore_missing_values", action="store_true",
        help="Ignore missing values in the file.")

    group = parser.add_argument_group(title="Calculations")
    group.add_argument(
        "--breaks_seq",
        help="Set the breakpoints.  Format: <start>,<stop>,<skip>.")
    group.add_argument(
        "--num_breaks", type=int, help="Number of breakpoints.")
    group.add_argument(
        "--ymax", type=int,
        help="Set the maximum value for the Y axis.")
    
    group = parser.add_argument_group(title="Plot Labels")
    group.add_argument("--title", help="Put a title on the plot.")
    group.add_argument("--xlab", help="Label the X-axis.")
    group.add_argument(
        "--xlabel_size", default=1.0, type=float,
        help="Scale the size of the labels on X-axis.  Default 1.0.")
    group.add_argument(
        "--xlabel_off", action="store_true", help="Do not label the X axis.")
    group.add_argument(
        "--ylabel_off", action="store_true", help="Do not label the Y axis.")
    group.add_argument(
        "--xtick_label_off", action="store_true",
        help="Do not draw the tick labels on the X axis.")

    group = parser.add_argument_group(title="Colors")
    group.add_argument(
        "--bar_color",  help="Set the color of the bars.  Default #FFFFFF")
    # _fmt_palettes is defined elsewhere in the file; its result is
    # interpolated into the help text below.
    x = _fmt_palettes()
    group.add_argument(
        "--bar_palette", help="Color the bars according to a palette: %s." % x)
    group.add_argument(
        "--symmetric_palette", action="store_true",
        help="Make the color symmetric.")

    group = parser.add_argument_group(title="Appearance")
    group.add_argument(
        "--height", type=int, help="Height (in pixels) of the plot.")
    group.add_argument(
        "--width", type=int, help="Width (in pixels) of the plot.")
    group.add_argument(
        "--mar_left", default=1.0, type=float,
        help="Scale margin at left of plot.  Default 1.0 (no scaling).")
    group.add_argument(
        "--mar_bottom", default=1.0, type=float,
        help="Scale margin at bottom of plot.  Default 1.0.")
    group.add_argument(
        "--xaxis_off", action="store_true", help="Do not show the X axis.")
    group.add_argument(
        "--yaxis_off", action="store_true", help="Do not show the Y axis.")


    # Parse the input arguments.
    args = parser.parse_args()
    if not os.path.exists(args.datafile):
        parser.error("File not found: %s" % args.datafile)
    # --breaks_seq and --num_breaks are mutually exclusive.
    assert not (args.breaks_seq and args.num_breaks)
    if args.num_breaks:
        assert args.num_breaks >= 2 and args.num_breaks <= 1000
    if args.width is not None:
        assert args.width > 10, "too small"
        assert args.width < 4096*16, "width too big"
    if args.height is not None:
        assert args.height > 10, "too small"
        assert args.height < 4096*16, "height too big"
    assert args.mar_bottom > 0 and args.mar_bottom < 10
    assert args.mar_left > 0 and args.mar_left < 10
    assert args.xlabel_size > 0 and args.xlabel_size < 10
    # A single bar color and a palette are mutually exclusive, and
    # --symmetric_palette only makes sense together with --bar_palette.
    assert not (args.bar_color and args.bar_palette)
    assert not args.symmetric_palette or args.bar_palette
    assert args.ymax is None or args.ymax > 0


    # Default image size (pixels) when not given on the command line.
    height = args.height or 2400
    width = args.width or 3200

    # NOTE(review): the meaning of the second positional argument (False)
    # is not visible here -- confirm against AnnotationMatrix.read.
    MATRIX = AnnotationMatrix.read(args.datafile, False)
    assert MATRIX.num_headers() and MATRIX.num_annots(), "Empty matrix."
    assert args.header in MATRIX.headers, "header not found: %s" % args.header

    # Pull out the values for the histogram.
    x = MATRIX[args.header]
    if args.ignore_missing_values:
        # Drop entries that are empty or whitespace-only.
        x = [x for x in x if x.strip()]
    # NOTE(review): the "assert values" check further down is only a
    # meaningful emptiness test if map() returns a list (Python 2
    # semantics) -- confirm the target interpreter.
    values = map(float, x)

    value_min = value_max = None

    # Start R and set up the environment.
    R = jmath.start_R()

    # Plot labels.  --title and --xlab override the defaults; the *_off
    # switches below blank the labels again.
    main = jmath.R_var("NA")
    if args.title:
        main = args.title
    sub = ""
    xlab = ""
    if args.xlab:
        xlab = args.xlab
    ylab = "Frequency"
    xtick_labels = jmath.R_var("TRUE")
    ytick_labels = jmath.R_var("TRUE")

    if args.xlabel_off:
        xlab = ""
    if args.ylabel_off:
        ylab = ""
    if args.xtick_label_off:
        xtick_labels = jmath.R_var("FALSE")

    # Breakpoints: the string "Sturges" by default, an explicit sequence
    # for --breaks_seq, or a count for --num_breaks.
    breaks = "Sturges"
    if args.breaks_seq:
        breaks = _parse_breaks_seq(args.breaks_seq)
        value_min, value_max = min(breaks), max(breaks)
        # Assign the sequence to the R variable "breaks" and refer to it
        # by name from here on.
        jmath.R_equals(breaks, "breaks")
        breaks = jmath.R_var("breaks")
    if args.num_breaks:
        breaks = args.num_breaks

    # With explicit breakpoints, keep only values in [value_min, value_max).
    if value_min is not None:
        values = [x for x in values if x >= value_min]
    if value_max is not None:
        values = [x for x in values if x < value_max]

    lwd = 2
    cex_lab = 1.5
    cex_main = 2.0
    cex_sub = 1.5
    ylim = jmath.R_var("NULL")
    if args.ymax is not None:
        ylim = [0, args.ymax]

    assert values
    jmath.R_equals(values, "X")

    # Figure out the colors.  Do it after X is assigned.
    col = jmath.R_var("NULL")
    if args.bar_color:
        assert args.bar_color.startswith("#")
        col = args.bar_color
    elif args.bar_palette:
        # Figure out how many breaks there are.  Number of bars is num
        # breaks + 1.
        # NOTE(review): R's hist() reports the bin boundaries in $breaks,
        # so the number of bars is normally len(breaks) - 1 rather than
        # + 1 -- confirm whether the extra colors are intentional.
        jmath.R_fn(
            "hist", jmath.R_var("X"), breaks=breaks, plot=jmath.R_var("FALSE"),
            RETVAL="x")
        # From here on "breaks" holds the concrete breakpoints computed
        # by R, and those are what the plotting call below receives.
        breaks = [x for x in R["x"].rx2("breaks")]
        num_bars = len(breaks) + 1
        col = _make_col_palette(
            args.bar_palette, num_bars, args.symmetric_palette)

    # Output device type: PNG by default, PDF if the file name ends with
    # ".pdf" (case-insensitive).
    bm_type = "png16m"
    if args.plot_file.lower().endswith(".pdf"):
        bm_type = "pdfwrite"
    jmath.R_fn(
        "bitmap", args.plot_file, type=bm_type, 
        height=height, width=width, units="px", res=300)
    
    # Set the margins.
    x = 5*1.2*args.mar_bottom, 4*1.2*args.mar_left, 4, 2
    mar = [x+0.1 for x in x]
    # RETVAL="op" presumably stores the previous par() settings in the R
    # variable "op", which is restored by par(op) at the end -- confirm
    # against jmath.R_fn.
    jmath.R_fn("par", mar=mar, RETVAL="op")

    # Draw the bars only (no axes, no labels); axes and titles are added
    # separately below.
    jmath.R_fn(
        "hist", jmath.R_var("X"), breaks=breaks, main="", xlab="", ylab="",
        ylim=ylim, axes=jmath.R_var("FALSE"), col=col, RETVAL="x")
    # Make plot area solid white.
    #jmath.R('usr <- par("usr")')
    #jmath.R('rect(usr[1], usr[3], usr[2], usr[4], col="#FFFFFF")')
    #jmath.R_fn(
    #    "hist", jmath.R_var("X"), plot=jmath.R_var("FALSE"),
    #    main=main, xlab="", ylab="", axes=jmath.R_var("FALSE"),
    #    add=jmath.R_var("TRUE"))
    
    #jmath.R_fn("box", lwd=lwd)
    # x-axis
    if not args.xaxis_off:
        jmath.R_fn(
            "axis", 1, lwd=lwd, labels=xtick_labels, **{ "cex.axis" : 1.5 })
    # y-axis
    if not args.yaxis_off:
        jmath.R_fn(
            "axis", 2, lwd=lwd, labels=ytick_labels, **{ "cex.axis" : 1.5 })
    jmath.R_fn(
        "title", main=main, sub=sub, xlab=xlab, ylab=ylab,
        **{ "cex.lab" : cex_lab, "cex.main" : cex_main, "cex.sub" : cex_sub })
    R("par(op)")
    jmath.R_fn("dev.off")

    if args.prism_file:
        # R["x"] is the result of the last hist() call above.
        write_prism_file(args.prism_file, R["x"])

Пример #3

Показать файл

Файл: align_matrices.py Проект: firebitsbr/changlab

def main():
    """Command-line entry point: align a set of matrices by sample.

    Each --express_file / --annot_file on the command line is paired, in
    command-line order, with one positional "outfile".  The files are
    read, the samples in each are located, the set of samples to keep is
    chosen (inner join by default, or --left_join / --outer_join), the
    matrices are aligned to that sample list, and each is written to its
    outfile.  An outfile given as "_" is not written.

    Argument validation is done with assert statements, so invalid
    combinations surface as AssertionError.

    NOTE(review): this function uses ``sys`` and ``os`` without importing
    them locally; presumably they are imported at module level -- confirm.
    """
    import argparse
    from genomicode import AnnotationMatrix as AM

    # Placeholder outfile name meaning "do not write this matrix".
    SKIP_OUTFILE = "_"

    parser = argparse.ArgumentParser(
        description="Align a set of matrices.  Preserve the order of the "
        "first file given.")
    parser.add_argument("outfile", nargs="+")

    parser.add_argument(
        "--express_file", default=[], action="append", help="")
    parser.add_argument(
        "--annot_file", default=[], action="append", help="")
    parser.add_argument(
        "--header", default=[], action="append",
        help="Specify the header for an annotation file.  Should come "
        "after the --annot_file that it refers to.")
    parser.add_argument(
        "--annot_path",
        help="Align all the annotation files in a path.  "
        "If using this argument, no --annot_file or --express_file should "
        "be given.  "
        "--header is still required, and should apply to at least one file.  "
        'Only one "outfile" should be given, and it should refer to a path '
        "in which to store the aligned files.")
    
    #parser.add_argument(
    #    "--first_annot_header", help="If only aligning annotation files, "
    #    "find the samples to be matched under this header in the first "
    #    "annotation file.")
    parser.add_argument(
        "--clobber", default=False, action="store_true",
        help="Overwrite output files, if they already exist.")

    group = parser.add_argument_group(title="Comparisons")
    group.add_argument(
        "--case_insensitive", default=False, action="store_true",
        help="Do a case insensitive search of sample names.")
    group.add_argument(
        "--hash", default=False, action="store_true",
        help="Hash the sample names to [a-zA-Z0-9_] before comparison.")
    group.add_argument(
        "--ignore_nonalnum", default=False, action="store_true",
        help="Ignore non-alphanumeric characters in the IDs.")
    group.add_argument(
        "--ignore_blank", default=False, action="store_true",
        help="Ignore IDs that are blank (don't align them.")

    group = parser.add_argument_group(title="Joins")
    group.add_argument(
        "--strict", default=False, action="store_true",
        help="Complain if a file is missing a sample.")
    group.add_argument(
        "--left_join", default=False, action="store_true",
        help='By default, does an "inner join" and keeps only the '
        'records that are present in all files.  A "left join" will '
        'keep all records that occur in the first file.')
    group.add_argument(
        "--outer_join", default=False, action="store_true",
        help='By default, does an "inner join" and keeps only the '
        'records that are present in all files.  An "outer join" will '
        'also keep records that occur in any file.')

    group = parser.add_argument_group(title="Output")
    group.add_argument(
        "--null_string", default="",
        help='For left_join or outer_join, what to give the missing values.')
    group.add_argument(
        "--unaligned_only", action="store_true",
        help="Show only the rows that are not aligned.")
    group.add_argument(
        "--dont_add_missing_samples", action="store_true",
        help="If a matrix does not have a sample, don't fill in the value "
        "from another matrix.")

    group = parser.add_argument_group(title="Debug")
    group.add_argument(
        "--debug_nrows", type=int,
        help="Debugging: Only read this many rows from the annotation files.")
    
    args = parser.parse_args()
    # If the user specified an --annot_path, revise args to
    # contain --annot_files instead.
    # sys.argv is rewritten as well because the raw argument order is
    # re-scanned below.
    sys.argv, args = _handle_annot_path(sys.argv, args)

    # There must be exactly one outfile per input file.
    ni, no = len(args.express_file)+len(args.annot_file), len(args.outfile)
    assert ni == no, "Mismatch: %d inputs and %d outputs" % (ni, no)
        
    for x in args.express_file + args.annot_file:
        assert os.path.exists(x), "I could not find file: %s" % x
    # Refuse to overwrite existing outputs unless --clobber was given.
    for x in args.outfile:
        if x == SKIP_OUTFILE:
            continue
        assert args.clobber or not os.path.exists(x), "File exists: %s" % x
    assert not (args.left_join and args.outer_join)
    if args.null_string:
        assert args.outer_join or args.left_join, \
               "null_string given, but only used for outer_join"


    # Align the outfiles to the expression and annotation files.
    # argparse collects the two kinds of input into separate lists, so
    # walk the raw argv to recover the order in which they were given
    # and hand out the outfiles in that same order.
    express_file = args.express_file[:]
    annot_file = args.annot_file[:]
    outfile = args.outfile[:]
    matrix_data = []  # list of (infile, outfile, is_express_file)
    for arg in sys.argv:
        if arg not in ["--express_file", "--annot_file"]:
            continue
        assert outfile
        if arg == "--express_file":
            assert express_file
            x = express_file.pop(0), outfile.pop(0), True
        else:
            assert annot_file
            x = annot_file.pop(0), outfile.pop(0), False
        matrix_data.append(x)
    # Every input and every outfile must have been consumed.
    assert not express_file
    assert not annot_file
    assert not outfile

    # Align the --header arguments to the annotation files.
    # A --header applies to the most recent --express_file/--annot_file
    # seen before it on the command line.
    headers = [None] * len(matrix_data)
    header_i = -1
    for i, arg in enumerate(sys.argv):
        if arg == "--header":
            assert header_i >= 0, \
                   "--header given before an --express_file or --annot_file."
            assert headers[header_i] is None, "Two --header for one file."
            headers[header_i] = sys.argv[i+1]
        elif arg in ["--express_file", "--annot_file"]:
            header_i += 1

    # Add the headers to the matrix_data.
    new_matrix_data = []  # list of (infile, outfile, is_express_file, header)
    for i in range(len(matrix_data)):
        infile, outfile, is_express_file = matrix_data[i]
        if is_express_file and headers[i]:
            raise NotImplementedError, "No headers for --express_file."
        x = infile, outfile, is_express_file, headers[i]
        new_matrix_data.append(x)
    matrix_data = new_matrix_data

    # Read each of the files.
    new_matrix_data = []  # list of (infile, outfile, matrix, header)
    for x in matrix_data:
        infile, outfile, is_express_file, header = x
        if is_express_file:
            data = read_express(infile)
        else:
            # --debug_nrows is passed through as nrows; it is None when
            # the option was not given.
            data = AM.read(infile, nrows=args.debug_nrows)
        x = infile, outfile, data, header
        new_matrix_data.append(x)
    matrix_data = new_matrix_data

    # Find the samples in each matrix.
    new_matrix_data = []  # list of (infile, outfile, matrix, header, samples)
    samples_hint = peek_samples_hint(matrix_data)
    for x in matrix_data:
        infile, outfile, matrix, header = x
        # All headers the user supplied explicitly, for any file.
        headers_hint = [x for x in headers if x]
        x = get_samples(
            matrix, header, samples_hint, headers_hint,
            args.case_insensitive, args.hash, args.ignore_nonalnum)
        assert x, "I could not find the samples for %s" % infile
        # get_samples returns the header actually used along with the
        # samples; that header replaces the user-supplied one.
        header, samples = x
        x = infile, outfile, matrix, header, samples
        new_matrix_data.append(x)
    matrix_data = new_matrix_data

    # Choose the list of samples to align to, depending on the join type.
    if args.left_join:
        assert not args.strict, "Can't do a strict left join."
        # No duplicates.
        # Only the first matrix contributes samples.
        samples = list_all_samples(
            matrix_data[:1], args.case_insensitive, args.hash,
            args.ignore_nonalnum)
        assert samples, "No samples."
    elif args.outer_join:
        assert not args.strict, "Can't do a strict outer join."
        samples = list_all_samples(
            matrix_data, args.case_insensitive, args.hash,
            args.ignore_nonalnum)
        assert samples, "No samples."
    else:  # inner join
        samples = list_common_samples(
            matrix_data, args.case_insensitive, args.hash,
            args.ignore_nonalnum)
        assert samples, "No common samples found."

    # --strict: fail if any sample is not present in every data set,
    # listing at most the first 10 offenders in the error message.
    if args.strict:
        all_samples = list_all_samples(
            matrix_data, args.case_insensitive, args.hash,
            args.ignore_nonalnum)
        common_samples = list_common_samples(
            matrix_data, args.case_insensitive, args.hash,
            args.ignore_nonalnum)
        if sorted(all_samples) != sorted(common_samples):
            missing_samples = []
            for x in all_samples:
                i = find_sample(
                    common_samples, x, args.case_insensitive, args.hash,
                    args.ignore_nonalnum, args.ignore_blank)
                # A non-negative index means the sample was found.
                if i >= 0:
                    continue
                missing_samples.append(x)
            short = missing_samples
            if len(short) > 10:
                short = short[:10] + ["..."]
            short = "\n".join(short)
            raise AssertionError, "%d samples not in all data sets.\n%s" % \
                  (len(missing_samples), short)

    # Align each of the matrices.
    matrix_data = align_matrices(
        matrix_data, samples, args.case_insensitive, args.hash,
        args.ignore_nonalnum, args.ignore_blank,
        args.left_join, args.outer_join, args.unaligned_only,
        args.null_string)

    # Add the missing samples back to the matrix.
    if not args.dont_add_missing_samples:
        matrix_data = add_missing_samples(matrix_data, args.null_string)

    # Write out each of the matrices.
    for x in matrix_data:
        infile, outfile, matrix, header, samples = x
        if outfile == SKIP_OUTFILE:
            continue
        write_matrix(outfile, matrix)

Пример #4

Показать файл

def main():
    """Command-line entry point: draw a scatter plot from a data file.

    Parses the command-line arguments, reads the data file with
    AnnotationMatrix.read, collects the X/Y values for every --series
    (rows where either value is empty are dropped), and draws a scatter
    plot through R (via the jmath helpers) into args.plot_file.
    Optional extras: connecting lines, an identity line, point labels, a
    regression line and a legend.  The correlation coefficient and its
    p-value are printed to standard output.

    Argument validation is done with assert statements, so invalid
    combinations surface as AssertionError.
    """
    import os
    import argparse

    from genomicode import jmath
    from genomicode import AnnotationMatrix
    from genomicode import colorlib
    from genomicode import pcalib

    parser = argparse.ArgumentParser(description="")
    parser.add_argument("datafile", help="Tab-delimited data file.")
    #parser.add_argument("x_header", help="Which column for X values.")
    #parser.add_argument("y_header", help="Which column for Y values.")
    parser.add_argument(
        "plot_file",
        help="Name of image file, e.g. outfile.png.  "
        "Will generate PNG format by default.  If this file name ends with "
        ".pdf, will generate a PDF file instead.")

    group = parser.add_argument_group(title="Data Series")
    group.add_argument(
        "--series",
        action="append",
        help="Add a data series to the plot.  At least one series must be "
        "plotted.  Format: <x_header>;<y_header>")

    group = parser.add_argument_group(title="General Appearance")
    group.add_argument("--no_box",
                       action="store_true",
                       help="Turn off the box around the plot.")
    group.add_argument("--height",
                       type=int,
                       help="Height (in pixels) of the plot.")
    group.add_argument("--width",
                       type=int,
                       help="Width (in pixels) of the plot.")
    group.add_argument(
        "--mar_left",
        default=1.0,
        type=float,
        help="Scale margin at left of plot.  Default 1.0 (no scaling).")
    group.add_argument("--mar_bottom",
                       default=1.0,
                       type=float,
                       help="Scale margin at bottom of plot.  Default 1.0.")
    #group.add_argument(
    #    "--xlabel_size", default=1.0, type=float,
    #    help="Scale the size of the labels on X-axis.  Default 1.0.")
    group.add_argument("--log_x",
                       action="store_true",
                       help="Plot the X-axis on a log scale.")
    group.add_argument("--log_y",
                       action="store_true",
                       help="Plot the Y-axis on a log scale.")
    group.add_argument(
        "--qq",
        action="store_true",
        help="Make a QQ-plot.  Will sort the values to be plotted.")

    group = parser.add_argument_group(title="Plot Labels")
    group.add_argument("--title", help="Put a title on the plot.")
    group.add_argument("--xlab", help="Label the X-axis.")
    group.add_argument("--ylab", help="Label the Y-axis.")
    group.add_argument("--add_regression",
                       action="store_true",
                       help="Put a regression line on the plot.")

    group = parser.add_argument_group(title="Legend")
    group.add_argument("--add_legend",
                       action="store_true",
                       help="Add a legend to the plot.")
    group.add_argument("--legend_inset", type=float, default=0.05, help="")
    LEGEND_LOCATIONS = [
        "bottomright",
        "bottom",
        "bottomleft",
        "left",
        "topleft",
        "top",
        "topright",
        "right",
        "center",
    ]
    group.add_argument("--legend_loc",
                       choices=LEGEND_LOCATIONS,
                       help="Where to draw the legend.")

    group = parser.add_argument_group(title="Point Appearance")
    group.add_argument("--scale_points",
                       default=1.0,
                       type=float,
                       help="Scale the size of the points.  Default 1.0")
    group.add_argument("--label_header",
                       help="Label each point with the values in this column.")
    group.add_argument("--label_size",
                       type=float,
                       help="Scale the size of the labels by this value.")
    group.add_argument("--label_pos",
                       default="top",
                       choices=["top", "bottom", "left", "right"],
                       help="Where to label the points.")

    group = parser.add_argument_group(title="Line Appearance")
    group.add_argument("--add_lines",
                       action="store_true",
                       help="Add lines that connect the points.")
    group.add_argument("--scale_lines",
                       default=1.0,
                       type=float,
                       help="Scale the thickness of the lines.  Default 1.0")

    group = parser.add_argument_group(title="Identity Line")
    group.add_argument("--add_identity_line",
                       action="store_true",
                       help="Add an identity line to the plot.")

    group = parser.add_argument_group(title="Colors")
    group.add_argument(
        "-c",
        "--cluster",
        action="append",
        help="Group samples into a cluster (e.g. -c 1-5); 1-based, inclusive.")
    group.add_argument(
        "--indexes_include_headers",
        "--iih",
        action="store_true",
        help="If not given (default), then index 1 is the first row "
        "with data.  If given, then index 1 is the very first row "
        "in the file, including the headers.")
    group.add_argument("--default_color",
                       help="Default color of points.  Format: #000000.")

    # Parse the input arguments.
    args = parser.parse_args()
    if not os.path.exists(args.datafile):
        parser.error("File not found: %s" % args.datafile)
    if args.width is not None:
        assert args.width > 10, "too small"
        assert args.width < 4096 * 16, "width too big"
    if args.height is not None:
        assert args.height > 10, "too small"
        assert args.height < 4096 * 16, "height too big"
    assert args.mar_bottom > 0 and args.mar_bottom < 10
    assert args.mar_left > 0 and args.mar_left < 10
    #assert args.xlabel_size > 0 and args.xlabel_size < 10

    assert args.legend_inset >= 0 and args.legend_inset < 10
    if args.legend_loc is None:
        args.legend_loc = "bottomright"

    if args.default_color:
        assert len(args.default_color) == 7
        assert args.default_color[0] == "#"

    # NOTE(review): the meaning of the second positional argument (False)
    # is not visible here -- confirm against AnnotationMatrix.read.
    MATRIX = AnnotationMatrix.read(args.datafile, False)
    assert MATRIX.num_headers() and MATRIX.num_annots(), "Empty matrix."
    assert args.series, "Need to add a data --series to plot."
    #assert len(args.series) <= 1, "Not implemented."
    #assert args.x_header in MATRIX.headers, \
    #       "header not found: %s" % args.x_header
    #assert args.y_header in MATRIX.headers, \
    #       "header not found: %s" % args.y_header
    if args.label_header:
        assert args.label_header in MATRIX.headers, \
               "header not found: %s" % args.label_header
    if args.label_size is not None:
        assert args.label_size > 0 and args.label_size <= 20
    assert args.scale_points > 0 and args.scale_points < 20
    assert args.scale_lines > 0 and args.scale_lines < 20

    # _parse_series and _parse_cluster are defined elsewhere in the file.
    # series is indexed below as a list of (x_header, y_header) pairs.
    series = _parse_series(MATRIX, args.series)
    cluster = None
    if args.cluster:
        cluster = _parse_cluster(args.cluster, args.indexes_include_headers,
                                 MATRIX)

    if len(series) > 1:
        assert not cluster, "Series and cluster not implemented."

    # Default image size (pixels) when not given on the command line.
    height = args.height or 2400
    width = args.width or 3200

    # Pull out the values and colors for the plot.
    default_color = "#000000"
    if args.default_color:
        default_color = args.default_color

    assert len(series) < len(colorlib.BREWER_QUALITATIVE_SET1)
    series_data = []  # list of (x_values, y_values, col) for each series
    for i in range(len(series)):
        x_header, y_header = series[i]
        x = MATRIX[x_header]
        y = MATRIX[y_header]
        # Keep only the rows where both the X and the Y value are
        # non-empty.  A set makes the membership test O(1); the order of
        # the kept rows still follows I1.
        I1 = [j for (j, a) in enumerate(x) if a]
        I2 = set(j for (j, a) in enumerate(y) if a)
        I = [j for j in I1 if j in I2]
        # Build real lists (not map objects) because len() and indexing
        # are used on them below.
        x = [float(x[j]) for j in I]
        y = [float(y[j]) for j in I]
        assert len(x) == len(y)
        c = default_color
        if len(series) > 1:
            # With several series, each one gets its own palette color.
            rgb = colorlib.BREWER_QUALITATIVE_SET1[i]
            c = colorlib.rgb2hex(rgb, prefix="#")
        c = [c] * len(x)
        x = x, y, c
        series_data.append(x)

    # Merge all the data point for each series.
    x_values = []
    y_values = []
    col = []
    for (x, y, c) in series_data:
        x_values.extend(x)
        y_values.extend(y)
        #c = [c] * len(x)
        col.extend(c)
    assert len(x_values) == len(y_values)
    assert len(x_values) == len(col)

    if args.qq:
        # Reorder the points using the ordering jmath.order returns for
        # the X values, keeping Y values and colors paired with them.
        O = jmath.order(x_values)
        x_values = [x_values[i] for i in O]
        y_values = [y_values[i] for i in O]
        col = [col[i] for i in O]

    if cluster is not None:
        # Cluster colors replace the per-series colors; points for which
        # choose_colors returns None keep the default color.
        col_rgb = pcalib.choose_colors(cluster)
        col = [default_color] * len(col_rgb)
        for i in range(len(col_rgb)):
            if col_rgb[i] is None:
                continue
            col[i] = colorlib.rgb2hex(col_rgb[i], prefix="#")
        assert len(col) == len(x_values)

    #for i in range(len(x_values)):
    #    x = x_values[i], y_values[i], cluster[i], col[i]
    #    print "\t".join(map(str, x))

    # Start R and set up the environment.
    R = jmath.start_R()

    main = jmath.R_var("NA")
    if args.title:
        main = args.title
    sub = ""
    # Axis labels default to the column headers when there is exactly one
    # series; --xlab / --ylab override them.
    xlab = ""
    if len(series) == 1:
        xlab = x_header
    if args.xlab:
        xlab = args.xlab
    ylab = ""
    if len(series) == 1:
        ylab = y_header
    # Test args.ylab here, not args.xlab: otherwise --ylab is ignored
    # unless --xlab is also given, and --xlab alone sets ylab to None.
    if args.ylab:
        ylab = args.ylab

    lwd_box = 2
    lwd_axis = 2
    lwd_regr = 3
    cex = 1.0 * args.scale_points
    cex_lab = 1.5
    cex_main = 2.0
    cex_sub = 1.0

    plot_log = ""
    if args.log_x:
        plot_log += "x"
    if args.log_y:
        plot_log += "y"

    assert x_values
    assert y_values
    jmath.R_equals(x_values, "X")
    jmath.R_equals(y_values, "Y")

    # Output device type: PNG by default, PDF if the file name ends with
    # ".pdf" (case-insensitive).
    bm_type = "png16m"
    if args.plot_file.lower().endswith(".pdf"):
        bm_type = "pdfwrite"
    jmath.R_fn("bitmap",
               args.plot_file,
               type=bm_type,
               height=height,
               width=width,
               units="px",
               res=300)

    # Set the margins.
    x = 5 * 1.2 * args.mar_bottom, 4 * 1.2 * args.mar_left, 4, 2
    mar = [x + 0.1 for x in x]
    # RETVAL="op" presumably stores the previous par() settings in the R
    # variable "op", which is restored by par(op) at the end -- confirm
    # against jmath.R_fn.
    jmath.R_fn("par", mar=mar, RETVAL="op")

    # Draw the points only (no axes, no labels); axes, box and titles
    # are added separately below.
    jmath.R_fn("plot",
               jmath.R_var("X"),
               jmath.R_var("Y"),
               main="",
               xlab="",
               ylab="",
               pch=19,
               cex=cex,
               log=plot_log,
               col=col,
               axes=jmath.R_var("FALSE"),
               RETVAL="x")
    # Make plot area solid white.
    #jmath.R('usr <- par("usr")')
    #jmath.R('rect(usr[1], usr[3], usr[2], usr[4], col="#FFFFFF")')
    #jmath.R_fn(
    #    "hist", jmath.R_var("X"), plot=jmath.R_var("FALSE"),
    #    main=main, xlab="", ylab="", axes=jmath.R_var("FALSE"),
    #    add=jmath.R_var("TRUE"))

    if args.add_lines:
        lwd = 4 * args.scale_lines
        i = 0
        for (x, y, c) in series_data:
            # Cannot use c for the color.  It might've been changed by
            # --cluster.
            assert col and i < len(col)
            c = col[i:i + len(x)]
            i += len(x)

            # The "lines" function takes a scalar for col (except for
            # type=h, histogram vertical lines).  If there are
            # multiple colors, then split up the points based on the
            # colors.
            l_x, l_y, l_c = [], [], None
            for j in range(len(x)):
                if c[j] != l_c and l_x:
                    # The color changed: draw the run collected so far.
                    jmath.R_fn("lines", l_x, l_y, lwd=lwd, col=l_c)
                    # Add the previous point so that the points will
                    # connect.
                    l_x = [l_x[-1]]
                    l_y = [l_y[-1]]
                l_x.append(x[j])
                l_y.append(y[j])
                l_c = c[j]
            if l_x:
                jmath.R_fn("lines", l_x, l_y, lwd=lwd, col=l_c)

    if args.add_identity_line:
        lwd = 4

        x_min, x_max = min(x_values), max(x_values)
        y_min, y_max = min(y_values), max(y_values)

        # Draw y = x only over the range covered by both axes.
        iden_min = max(x_min, y_min)
        iden_max = min(x_max, y_max)

        l_x = [iden_min, iden_max]
        l_y = l_x
        l_c = "#FF0000"
        jmath.R_fn("lines", l_x, l_y, lwd=lwd, col=l_c)

    if args.label_header:
        cex = 1
        if args.label_size is not None:
            cex = args.label_size
        # Values passed as the "pos" argument of R's text().
        pos2specifier = {
            "top": 3,
            "bottom": 1,
            "left": 2,
            "right": 4,
        }
        pos = pos2specifier[args.label_pos]
        # NOTE(review): the labels are taken from the full column, while
        # X and Y may have had rows dropped or been reordered by --qq --
        # confirm the labels still line up with the points.
        point_labels = MATRIX[args.label_header]
        jmath.R_fn("text",
                   jmath.R_var("X"),
                   jmath.R_var("Y"),
                   labels=point_labels,
                   cex=cex,
                   pos=pos)

    # Calculate correlation, and other statistics.
    # TODO: Should calculate this for each series.
    r = jmath.R("cor(X, Y)")
    p_value = jmath.R("cor.test(X, Y)$p.value")
    r = r[0]
    p_value = p_value[0]
    print("R = %.2f" % r)
    print("p = %.2g" % p_value)

    # Add a regression line.
    if args.add_regression:
        jmath.R("fit <- lm(Y ~ X)")
        coef = jmath.R("fit$coefficients")
        assert len(coef) == 2
        # The coefficients are (intercept, slope).
        b, m = coef
        x1 = min(x_values)
        y1 = x1 * m + b
        x2 = max(x_values)
        y2 = x2 * m + b
        jmath.R_fn("lines", [x1, x2], [y1, y2],
                   lwd=lwd_regr,
                   lty=2,
                   col="#C63F31")
        sub = "R=%.2f (p=%.2g)" % (r, p_value)
        header = "X", "Y", "R", "p"
        print("\t".join(header))
        x = xlab, ylab, r, p_value
        print("\t".join(map(str, x)))

    if args.add_legend:
        # Legend entries are the Y headers of the series.
        leg = [x[1] for x in series]
        fill = [x[-1] for x in series_data]
        #jmath.R("x <- rgb(0.5, 0.5, 0.5, 0.5)")
        # alpha does not seem to be supported here.
        jmath.R_fn("legend",
                   args.legend_loc,
                   legend=leg,
                   fill=fill,
                   inset=args.legend_inset)

    if not args.no_box:
        jmath.R_fn("box", lwd=lwd_box)
    jmath.R_fn("axis", 1, lwd=lwd_axis, **{"cex.axis": 1.5})
    jmath.R_fn("axis", 2, lwd=lwd_axis, **{"cex.axis": 1.5})
    jmath.R_fn("title",
               main=main,
               sub=sub,
               xlab=xlab,
               ylab=ylab,
               **{
                   "cex.lab": cex_lab,
                   "cex.main": cex_main,
                   "cex.sub": cex_sub
               })
    R("par(op)")
    jmath.R_fn("dev.off")

Пример #5

Показать файл

Файл: extract_rsem_signal.py Проект: firebitsbr/changlab

    def run(
        self, network, in_data, out_attributes, user_options, num_cores,
        out_filename):
        """Merge per-sample RSEM result files into one expression table.

        Reads every result file found under the directory
        in_data.identifier, checks that all samples list the same gene
        and transcript IDs in the same order, and writes a
        tab-delimited file to out_filename.  The output has a header
        row, a "gene_id" column, a transcript ID column, and one column
        of TPM or FPKM values per sample (samples in sorted order).

        Arguments read by this method:
        in_data         object whose .identifier is the path of a
                        directory containing the RSEM results.
        out_attributes  mapping; "preprocess" must be "tpm" or "fpkm",
                        and "expression_of" must be "gene" or "isoform".
        out_filename    path of the tab-delimited file to write.

        network, user_options, and num_cores are accepted but not used
        here.

        Returns the metadata dictionary (left empty by this method).
        Raises AssertionError if any of the sanity checks fail.

        """
        import os
        from genomicode import jmath
        from genomicode import AnnotationMatrix
        from genomicode import alignlib

        rsem_path = in_data.identifier
        assert os.path.exists(rsem_path)
        assert os.path.isdir(rsem_path)
        result_files = alignlib.find_rsem_result_files(rsem_path)
        assert result_files, "No .results files found."
        metadata = {}

        preprocess = out_attributes.get("preprocess")
        assert preprocess in ["tpm", "fpkm"]

        # Figure out whether to report gene-level or isoform-level
        # expression.
        expression_of = out_attributes["expression_of"]
        assert expression_of in ["gene", "isoform"]
        get_genes = expression_of == "gene"

        # The name of the transcript column differs between the gene
        # and isoform result files.
        transcript_header = "transcript_id(s)"
        if not get_genes:
            transcript_header = "transcript_id"

        # For each of the result files, get the expression data.
        sample2matrix = {}  # sample -> AnnotationMatrix
        for sample, gene_filename, isoform_filename in result_files:
            filename = gene_filename if get_genes else isoform_filename
            # filename is None here, so report the sample instead.
            assert filename is not None, \
                "Missing %s results file for sample: %s" % (
                    expression_of, sample)
            assert os.path.exists(filename)
            matrix = AnnotationMatrix.read(filename)
            # Do some checking on the matrix.
            assert "gene_id" in matrix.headers
            assert transcript_header in matrix.headers
            assert "TPM" in matrix.headers
            assert "FPKM" in matrix.headers
            sample2matrix[sample] = matrix
        assert sample2matrix, "No samples"

        # Pull out the gene and transcript IDs.  The first matrix seen
        # provides the reference; every other matrix must match it
        # exactly so that the rows line up across samples.
        gene_id = transcript_id = None
        for matrix in sample2matrix.values():
            x1 = matrix["gene_id"]
            x2 = matrix[transcript_header]
            if gene_id is None:
                gene_id = x1
            if transcript_id is None:
                transcript_id = x2
            assert x1 == gene_id
            assert x2 == transcript_id
        assert gene_id
        assert transcript_id
        assert len(gene_id) == len(transcript_id)

        # Assemble into a gene expression matrix.
        value_header = "TPM"
        if preprocess == "fpkm":
            value_header = "FPKM"
        # t_data is the transposed matrix: the ID columns first, then
        # one row of expression values per sample.
        t_data = [gene_id, transcript_id]
        samples = []
        for sample in sorted(sample2matrix):
            exp = sample2matrix[sample][value_header]
            assert len(exp) == len(gene_id)
            t_data.append(exp)
            samples.append(sample)

        data = jmath.transpose(t_data)
        header = ["gene_id", transcript_header] + samples
        data = [header] + data

        # Write out the data file.  The with block makes sure the
        # file is flushed and closed, even if writing fails part way.
        with open(out_filename, 'w') as handle:
            for row in data:
                handle.write("\t".join(map(str, row)) + "\n")

        return metadata