def plot_s(genedata: pd.DataFrame, chrom_offsets: pd.DataFrame, outpath: str,
           ymin: Optional[float], ymax: Optional[float]) -> None:
    """Plot the ``s`` (selection coefficient) column with ``mh.plot_manhat``.

    Args:
        genedata: Table holding an ``s`` column; the chromosome column is
            passed to ``mh.plot_manhat`` as ``"Scaffold"``.
        chrom_offsets: Chromosome offsets, passed through unchanged to
            ``mh.plot_manhat``.
        outpath: Output path handed to ``mh.plot_manhat``.
        ymin: Lower y-limit. ``None`` means derive it from the data: the
            smallest ``s`` value, but never below -1.
        ymax: Upper y-limit. ``None`` means derive it from the data: the
            largest ``s`` value, but never above 1.
    """
    # Test against None rather than truthiness: 0.0 is a perfectly valid
    # axis limit and must not be mistaken for "not supplied". Each limit is
    # filled in independently so a caller may pin just one of them.
    if ymin is None:
        ymin = max(min(genedata["s"]), -1)
    if ymax is None:
        ymax = min(max(genedata["s"]), 1)

    mh.plot_manhat(
        genedata,
        outpath,
        chrom_offsets,
        "s",
        title="Selection coefficient",
        yname="s",
        dims=(20, 6),
        scale=1.5,
        log=True,
        named_xticks=True,
        chrom_col="Scaffold",
        geom="line",
        ylim=(ymin, ymax),
    )
def plot_p(genedata: pd.DataFrame, chrom_offsets: pd.DataFrame, outpath: str,
           ymin: Optional[float], ymax: Optional[float]) -> None:
    """Plot the ``p`` column as a line plot with ``mh.plot_manhat``.

    Args:
        genedata: Table holding a ``p`` column; the chromosome column is
            passed to ``mh.plot_manhat`` as ``"Scaffold"``.
        chrom_offsets: Chromosome offsets, passed through unchanged to
            ``mh.plot_manhat``.
        outpath: Output path handed to ``mh.plot_manhat``.
        ymin: Lower y-limit. ``None`` means derive it from the data: the
            smallest ``p`` value, but never below 0.
        ymax: Upper y-limit. ``None`` means use the largest ``p`` value.
    """
    # Test against None rather than truthiness: ymin=0 is the natural lower
    # bound for a probability and must be honoured, not treated as missing.
    # Each limit is filled in independently so a caller may pin just one.
    if ymin is None:
        ymin = max(min(genedata["p"]), 0)
    if ymax is None:
        ymax = max(genedata["p"])

    mh.plot_manhat(
        genedata,
        outpath,
        chrom_offsets,
        "p",
        title="Probability of selection coefficient occurring randomly",
        yname="-log10(p)",
        dims=(20, 6),
        scale=1.5,
        log=True,
        named_xticks=True,
        chrom_col="Scaffold",
        geom="line",
        ylim=(ymin, ymax),
    )
def plot_p(genedata: pd.DataFrame, chrom_offsets: pd.DataFrame, outpath: str,
           ymin: Optional[float], ymax: Optional[float]) -> None:
    """Plot the ``p`` column as points, coloured by ``reject``.

    Args:
        genedata: Table holding a ``p`` column; ``"Scaffold"`` is passed to
            ``mh.plot_manhat`` as the chromosome column and ``"reject"`` as
            the colour column.
        chrom_offsets: Chromosome offsets, passed through unchanged to
            ``mh.plot_manhat``.
        outpath: Output path handed to ``mh.plot_manhat``.
        ymin: Lower y-limit. ``None`` means derive it from the data: the
            smallest ``p`` value, but never below 0.
        ymax: Upper y-limit. ``None`` means use the largest ``p`` value.
    """
    # NOTE(review): another function named `plot_p` (line geom, different
    # title) appears earlier in this file; if both live in the same module
    # this definition replaces it -- confirm that is intended.

    # Test against None rather than truthiness: ymin=0 is the natural lower
    # bound for a probability and must be honoured, not treated as missing.
    # Each limit is filled in independently so a caller may pin just one.
    if ymin is None:
        ymin = max(min(genedata["p"]), 0)
    if ymax is None:
        ymax = max(genedata["p"])

    mh.plot_manhat(
        genedata,
        outpath,
        chrom_offsets,
        "p",
        title="Tukey test probability of\ndifference in selection coefficient",
        yname="-log10(p)",
        dims=(20, 6),
        scale=1.5,
        log=True,
        named_xticks=True,
        chrom_col="Scaffold",
        geom="point",
        ylim=(ymin, ymax),
        color_col="reject",
    )
def main():
    """Plot gene and repeat densities from fixed .bed files in the CWD.

    Reads chromosome lengths from ``reflens.bed`` and density tracks from
    ``answer_dens.bed`` (genes) and ``repanswer_dens.bed`` (repeats), prints
    the intermediate tables, and writes two PDFs: one with ``Feature`` as the
    facet column and one with ``Feature`` as the colour column.
    """
    with open("reflens.bed", "r") as lengths_handle:
        chrlens = mh.get_chrom_lens_from_bed(lengths_handle)
    print(chrlens)

    gene_track = mh.get_data_from_bed("answer_dens.bed",
                                      data_col_name="Density")
    print(gene_track)
    repeat_track = mh.get_data_from_bed("repanswer_dens.bed",
                                        data_col_name="Density")
    print(repeat_track)

    # Both tracks share the same column layout; only the feature label differs.
    column_spec = {
        "chrom_col": "Scaffold",
        "bp_col": "Start",
        "val_col": "Value",
    }
    gene_manhat, chrom_offsets = mh.manhatify(
        gene_track, chrlens, **column_spec, feature="Genes")
    # The offsets from the second call are the ones used for plotting.
    repeat_manhat, chrom_offsets = mh.manhatify(
        repeat_track, chrlens, **column_spec, feature="Repeats")

    combo = mh.combine_data((gene_manhat, repeat_manhat))
    print(combo.head())
    print(chrom_offsets)

    shared_plot_args = {
        "title": "Feature densities",
        "yname": "Features per 1Mb window",
        "dims": (20, 6),
        "scale": 1.5,
    }
    outputs = (
        ("select_families_dens2.pdf", {"facet_col": "Feature"}),
        ("select_families_dens3.pdf", {"color_col": "Feature"}),
    )
    for pdf_path, grouping in outputs:
        mh.plot_manhat(combo, pdf_path, chrom_offsets, "Density",
                       **shared_plot_args, **grouping)
def _build_pairing_plot_parser():
    """Build the command-line parser used by the pairing-rate ``main``."""
    # The text must be passed as `description`; given positionally it would
    # become `prog` and replace the script name in the usage line.
    parser = argparse.ArgumentParser(
        description="Visualize Hi-C pairing rates as a 2-d line plot.")
    parser.add_argument(
        "input",
        nargs='*',
        help="Input file(s) generated by pairviz (default = stdin).")
    parser.add_argument("-L",
                        "--chrlens",
                        help="chromosome lengths .bed file (required).",
                        required=True)
    parser.add_argument("-o",
                        "--output",
                        help="output path (default = out.pdf).")
    parser.add_argument("-t",
                        "--title",
                        help="Title of plot (default = \"Pairing Rate\").")
    parser.add_argument(
        "-p",
        "--proportion",
        help=
        "If included, plot as a proportion of total reads in the region, rather than absolute (default = False).",
        action="store_true")
    parser.add_argument(
        "-f",
        "--no_fpkm",
        help=
        "If included, plot read counts rather than FPKM (default = False, can be combined with --proportion).",
        action="store_true")
    parser.add_argument("-s",
                        "--self",
                        help="Also plot self-interactions (default = False).",
                        action="store_true")
    parser.add_argument(
        "-c",
        "--chromspace",
        help=
        "bp of space to put between chromosomes in plot (default = 5000000).")
    parser.add_argument("-l",
                        "--log",
                        help="Log-scale the y-axis (default = False).",
                        action="store_true")
    parser.add_argument("-i",
                        "--stdin",
                        help="take input from stdin along with other inputs.",
                        action="store_true")
    parser.add_argument(
        "-n",
        "--name_col",
        help="Name of the column to use for differentiating different runs")
    parser.add_argument("-x", "--x_axis_name", help="X axis name.")
    parser.add_argument("-y", "--y_axis_name", help="Y axis name.")
    parser.add_argument("-N",
                        "--named_xticks",
                        help="Use chromosome names for X axis ticks.",
                        action="store_true")
    return parser


def _pairing_value_columns(use_fpkm, proportion):
    """Return the ``(pairing, alternative)`` column names to plot."""
    if use_fpkm:
        if proportion:
            return 'pair_prop_fpkm', 'alt_prop_fpkm'
        return 'pair_fpkm', 'alt_fpkm'
    if proportion:
        return 'pair_prop', 'alt_prop'
    return 'hits', 'alt_hits'


def main():
    """Command-line entry point: plot pairing rates along the genome.

    Parses the command line, reads every input stream with
    ``parse_all_data``, concatenates the results, keeps only the selected
    value column (plus the alternative column when ``--self`` is given) and
    hands the table to ``mh.manhatify`` / ``mh.plot_manhat``.

    Standard input is read when ``--stdin`` is given or when no input files
    are listed on the command line.
    """
    # NOTE(review): another function named `main` appears earlier in this
    # file; if both live in the same module this definition replaces it --
    # confirm that is intended.
    import contextlib  # local import: only needed to manage the input files

    args = _build_pairing_plot_parser().parse_args()

    # Empty/missing option values fall back to the documented defaults.
    output = args.output or "out.pdf"
    title = args.title or "Pairing Rate"
    xname = args.x_axis_name or "Genome position (bp)"
    yname = args.y_axis_name or "Hi-C contacts"
    name_col = args.name_col or None
    plot_self = args.self
    # NOTE(review): --chromspace is accepted but nothing in this function
    # consumes it; confirm whether it should be forwarded to mh.manhatify.

    my_y, alt_y = _pairing_value_columns(not args.no_fpkm, args.proportion)

    # Make the combined data frame. The files are kept open until the frames
    # are concatenated in case parse_all_data reads them lazily, and are
    # closed afterwards even if parsing fails.
    with contextlib.ExitStack() as open_files:
        inconns = []
        # `args.input` (not the builtin `input`) decides the stdin fallback.
        if args.stdin or not args.input:
            inconns.append(sys.stdin)
        for path in args.input:
            inconns.append(open_files.enter_context(open(path, "r")))
        alldatas = parse_all_data(inconns)
        big_alldata = pd.concat(alldatas, ignore_index=True)

    with open(args.chrlens, "r") as inconn:
        chrlens = mh.get_chrom_lens_from_bed(inconn)

    id_vars = ['chrom', 'start', 'end']
    if name_col:
        id_vars.append(name_col)
    m_alldata = pd.melt(big_alldata, id_vars=id_vars)

    wanted = (my_y, alt_y) if plot_self else (my_y,)
    # .copy() so the dtype conversion below writes to an independent frame
    # rather than to a slice of m_alldata.
    mm_alldata = m_alldata[m_alldata['variable'].isin(wanted)].copy()
    mm_alldata['value'] = mm_alldata['value'].astype(float)

    manhat_data, chroffsets = mh.manhatify(mm_alldata,
                                           chrlens,
                                           chrom_col="chrom",
                                           bp_col="start",
                                           val_col="value",
                                           feature=name_col)
    mh.plot_manhat(manhat_data,
                   output,
                   chroffsets,
                   "value",
                   title=title,
                   xname=xname,
                   yname=yname,
                   color_col=name_col,
                   log=args.log,
                   scale=1,
                   named_xticks=args.named_xticks,
                   chrom_col="chrom")