Пример #1
0
    def run(self):
        """Walk every SV cluster and write the combined assemblies table.

        Reads the final "edges" table of ``cluster_svs.ClusterSVsStep``,
        groups its rows by the ``cluster`` column, calls ``self.walk`` on each
        group, concatenates the per-cluster results and writes them as a
        tab-separated file to the non-final ``walk_assemblies`` output path.

        Clusters whose walk raises ``IOError`` are reported on stdout and
        skipped.

        Raises:
            ValueError: if no cluster produced a result, so there is nothing
                to concatenate or write.
        """
        edges_path = cluster_svs.ClusterSVsStep(self.options).outpaths(final=True)["edges"]

        clusters = pandas.read_table(edges_path)
        # NOTE(review): presumably forces numeric-looking chromosome names to
        # be handled as strings rather than ints -- confirm against the input.
        for column in ("chromx", "chromy"):
            clusters[column] = clusters[column].astype("string")

        assembled = []

        utilities.ensure_dir(self.outpaths(final=False)["graphs"])
        for cluster_number, cluster in clusters.groupby("cluster"):
            self.logger.log(cluster_number)
            try:
                assembled.append(self.walk(cluster_number, cluster))
            except IOError:
                # Single-argument call form prints the same text on Python 2
                # and Python 3.
                print("not found {}".format(cluster_number))

        if not assembled:
            # pandas.concat([]) would raise a ValueError with an opaque
            # message; fail with the same exception type but name the cause.
            raise ValueError(
                "no clusters were assembled from {}".format(edges_path))

        # TODO: normalize coordinates according to reference.compare_chroms()
        assembled = pandas.concat(assembled, ignore_index=True)
        for column in ("x", "y"):
            assembled[column] = assembled[column].astype(int)

        # Diagnostic output: reference chromosomes vs. those seen in the result.
        print(self.options.reference.chroms)
        print(assembled["chromx"].unique())
        print(assembled["chromy"].unique())

        outpath = self.outpaths(final=False)["walk_assemblies"]
        assembled.to_csv(outpath, sep="\t", index=False)
Пример #2
0
    def run(self):
        """Run the assembly pipeline for this step.

        Stores the non-final ``assembly_dir`` output path on
        ``self.assembly_dir`` and ensures it exists, then chains
        ``combine_fastas`` -> ``run_assembly`` -> ``align_contigs``, feeding
        each stage the path returned by the previous one.
        """
        self.assembly_dir = self.outpaths(final=False)["assembly_dir"]
        utilities.ensure_dir(self.assembly_dir)

        # TODO: how many reads is reasonable here?
        read_limit = 5e6

        combined_fasta = self.combine_fastas(read_limit)
        assembled_contigs = self.run_assembly(combined_fasta)
        self.align_contigs(assembled_contigs)
Пример #3
0
def visualize_graphs(outdir, graphs, evidence, file_label=""):
    """Render each graph to a PDF via graphviz ``dot``, on a best-effort basis.

    For every graph a copy is annotated (edge labels/styles, node labels),
    converted with ``networkx.nx_agraph.to_agraph`` and drawn to
    ``<outdir>/temp<file_label><index>.pdf``. Graphs whose converted form has
    more than 1000 edges are skipped.

    Args:
        outdir: directory for the PDFs; created if needed.
        graphs: iterable of networkx graphs whose edge data carries the keys
            ``shared``, ``total``, ``p`` and ``kind``.
        evidence: unused by this function; kept for interface compatibility.
        file_label: optional string inserted into each output file name.

    Any exception raised while visualizing is reported on stderr and then
    suppressed, so a visualization failure never aborts the caller.
    """
    import traceback

    # NOTE: ``supported`` and ``missing`` are never incremented here; they are
    # kept only so the summary line retains its shape.
    supported = 0
    missing = 0
    breakpoints = 0

    try:
        utilities.ensure_dir(outdir)

        for i, graph in enumerate(graphs):
            print("visualize {} {}".format(i, graph))
            # Annotate a copy so the caller's graph is left untouched.
            graph = graph.copy()

            for n1, n2, data in graph.edges(data=True):
                data["label"] = "{}/{}={:.2g};{:.2g}".format(
                    int(data["shared"]),
                    int(data["total"]),
                    data["shared"] / float(data["total"]),
                    data["p"])

                kind = data["kind"]
                if kind == "facing":
                    data["label"] = "[{}]".format(data["label"])
                    data["style"] = "dashed"
                elif kind == "breakpoint":
                    breakpoints += 1
                elif kind == "weak":
                    data["fontsize"] = 11
                    data["color"] = "gray"

                # Assembled edges override any colour chosen above.
                if "assembled" in data:
                    data["color"] = "orange"

            for node in graph.nodes():
                graph.node[node]["label"] = get_node_label(node)

            dot = networkx.nx_agraph.to_agraph(graph)
            if len(dot.edges()) > 1000:
                print("  skipping")
                continue
            dot.draw("{}/temp{}{}.pdf".format(outdir, file_label, i), prog="dot")

        # One formatted string prints identically on Python 2 and 3 (the
        # multi-argument call form prints a tuple on Python 2).
        print("Supported: {} Missing: {} Total breakpoints: {}".format(
            supported, missing, breakpoints))
    except Exception:
        # Still best-effort, but no longer silent, and KeyboardInterrupt /
        # SystemExit are no longer swallowed.
        print("visualize_graphs failed; continuing without graph PDFs")
        traceback.print_exc()
Пример #4
0
def visualize_frags(outdir, graphs, options):
    """Plot fragments for each graph into one PDF per graph using R.

    For graph number ``i`` an R PDF device is opened at
    ``<outdir>/fragments.cluster_<i>.pdf``. For each connected component the
    first path yielded by ``networkx.shortest_simple_paths`` between the
    component's first two degree-1 nodes is taken, every second node of that
    path (excluding the last) plus the last node is kept, and ``plot_frags``
    is called on that list once per ``(sample, dataset)`` pair from
    ``options.iter_10xdatasets()``, in sorted order.

    Args:
        outdir: directory for the PDFs; created if needed.
        graphs: iterable of networkx graphs.
        options: object providing ``iter_10xdatasets()``; also passed through
            to ``plot_frags``.

    Raises:
        IndexError: if a connected component has fewer than two nodes of
            degree 1.
    """
    from rpy2.robjects import r

    utilities.ensure_dir(outdir)

    for i, graph in enumerate(graphs):
        r.pdf(os.path.join(outdir, "fragments.cluster_{}.pdf".format(i)))
        try:
            for component in networkx.connected_components(graph):
                subgraph = graph.subgraph(component)

                ends = [node for node, degree in subgraph.degree_iter()
                        if degree == 1]

                # Only the first (shortest) path is needed; pulling it with
                # next() avoids enumerating every simple path.
                path = list(next(
                    networkx.shortest_simple_paths(subgraph, ends[0], ends[1])))

                # Every second node up to (not including) the last, plus the
                # last node itself.
                breakends = path[:-1:2] + path[-1:]

                for sample, dataset in sorted(options.iter_10xdatasets()):
                    plot_frags(breakends, options, sample, dataset)
        finally:
            # Always close the R graphics device, even if plotting failed, so
            # the device is not leaked into later plots.
            r["dev.off"]()
Пример #5
0
 def ensure_dirs(self, to_run):
     """Create the directories needed by the first step in ``to_run``.

     Ensures that step's ``results_dir`` and ``working_dir`` exist, along
     with a sub-directory of ``self.options.log_dir`` named after the step's
     class.
     """
     first_step = to_run[0]

     utilities.ensure_dir(first_step.results_dir)
     utilities.ensure_dir(first_step.working_dir)

     step_log_dir = os.path.join(
         self.options.log_dir, first_step.__class__.__name__)
     utilities.ensure_dir(step_log_dir)
Пример #6
0
def ready_output_dir(options):
    """Ensure the working, results and log directories of ``options`` exist.

    The directories are handled in that order, each through
    ``utilities.ensure_dir``.
    """
    for attribute in ("working_dir", "results_dir", "log_dir"):
        utilities.ensure_dir(getattr(options, attribute))