Exemplo n.º 1
0
    def test_identify_element_singal_ann_with_tes(self):
        """Check elements of Chr01:4648477-4653385 when one TES is supplied.

        The region is expected to yield exactly one gene; its introns,
        internal exons, TSS exons and TES exons are compared (after sorting
        by start/end) against their expected string representations.
        """

        def by_position(element):
            # Sort key shared by every element list below.
            return (element.start, element.end)

        supplied_tes = [("Chr01", 4649000, "-")]
        genes = identify_element("Chr01", 4648477, 4653385, self.ngs, [], [],
                                 supplied_tes, self.ann)
        self.assertEqual(len(genes), 1)

        gene = genes[0]
        self.assertEqual(str(gene), "Gene: Chr01:4648477-4653385 -")

        introns = sorted(gene.intron_list, key=by_position)
        expected_introns = (
            "[Intron: Chr01:4650078-4650291 -, Intron: Chr01:4650579-4650664 -, "
            "Intron: Chr01:4650748-4651282 -, Intron: Chr01:4651337-4651429 -, "
            "Intron: Chr01:4651463-4651566 -, Intron: Chr01:4651660-4651841 -]")
        self.assertEqual(str(introns), expected_introns)

        internal_exons = sorted(gene.internal_exon_list, key=by_position)
        expected_internal_exons = (
            "[Exon: Chr01:4650291-4650579 -, Exon: Chr01:4650664-4650748 -, "
            "Exon: Chr01:4651282-4651337 -, Exon: Chr01:4651429-4651463 -, "
            "Exon: Chr01:4651566-4651660 -]")
        self.assertEqual(str(internal_exons), expected_internal_exons)

        tss_exons = sorted(gene.tss_exon_list, key=by_position)
        self.assertEqual(str(tss_exons), "[Exon: Chr01:4651841-4652385 -]")

        tes_exons = sorted(gene.tes_exon_list, key=by_position)
        self.assertEqual(str(tes_exons), "[Exon: Chr01:4649000-4650078 -]")
Exemplo n.º 2
0
 def test_identify_transcript_singal(self):
     """Check the isoform reported for the first gene in Chr01:4648477-4653385."""
     genes = identify_element("Chr01", 4648477, 4653385, self.ngs, [], [],
                              [], self.ann)
     transcripts = identify_transcript(genes[0], self.ann)
     expected_isoforms = (
         "[Isoform: segment array: [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]; tss=True; tes=True; invalid=False]"
     )
     self.assertEqual(str(transcripts.isoA), expected_isoforms)
Exemplo n.º 3
0
 def test_identify_transcript_multi(self):
     """Check the two isoforms reported for the first gene in Chr01:795113-808059."""
     genes = identify_element("Chr01", 795113, 808059, self.ngs, [], [], [],
                              self.ann)
     transcripts = identify_transcript(genes[0], self.ann)
     expected_isoforms = (
         "[Isoform: segment array: [1, 0, 1, 0, 1, 0]; tss=True; tes=True; invalid=False, "
         "Isoform: segment array: [1, 0, 1, 0, 1, 1]; tss=True; tes=True; invalid=False]"
     )
     self.assertEqual(str(transcripts.isoA), expected_isoforms)
Exemplo n.º 4
0
    def do(self,
           out_dir,
           ngs_obj_list,
           tgs_obj_list,
           ext_tss_list,
           ext_tes_list,
           ann,
           f_genome,
           paraclu_path=None):
        """Identify elements and isoforms for received regions until told to stop.

        Regions arrive through ``self.recv_data()`` as ``(chrom, start, end)``
        and are processed one at a time until the received value equals
        ``self.finish_msg``.  Results are written through an ``OutputHandle``
        opened on ``<out_dir>/node_<rank>``.  After every region, and once
        more after the loop ends, ``(self.rank, self.finish_msg)`` is sent to
        ``self.root`` with ``self.worker_tag``.
        """

        def notify_root():
            # Report to the root process that this worker finished its work.
            self.comm.send((self.rank, self.finish_msg),
                           dest=self.root,
                           tag=self.worker_tag)

        node_dir = os.path.join(out_dir, "node_{0}".format(self.rank))
        written_clusters = 0
        with OutputHandle(node_dir) as f_out:
            region = self.recv_data()
            while region != self.finish_msg:
                chrom, start, end = region
                clusters = identify_element(chrom, start, end, ngs_obj_list,
                                            tgs_obj_list, ext_tss_list,
                                            ext_tes_list, ann, f_genome,
                                            paraclu_path)

                for cluster in clusters:
                    if cluster.has_element():
                        # Only clusters that contain elements get a name.
                        written_clusters += 1
                        cluster_name = "n_{0}_c_{1}".format(
                            self.rank + 1, written_clusters)
                        cluster.write_element2bed6(*f_out.element_handles(),
                                                   cluster_name)
                        isoforms = identify_transcript(cluster, ann)
                        isoforms.write2bed12(cluster_name,
                                             *f_out.isoform_handles())

                self.debug("LBIdentifyIsoWorker: finish identification\n")
                notify_root()
                region = self.recv_data()
        notify_root()
Exemplo n.º 5
0
    def test_identify_element_mutil_anns(self):
        """Check elements identified for the region Chr01:795113-808059.

        The region is expected to yield exactly one gene; its introns,
        internal exons, TSS exons and TES exons are compared (after sorting
        by start/end) against their expected string representations.
        """

        def by_position(element):
            # Sort key shared by every element list below.
            return (element.start, element.end)

        genes = identify_element("Chr01", 795113, 808059, self.ngs, [], [],
                                 [], self.ann)
        self.assertEqual(len(genes), 1)

        gene = genes[0]
        self.assertEqual(str(gene), "Gene: Chr01:797113-802059 -")

        introns = sorted(gene.intron_list, key=by_position)
        expected_introns = (
            "[Intron: Chr01:799412-799981 -, Intron: Chr01:800328-800876 -]")
        self.assertEqual(str(introns), expected_introns)

        internal_exons = sorted(gene.internal_exon_list, key=by_position)
        self.assertEqual(str(internal_exons), "[Exon: Chr01:799981-800328 -]")

        tss_exons = sorted(gene.tss_exon_list, key=by_position)
        expected_tss_exons = (
            "[Exon: Chr01:800876-800959 -, Exon: Chr01:800876-801059 -]")
        self.assertEqual(str(tss_exons), expected_tss_exons)

        tes_exons = sorted(gene.tes_exon_list, key=by_position)
        self.assertEqual(str(tes_exons), "[Exon: Chr01:798113-799412 -]")
Exemplo n.º 6
0
def main(args):
    """Main entry point allowing external calls.

    Parses the command line, loads the NGS/TGS inputs, the external TSS/TES
    lists and the annotation, builds the linkage over all of them, and then,
    for every linked region, identifies elements and transcripts and writes
    them through an ``OutputHandle`` opened on the output directory.

    When ``args.time_out`` is set, each region is processed under a SIGALRM
    timer.  A region that raises ``TimeOutError`` is reported on stdout and
    appended to ``igia_debug_timeout.log`` in the output directory, and
    processing continues with the next region.

    Args:
      args ([str]): command line parameter list
    """
    args = parse_args(args)
    setup_logging(args.loglevel)
    check_paraclu(args)

    _logger.debug("Starting IGIA ...")

    ngs_obj_list = [SeqFile(x, "NGS") for x in args.ngs_file]
    tgs_obj_list = [SeqFile(x, "TGS") for x in args.tgs_file]
    ext_tss_list = load_txs(args.tss)
    ext_tes_list = load_txs(args.tes)

    out_dir = args.out_dir
    ann = load_ann(args.ann, args.size, out_dir, "ANN")

    # Update Global variables.
    GVAR.RULE = args.rule
    GVAR.TXS_DIFF = args.dtxs
    GVAR.SPLICED_INTRON_PIR_CUTOFF = args.pir
    f_genome = args.f_genome
    paraclu_path = args.paraclu_path

    load_seqinfo(ngs_obj_list)
    _logger.info("Start building linkage ...")
    bam_list = ngs_obj_list + tgs_obj_list
    if ann is not None:
        bam_list += [ann]
    linkage = find_linkage(bam_list)
    _logger.info("Finish building linkage")

    use_timeout = args.time_out is not None
    cluster_indx = 0
    with OutputHandle(out_dir) as f_out:
        for chrom, start, end in linkage.iterlinkage():
            try:
                if use_timeout:
                    # Arm a per-region timer; the handler is presumably what
                    # raises the TimeOutError caught below.
                    signal.signal(signal.SIGALRM, time_out_handler)
                    signal.alarm(args.time_out)
                _logger.debug(
                    "Start identifying elements in {0}:{1}-{2}".format(
                        chrom, start, end))
                gene_cluster_list = identify_element(
                    chrom, start, end, ngs_obj_list, tgs_obj_list,
                    ext_tss_list, ext_tes_list, ann, f_genome, paraclu_path)
                _logger.debug(
                    "Finish identifying elements in {0}:{1}-{2}".format(
                        chrom, start, end))

                for gene_cluster in gene_cluster_list:  # list of gene cluster without any common exon
                    if not gene_cluster.has_element():
                        continue
                    cluster_indx += 1
                    cluster_name = "c_{0}".format(cluster_indx)
                    gene_cluster.write_element2bed6(*f_out.element_handles(),
                                                    cluster_name)

                    _logger.debug(
                        "Start identifying transcript for {0}".format(
                            gene_cluster))
                    trans = identify_transcript(gene_cluster, ann)
                    trans.write2bed12(cluster_name, *f_out.isoform_handles())
                    _logger.debug(
                        "Finish identifying transcript for {0}".format(
                            gene_cluster))
            except TimeOutError:
                print("TimeOut: {0}\t{1}\t{2}\n".format(chrom, start, end))
                with open(os.path.join(out_dir, "igia_debug_timeout.log"),
                          "a") as f:
                    f.write("TimeOut ({0}s): {1}\t{2}\t{3}\n".format(
                        args.time_out, chrom, start, end))
            finally:
                # Always disarm the timer, including when an unrelated
                # exception escapes, so no pending SIGALRM outlives this
                # region (or main itself) and fires in unrelated code.
                if use_timeout:
                    signal.alarm(0)

    _logger.info("End")