def test_collapse_fuzzy_junctions(self):
        """Exercise compare_fuzzy_junctions, can_merge and collapse_fuzzy_junctions.

        Reads four GFF records from the input fixture, checks how the pairs
        (r0, r1) and (r2, r3) compare and that each pair can be merged, then
        runs the full collapse and checks that the two records written out
        equal r1 and r3.
        """
        test_name = "collapse_fuzzy_junctions"
        input_gff = op.join(_DAT_DIR_, "input_%s.gff" % test_name)
        input_group = op.join(_DAT_DIR_, "input_%s.group.txt" % test_name)
        output_gff = op.join(_OUT_DIR_, "output_%s.gff" % test_name)
        output_group = op.join(_OUT_DIR_, "output_%s.group.txt" % test_name)

        records = list(CollapseGffReader(input_gff))
        self.assertEqual(len(records), 4)
        r0, r1, r2, r3 = records

        # Each entry: (first record, second record, expected comparison result).
        expectations = (
            (r0, r1, "subset"),
            (r2, r3, "exact"),
        )
        for first, second, expected in expectations:
            match = compare_fuzzy_junctions(first.ref_exons, second.ref_exons,
                                            max_fuzzy_junction=5)
            self.assertEqual(match, expected)
            mergeable = can_merge(match, first, second,
                                  allow_extra_5exon=True, max_fuzzy_junction=5)
            self.assertTrue(mergeable)

        # Run the full collapse; it writes the fuzzy GFF and group files.
        collapse_fuzzy_junctions(
            gff_filename=input_gff,
            group_filename=input_group,
            fuzzy_gff_filename=output_gff,
            fuzzy_group_filename=output_group,
            allow_extra_5exon=True,
            max_fuzzy_junction=5)

        # Exactly two records are expected in the collapsed output.
        r4, r5 = list(CollapseGffReader(output_gff))
        self.assertEqual(r1, r4)
        self.assertEqual(r3, r5)
Exemplo n.º 2
0
    def test_collapse_fuzzy_junctions(self):
        """Check fuzzy-junction comparison, merge eligibility and collapsing.

        The input fixture must hold four records. The pair (r0, r1) is
        expected to compare as "subset" and the pair (r2, r3) as "exact";
        both pairs must be mergeable. After collapse_fuzzy_junctions runs,
        the output GFF must contain two records equal to r1 and r3.
        """
        test_name = "collapse_fuzzy_junctions"
        input_gff = op.join(_DAT_DIR_, "input_%s.gff" % test_name)
        input_group = op.join(_DAT_DIR_, "input_%s.group.txt" % test_name)
        output_gff = op.join(_OUT_DIR_, "output_%s.gff" % test_name)
        output_group = op.join(_OUT_DIR_, "output_%s.group.txt" % test_name)

        in_records = list(CollapseGffReader(input_gff))
        self.assertEqual(len(in_records), 4)
        r0, r1, r2, r3 = in_records

        # Pairwise checks, in order: r0 vs r1, then r2 vs r3.
        for (rec_a, rec_b), want in (((r0, r1), "subset"),
                                     ((r2, r3), "exact")):
            m = compare_fuzzy_junctions(rec_a.ref_exons, rec_b.ref_exons,
                                        max_fuzzy_junction=5)
            self.assertEqual(m, want)
            self.assertTrue(can_merge(m, rec_a, rec_b,
                                      allow_extra_5exon=True,
                                      max_fuzzy_junction=5))

        # Collapse the input and write the fuzzy GFF / group outputs.
        collapse_fuzzy_junctions(
            gff_filename=input_gff,
            group_filename=input_group,
            fuzzy_gff_filename=output_gff,
            fuzzy_group_filename=output_group,
            allow_extra_5exon=True,
            max_fuzzy_junction=5)

        # Unpacking requires exactly two output records.
        out_records = list(CollapseGffReader(output_gff))
        r4, r5 = out_records
        self.assertEqual(r1, r4)
        self.assertEqual(r3, r5)
Exemplo n.º 3
0
    def run(self):
        """
        Collapse input isoforms and pick a representative for each group.

        Steps, in order:
          1. Validate inputs and collapse isoforms by calling Branch.run().
          2. If self.shall_collapse_fuzzy_junctions is set, collapse further
             by calling collapse_fuzzy_junctions.
          3. ln() the selected GFF / group files to the final output names.
          4. Call pick_rep to pick a representative record for each group
             of collapsed isoforms.
        """
        self.validate_inputs()

        logging.info("Collapsing isoforms into transcripts.")
        branch = Branch(isoform_filename=self.isoform_filename,
                        sam_filename=self.sam_filename,
                        cov_threshold=self.min_flnc_coverage,
                        min_aln_coverage=self.min_aln_coverage,
                        min_aln_identity=self.min_aln_identity)
        branch.run(allow_extra_5exon=self.allow_extra_5exon,
                   skip_5_exon_alt=self.skip_5_exon_alt,
                   ignored_ids_fn=self.ignored_ids_txt_fn,
                   good_gff_fn=self.good_unfuzzy_gff_fn,
                   bad_gff_fn=self.bad_unfuzzy_gff_fn,
                   group_fn=self.unfuzzy_group_fn)

        for message, path in (
                ("Good unfuzzy isoforms written to: %s", self.good_unfuzzy_gff_fn),
                ("Bad unfuzzy isoforms written to: %s", self.bad_unfuzzy_gff_fn),
                ("Unfuzzy isoform groups written to: %s", self.unfuzzy_group_fn)):
            logging.info(message, realpath(path))

        if self.shall_collapse_fuzzy_junctions:
            # Isoforms with fuzzy junctions still need a second collapse pass.
            logging.info("Further collapsing fuzzy junctions.")
            collapse_fuzzy_junctions(gff_filename=self.good_unfuzzy_gff_fn,
                                     group_filename=self.unfuzzy_group_fn,
                                     fuzzy_gff_filename=self.good_fuzzy_gff_fn,
                                     fuzzy_group_filename=self.fuzzy_group_fn,
                                     allow_extra_5exon=self.allow_extra_5exon,
                                     max_fuzzy_junction=self.max_fuzzy_junction)

            for message, path in (
                    ("Good fuzzy isoforms written to: %s", self.good_fuzzy_gff_fn),
                    ("Bad fuzzy isoforms written to: %s", self.bad_fuzzy_gff_fn),
                    ("Fuzzy isoform groups written to: %s", self.fuzzy_group_fn)):
                logging.info(message, realpath(path))
            final_gff_src = self.good_fuzzy_gff_fn
            final_group_src = self.fuzzy_group_fn
        else:
            logging.info("No need to further collapse fuzzy junctions.")
            final_gff_src = self.good_unfuzzy_gff_fn
            final_group_src = self.unfuzzy_group_fn

        # The same GFF source backs both good_gff_fn and gff_fn.
        ln(final_gff_src, self.good_gff_fn)
        ln(final_gff_src, self.gff_fn)
        ln(final_group_src, self.group_fn)

        # Pick up representative
        logging.info("Picking up representative record.")
        # Original note: "5merge, pick longest" -- least-error picking is
        # requested only when extra 5' exons are not allowed.
        use_least_err = not self.allow_extra_5exon

        pick_rep(isoform_filename=self.isoform_filename,
                 gff_filename=self.good_gff_fn,
                 group_filename=self.group_fn,
                 output_filename=self.rep_fn(self.suffix),
                 pick_least_err_instead=use_least_err,
                 bad_gff_filename=self.bad_gff_fn)

        for message, path in (
                ("Ignored IDs written to: %s", self.ignored_ids_txt_fn),
                ("Output GFF written to: %s", self.gff_fn),
                ("Output Group TXT written to: %s", self.group_fn),
                ("Output collapsed isoforms written to: %s", self.rep_fn(self.suffix))):
            logging.info(message, realpath(path))
        logging.info("CollapseIsoforms Arguments: %s", self.arg_str())
Exemplo n.º 4
0
    def run(self):
        """
        Run the collapse pipeline end to end.

        First, collapse input isoforms by calling Branch.run().
        Then, when self.shall_collapse_fuzzy_junctions is set, collapse fuzzy
        junctions by calling collapse_fuzzy_junctions.
        Next, ln() the selected GFF / group files to the final output names.
        Finally, call pick_rep to pick a representative gff record for each
        group of collapsed isoforms.
        """
        self.validate_inputs()

        logging.info("Collapsing isoforms into transcripts.")
        branch_kwargs = dict(
            isoform_filename=self.isoform_filename,
            sam_filename=self.sam_filename,
            cov_threshold=self.min_flnc_coverage,
            min_aln_coverage=self.min_aln_coverage,
            min_aln_identity=self.min_aln_identity,
        )
        collapser = Branch(**branch_kwargs)

        run_kwargs = dict(
            allow_extra_5exon=self.allow_extra_5exon,
            skip_5_exon_alt=self.skip_5_exon_alt,
            ignored_ids_fn=self.ignored_ids_txt_fn,
            good_gff_fn=self.good_unfuzzy_gff_fn,
            bad_gff_fn=self.bad_unfuzzy_gff_fn,
            group_fn=self.unfuzzy_group_fn,
        )
        collapser.run(**run_kwargs)

        unfuzzy_reports = (
            ("Good unfuzzy isoforms written to: %s", self.good_unfuzzy_gff_fn),
            ("Bad unfuzzy isoforms written to: %s", self.bad_unfuzzy_gff_fn),
            ("Unfuzzy isoform groups written to: %s", self.unfuzzy_group_fn),
        )
        for fmt, filename in unfuzzy_reports:
            logging.info(fmt, realpath(filename))

        if not self.shall_collapse_fuzzy_junctions:
            logging.info("No need to further collapse fuzzy junctions.")
            links = (
                (self.good_unfuzzy_gff_fn, self.good_gff_fn),
                (self.good_unfuzzy_gff_fn, self.gff_fn),
                (self.unfuzzy_group_fn, self.group_fn),
            )
        else:
            logging.info("Further collapsing fuzzy junctions.")
            # Records that have fuzzy junctions need a further collapse.
            collapse_fuzzy_junctions(
                gff_filename=self.good_unfuzzy_gff_fn,
                group_filename=self.unfuzzy_group_fn,
                fuzzy_gff_filename=self.good_fuzzy_gff_fn,
                fuzzy_group_filename=self.fuzzy_group_fn,
                allow_extra_5exon=self.allow_extra_5exon,
                max_fuzzy_junction=self.max_fuzzy_junction)

            fuzzy_reports = (
                ("Good fuzzy isoforms written to: %s", self.good_fuzzy_gff_fn),
                ("Bad fuzzy isoforms written to: %s", self.bad_fuzzy_gff_fn),
                ("Fuzzy isoform groups written to: %s", self.fuzzy_group_fn),
            )
            for fmt, filename in fuzzy_reports:
                logging.info(fmt, realpath(filename))
            links = (
                (self.good_fuzzy_gff_fn, self.good_gff_fn),
                (self.good_fuzzy_gff_fn, self.gff_fn),
                (self.fuzzy_group_fn, self.group_fn),
            )

        # Each pair is (source, destination), passed to ln() in that order.
        for source, destination in links:
            ln(source, destination)

        # Pick up representative
        logging.info("Picking up representative record.")
        # Original note: "5merge, pick longest" -- the least-error choice is
        # requested only when allow_extra_5exon is off.
        least_err_flag = not self.allow_extra_5exon

        pick_rep(
            isoform_filename=self.isoform_filename,
            gff_filename=self.good_gff_fn,
            group_filename=self.group_fn,
            output_filename=self.rep_fn(self.suffix),
            pick_least_err_instead=least_err_flag,
            bad_gff_filename=self.bad_gff_fn)

        output_reports = (
            ("Ignored IDs written to: %s", self.ignored_ids_txt_fn),
            ("Output GFF written to: %s", self.gff_fn),
            ("Output Group TXT written to: %s", self.group_fn),
            ("Output collapsed isoforms written to: %s",
             self.rep_fn(self.suffix)),
        )
        for fmt, filename in output_reports:
            logging.info(fmt, realpath(filename))
        logging.info("CollapseIsoforms Arguments: %s", self.arg_str())