Exemplo n.º 1
0
    def test_run(self):
        """Test function run().

        Runs ReadsSubsetExtractor with the full-length, non-chimeric rules
        on ``data/test_subset.fa``, writing ``out/test_subset_unit.fa``, and
        checks that the output matches the reference file
        ``stdout/test_subset_unit.fa``. All paths are relative to
        ``self.testDir``.
        """
        inFN = op.join(self.testDir, "data/test_subset.fa")
        outFN = op.join(self.testDir, "out/test_subset_unit.fa")
        stdoutFN = op.join(self.testDir, "stdout/test_subset_unit.fa")

        rules = SubsetRules(1, 1) # Full-length, non-chimeric
        obj = ReadsSubsetExtractor(inFN, outFN, rules, True)
        obj.run()
        # shallow=False forces a comparison of the file contents; the
        # default (shallow=True) treats two files with identical os.stat()
        # signatures as equal without reading them.
        self.assertTrue(filecmp.cmp(outFN, stdoutFN, shallow=False),
                        "%s differs from expected %s" % (outFN, stdoutFN))
Exemplo n.º 2
0
    def test_satisfy(self):
        """Test function satisfy().

        Parses a ReadAnnotation from the name of every read in
        ``data/test_subset.fa`` and checks the value returned by satisfy()
        for each of them against the expected list, using the full-length,
        non-chimeric rules.
        """
        inFN = op.join(self.testDir, "data/test_subset.fa")
        with FastaReader(inFN) as reader:
            reads = list(reader)

        rules = SubsetRules(1, 1) # Full-length, non-chimeric
        # run() is never invoked here, so "in" and "out" are presumably
        # never opened -- only satisfy() is exercised.
        obj = ReadsSubsetExtractor("in", "out", rules, True)

        ans = [ReadAnnotation.fromString(r.name) for r in reads]
        res = [obj.satisfy(an, rules) for an in ans]
        expected = [1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1]
        # assertEqual reports which elements differ on failure, whereas
        # assertTrue(res == expected) can only say "False is not true".
        self.assertEqual(res, expected)
Exemplo n.º 3
0
    def run(self):
        """Dispatch to the sub-command named by self.args.subCommand.

        Supported values are 'classify', 'cluster' and 'subset'. The
        matching worker object is built from the parsed arguments and its
        run() method is called. Any other value is reported as an unknown
        command.

        Returns 0 on success. Any exception raised while building or
        running the worker (including the unknown-command error) is logged
        together with its traceback and turned into a return value of 1.
        """
        args = self.args
        sub_command = args.subCommand
        try:
            if sub_command == 'classify':
                chimera_opts = ChimeraDetectionOptions(
                    min_seq_len=args.min_seq_len,
                    min_score=args.min_score,
                    min_dist_from_end=args.min_dist_from_end,
                    max_adjacent_hit_dist=args.max_adjacent_hit_dist,
                    primer_search_window=args.primer_search_window,
                    detect_chimera_nfl=args.detect_chimera_nfl)
                worker = Classifier(
                    reads_fn=args.readsFN,
                    out_dir=args.outDir,
                    out_reads_fn=args.outReadsFN,
                    primer_fn=args.primerFN,
                    primer_report_fn=args.primerReportFN,
                    summary_fn=args.summary_fn,
                    cpus=args.cpus,
                    change_read_id=True,
                    opts=chimera_opts,
                    out_flnc_fn=args.flnc_fa,
                    out_nfl_fn=args.nfl_fa,
                    ignore_polyA=args.ignore_polyA,
                    reuse_dom=args.reuse_dom)
            elif sub_command == 'cluster':
                ice_options = IceOptions(
                    cDNA_size=args.cDNA_size,
                    quiver=args.quiver,
                    use_finer_qv=args.use_finer_qv)
                sge_options = SgeOptions(
                    unique_id=args.unique_id,
                    use_sge=args.use_sge,
                    max_sge_jobs=args.max_sge_jobs,
                    blasr_nproc=args.blasr_nproc,
                    quiver_nproc=args.quiver_nproc)
                hqlq_options = IceQuiverHQLQOptions(
                    qv_trim_5=args.qv_trim_5,
                    qv_trim_3=args.qv_trim_3,
                    hq_quiver_min_accuracy=args.hq_quiver_min_accuracy,
                    hq_isoforms_fa=args.hq_isoforms_fa,
                    hq_isoforms_fq=args.hq_isoforms_fq,
                    lq_isoforms_fa=args.lq_isoforms_fa,
                    lq_isoforms_fq=args.lq_isoforms_fq)
                worker = Cluster(
                    root_dir=args.root_dir,
                    flnc_fa=args.flnc_fa,
                    nfl_fa=args.nfl_fa,
                    bas_fofn=args.bas_fofn,
                    ccs_fofn=args.ccs_fofn,
                    fasta_fofn=args.fasta_fofn,
                    out_fa=args.consensusFa,
                    sge_opts=sge_options,
                    ice_opts=ice_options,
                    ipq_opts=hqlq_options,
                    report_fn=args.report_fn,
                    summary_fn=args.summary_fn,
                    nfl_reads_per_split=args.nfl_reads_per_split)
            elif sub_command == 'subset':
                subset_rules = SubsetRules(
                    FL=args.FL,
                    nonChimeric=args.nonChimeric)
                worker = ReadsSubsetExtractor(
                    inFN=args.readsFN,
                    outFN=args.outFN,
                    rules=subset_rules,
                    ignore_polyA=args.ignore_polyA,
                    printReadLengthOnly=args.printReadLengthOnly)
            else:
                raise PBTranscriptException(
                    sub_command,
                    "Unknown command passed to pbtranscript.py:"
                    + args.subName)
            # Every recognised sub-command ends the same way: run the
            # worker that was just built.
            worker.run()
        except Exception:
            logging.exception("Exiting pbtranscript with return code 1.")
            return 1
        return 0
Exemplo n.º 4
0
    def run(self):
        """Run the sub-command named by self.args.subCommand.

        Supported values are 'classify', 'cluster' and 'subset'; for each,
        the corresponding object is built from the parsed arguments and
        its run() method is called. Any other value raises
        PBTranscriptException, which is handled like every other error.

        Returns 0 on success. Any exception is logged, with its traceback,
        and turned into a return value of 1.
        """
        cmd = self.args.subCommand
        try:
            if cmd == 'classify':
                opts = ChimeraDetectionOptions(
                    min_seq_len=self.args.min_seq_len,
                    min_score=self.args.min_score,
                    min_dist_from_end=self.args.min_dist_from_end,
                    max_adjacent_hit_dist=self.args.max_adjacent_hit_dist,
                    primer_search_window=self.args.primer_search_window)

                obj = Classifier(reads_fn=self.args.readsFN,
                                 out_dir=self.args.outDir,
                                 out_reads_fn=self.args.outReadsFN,
                                 primer_fn=self.args.primerFN,
                                 primer_report_fn=self.args.primerReportFN,
                                 summary_fn=self.args.summary_fn,
                                 cpus=self.args.cpus,
                                 change_read_id=True,
                                 opts=opts,
                                 out_flnc_fn=self.args.flnc_fa,
                                 out_nfl_fn=self.args.nfl_fa,
                                 ignore_polyA=self.args.ignore_polyA)
                obj.run()
            elif cmd == 'cluster':
                ice_opts = IceOptions(cDNA_size=self.args.cDNA_size,
                                      quiver=self.args.quiver)
                sge_opts = SgeOptions(unique_id=self.args.unique_id,
                                      use_sge=self.args.use_sge,
                                      max_sge_jobs=self.args.max_sge_jobs,
                                      blasr_nproc=self.args.blasr_nproc,
                                      quiver_nproc=self.args.quiver_nproc)

                obj = Cluster(root_dir=self.args.root_dir,
                              flnc_fa=self.args.flnc_fa,
                              nfl_fa=self.args.nfl_fa,
                              bas_fofn=self.args.bas_fofn,
                              ccs_fofn=self.args.ccs_fofn,
                              out_fa=self.args.consensusFa,
                              sge_opts=sge_opts,
                              ice_opts=ice_opts,
                              hq_isoforms_fa=self.args.hq_isoforms_fa,
                              hq_isoforms_fq=self.args.hq_isoforms_fq,
                              lq_isoforms_fa=self.args.lq_isoforms_fa,
                              lq_isoforms_fq=self.args.lq_isoforms_fq,
                              report_fn=self.args.report_fn,
                              summary_fn=self.args.summary_fn)
                obj.run()

            elif cmd == 'subset':
                rules = SubsetRules(FL=self.args.FL,
                                    nonChimeric=self.args.nonChimeric)

                obj = ReadsSubsetExtractor(
                    inFN=self.args.readsFN,
                    outFN=self.args.outFN,
                    rules=rules,
                    ignore_polyA=self.args.ignore_polyA,
                    printReadLengthOnly=self.args.printReadLengthOnly)
                obj.run()
            else:
                raise PBTranscriptException(
                    cmd, "Unknown command passed to pbtranscript.py:" +
                    self.args.subName)
        except Exception as err:
            # logging.exception() logs at ERROR level with the same message
            # as before, but also records the traceback. str(err) on its
            # own hides the exception type and is empty for exceptions
            # raised without arguments.
            logging.exception(str(err))
            return 1
        return 0