Пример #1
0
    def test_cmscan_task_multithreaded(self):
        """Compare cmscan e-values between a 1-thread and an n-thread run.

        For every thread count in (2, 3, 4, 5) this builds a cmpress task for
        the CM file plus two cmscan tasks (one passing ``1`` and one passing
        ``n_threads``), runs them, parses both outputs and asserts that the
        sorted ``e_value`` columns hold the same values.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('rnaseP-bsu.fa', td) as transcript, \
                     TestData('rnaseP-eubact.c.cm', td) as cm, \
                     TemporaryFile(td) as out_single, \
                     TemporaryFile(td) as out_multi:

                    for n_threads in (2, 3, 4, 5):
                        db_task = tasks.get_cmpress_task(cm, self.cmpress_cfg)
                        aln_task_single = tasks.get_cmscan_task(
                            transcript, out_single, cm, 1.0, 1,
                            self.cmscan_cfg)
                        aln_task_multi = tasks.get_cmscan_task(
                            transcript, out_multi, cm, 1.0, n_threads,
                            self.cmscan_cfg)
                        run_tasks([db_task, aln_task_single], ['run'])
                        run_task(aln_task_multi)

                        alns_single = pd.concat(cmscan_to_df_iter(out_single))
                        alns_multi = pd.concat(cmscan_to_df_iter(out_multi))

                        # Compare by position, not by index label:
                        # sort_values() reorders the index together with the
                        # values, and pandas only allows `==` between Series
                        # whose indexes are identical. Plain lists also make a
                        # length mismatch a normal assertion failure.
                        single = alns_single['e_value'].sort_values()
                        multi = alns_multi['e_value'].sort_values()
                        self.assertEqual(single.tolist(), multi.tolist())
Пример #2
0
    def test_hmmscan_task_multithreaded(self):
        """Compare hmmscan domain e-values between a 1-thread and n-thread run.

        For every thread count in (2, 3, 4, 5) this builds an hmmpress task
        for the HMM file plus two hmmscan tasks (one passing ``1`` and one
        passing ``n_threads``), runs them, parses both outputs and asserts
        that the sorted ``domain_i_evalue`` columns hold the same values.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('20aa-alitest.fa', td) as prot, \
                     TestData('20aa.hmm', td) as hmm, \
                     TemporaryFile(td) as out_single, \
                     TemporaryFile(td) as out_multi:

                    for n_threads in (2, 3, 4, 5):
                        db_task = tasks.get_hmmpress_task(hmm, self.hmmpress_cfg)
                        aln_task_single = tasks.get_hmmscan_task(
                            prot, out_single, hmm, 1.0, 1,
                            self.hmmscan_cfg)
                        aln_task_multi = tasks.get_hmmscan_task(
                            prot, out_multi, hmm, 1.0, n_threads,
                            self.hmmscan_cfg)
                        run_tasks([db_task, aln_task_single], ['run'])
                        run_task(aln_task_multi)

                        # Debug output: directory listing and raw
                        # single-thread result.
                        print(os.listdir(td), file=sys.stderr)
                        # Use a context manager so the handle is closed on
                        # every loop iteration instead of being leaked.
                        with open(out_single) as fp:
                            print(fp.read())

                        alns_single = pd.concat(hmmscan_to_df_iter(out_single))
                        alns_multi = pd.concat(hmmscan_to_df_iter(out_multi))

                        # Compare by position, not by index label:
                        # sort_values() reorders the index together with the
                        # values, and pandas only allows `==` between Series
                        # whose indexes are identical.
                        single = alns_single['domain_i_evalue'].sort_values()
                        multi = alns_multi['domain_i_evalue'].sort_values()
                        self.assertEqual(single.tolist(), multi.tolist())
Пример #3
0
    def test_cmscan_task_multithreaded(self, tmpdir, datadir):
        """Compare cmscan e-values between a 1-thread and an n-thread run.

        Works inside ``tmpdir``. For every thread count in (2, 3, 4, 5) this
        builds a ``CMPressTask`` for the CM file plus two ``CMScanTask``
        tasks (``n_threads=1`` and ``n_threads=n_threads``, both with
        ``cutoff=1.0``), runs them, parses both outputs with
        ``InfernalParser`` and asserts that the sorted ``e_value`` columns
        hold the same values.
        """
        with tmpdir.as_cwd():
            transcript = datadir('rnaseP-bsu.fa')
            cm = datadir('rnaseP-eubact.c.cm')
            out_single = str(tmpdir.join('single'))
            out_multi = str(tmpdir.join('multi'))

            for n_threads in (2, 3, 4, 5):

                db_task = CMPressTask().task(cm, params=self.cmpress_cfg)
                aln_tasks_single = CMScanTask().task(transcript,
                                                     out_single,
                                                     cm,
                                                     cutoff=1.0,
                                                     n_threads=1)
                aln_tasks_multi = CMScanTask().task(transcript,
                                                    out_multi,
                                                    cm,
                                                    cutoff=1.0,
                                                    n_threads=n_threads)
                run_tasks([db_task, aln_tasks_single], ['run'])
                run_task(aln_tasks_multi)

                alns_single = pd.concat(InfernalParser(out_single))
                alns_multi = pd.concat(InfernalParser(out_multi))

                # Compare by position, not by index label: sort_values()
                # reorders the index together with the values, and pandas
                # only allows `==` between Series whose indexes are
                # identical. Plain lists also give a readable diff on failure.
                single = alns_single['e_value'].sort_values()
                multi = alns_multi['e_value'].sort_values()
                assert single.tolist() == multi.tolist()
Пример #4
0
    def test_hmmscan_task_multithreaded(self):
        """Check that hmmscan yields the same domain e-values for 1 and n threads.

        Loops over thread counts (2, 3, 4, 5). Each iteration builds an
        hmmpress task and two hmmscan tasks (passing ``1`` and ``n_threads``
        respectively), runs them, and asserts that the sorted
        ``domain_i_evalue`` values parsed from both output files are equal.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('20aa-alitest.fa', td) as prot, \
                     TestData('20aa.hmm', td) as hmm, \
                     TemporaryFile(td) as out_single, \
                     TemporaryFile(td) as out_multi:

                    for n_threads in (2, 3, 4, 5):
                        db_task = tasks.get_hmmpress_task(
                            hmm, self.hmmpress_cfg)
                        aln_task_single = tasks.get_hmmscan_task(
                            prot, out_single, hmm, 1.0, 1, self.hmmscan_cfg)
                        aln_task_multi = tasks.get_hmmscan_task(
                            prot, out_multi, hmm, 1.0, n_threads,
                            self.hmmscan_cfg)
                        run_tasks([db_task, aln_task_single], ['run'])
                        run_task(aln_task_multi)
                        print(os.listdir(td), file=sys.stderr)

                        # Close the debug read explicitly; a bare
                        # open(...).read() leaves the handle to the garbage
                        # collector on every iteration.
                        with open(out_single) as fp:
                            print(fp.read())
                        alns_single = pd.concat(hmmscan_to_df_iter(out_single))
                        alns_multi = pd.concat(hmmscan_to_df_iter(out_multi))

                        # Positional comparison: after sort_values() the two
                        # Series generally carry differently ordered indexes,
                        # which pandas rejects for `==`.
                        single = alns_single['domain_i_evalue'].sort_values()
                        multi = alns_multi['domain_i_evalue'].sort_values()
                        self.assertEqual(single.tolist(), multi.tolist())
Пример #5
0
    def test_cmscan_task_multithreaded(self):
        """Check that cmscan yields the same e-values for 1 and n threads.

        Loops over thread counts (2, 3, 4, 5). Each iteration builds a
        cmpress task and two cmscan tasks (passing ``1`` and ``n_threads``
        respectively), runs them, and asserts that the sorted ``e_value``
        values parsed from both output files are equal.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('rnaseP-bsu.fa', td) as transcript, \
                     TestData('rnaseP-eubact.c.cm', td) as cm, \
                     TemporaryFile(td) as out_single, \
                     TemporaryFile(td) as out_multi:

                    for n_threads in (2, 3, 4, 5):

                        db_task = tasks.get_cmpress_task(cm, self.cmpress_cfg)
                        aln_task_single = tasks.get_cmscan_task(
                            transcript, out_single, cm, 1.0, 1,
                            self.cmscan_cfg)
                        aln_task_multi = tasks.get_cmscan_task(
                            transcript, out_multi, cm, 1.0, n_threads,
                            self.cmscan_cfg)
                        run_tasks([db_task, aln_task_single], ['run'])
                        run_task(aln_task_multi)

                        alns_single = pd.concat(cmscan_to_df_iter(out_single))
                        alns_multi = pd.concat(cmscan_to_df_iter(out_multi))

                        # Positional comparison: after sort_values() the two
                        # Series generally carry differently ordered indexes,
                        # which pandas rejects for `==`. Lists also turn a
                        # row-count mismatch into a plain assertion failure.
                        single = alns_single['e_value'].sort_values()
                        multi = alns_multi['e_value'].sort_values()
                        self.assertEqual(single.tolist(), multi.tolist())
Пример #6
0
def deploy():
    """
    Run one deploy cycle.

    Steps, in order: initialise tasks, verify that a "copy" hook exists and
    that the shared path is present, run the "before_deploy" hook, derive a
    millisecond-timestamp release name, run the "copy" and "deploy" hooks,
    symlink the release's source path to the current path, optionally clean
    up old releases, run "after_deploy", and optionally symlink the public
    path.

    Every early exit returns the result of ``report(...)``. A failure in the
    "deploy" hook triggers the "rollback" task before reporting.
    """
    init_tasks()

    if not has_hook("copy"):
        return report("No copy method has been defined")

    shared_exists = env.exists(paths.get_shared_path())
    if not shared_exists:
        return report("You need to run setup before running deploy")

    run_hook("before_deploy")

    # The release is named after the current time in milliseconds.
    timestamp_ms = int(time.time() * 1000)
    env.current_release = paths.get_releases_path(timestamp_ms)

    try:
        run_hook("copy")
    except Exception as copy_error:
        return report("Error occurred on copy. Aborting deploy",
                      err=copy_error)

    # The copy hook must have produced the source path for this release.
    if not env.exists(paths.get_source_path(timestamp_ms)):
        missing_path = paths.get_source_path(timestamp_ms)
        return report("Source path not found '%s'" % missing_path)

    try:
        run_hook("deploy")
    except Exception as deploy_error:
        logger.error("Error occurred on deploy, starting rollback...")
        logger.error(deploy_error)

        run_task("rollback")
        return report("Error occurred on deploy")

    # Point the "current" path at the freshly deployed release.
    new_source = paths.get_source_path(timestamp_ms)
    current_link = paths.get_current_path()
    paths.symlink(new_source, current_link)

    # Drop older releases when a limit is configured.
    if "max_releases" in env:
        keep = int(env.max_releases)
        cleanup_releases(keep)

    run_hook("after_deploy")

    if "public_path" in env:
        public_source = paths.get_source_path(timestamp_ms)
        paths.symlink(public_source, env.public_path)

    logger.info("Deploy complete")
Пример #7
0
def deploy():
    """
    Deploy a new release.

    Initialises tasks, checks preconditions (a "copy" hook and an existing
    shared path), then runs the "before_deploy", "copy" and "deploy" hooks
    against a release named by the current time in milliseconds. On success
    the release's source path is symlinked to the current path, old releases
    are cleaned up when ``max_releases`` is set in ``env``, "after_deploy"
    runs, and the source path is symlinked to ``env.public_path`` when that
    is set.

    Failed preconditions and hook errors return the value of ``report(...)``;
    an error in the "deploy" hook also runs the "rollback" task.
    """
    init_tasks()

    if not has_hook("copy"):
        return report("No copy method has been defined")

    shared_path = paths.get_shared_path()
    if not env.exists(shared_path):
        return report("You need to run setup before running deploy")

    run_hook("before_deploy")

    # Millisecond timestamp used as the release identifier.
    release_id = int(time.time() * 1000)
    env.current_release = paths.get_releases_path(release_id)

    try:
        run_hook("copy")
    except Exception as err:
        return report("Error occurred on copy. Aborting deploy", err=err)

    source_present = env.exists(paths.get_source_path(release_id))
    if not source_present:
        return report(
            "Source path not found '%s'" % paths.get_source_path(release_id))

    try:
        run_hook("deploy")
    except Exception as err:
        rollback_notice = "Error occurred on deploy, starting rollback..."
        logger.error(rollback_notice)
        logger.error(err)

        run_task("rollback")
        return report("Error occurred on deploy")

    # Symlink the current folder to the new release.
    paths.symlink(
        paths.get_source_path(release_id),
        paths.get_current_path(),
    )

    # Clean older releases when a maximum is configured.
    if "max_releases" in env:
        cleanup_releases(int(env.max_releases))

    run_hook("after_deploy")

    if "public_path" in env:
        paths.symlink(paths.get_source_path(release_id), env.public_path)

    logger.info("Deploy complete")
Пример #8
0
    def test_non_acgt(self):
        """Run the transcriptome stats task on 'non-actg-transcripts.fa'.

        Asserts that ``run_task`` returns 2 for this input.
        NOTE(review): 2 is presumably a failure status for non-ACGT
        sequences; confirm against ``run_task``.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('non-actg-transcripts.fa', td) as transcript:

                    output_fn = os.path.join(td, 'test')
                    tsk = tasks.get_transcriptome_stats_task(
                        transcript, output_fn)
                    stat = run_task(tsk)

                    # assertEquals is a deprecated alias that was removed in
                    # Python 3.12; assertEqual is the supported name.
                    self.assertEqual(stat, 2)
Пример #9
0
    def test_non_acgt(self):
        """Expect status 2 from the stats task on 'non-actg-transcripts.fa'.

        Builds the transcriptome stats task for the test file, runs it and
        checks the value returned by ``run_task``.
        NOTE(review): the meaning of status 2 is not visible here; verify
        against ``run_task``.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('non-actg-transcripts.fa', td) as transcript:

                    output_fn = os.path.join(td, 'test')
                    tsk = tasks.get_transcriptome_stats_task(transcript,
                                                             output_fn)
                    stat = run_task(tsk)

                    # Use assertEqual: the assertEquals alias is deprecated
                    # and no longer exists in Python 3.12+.
                    self.assertEqual(stat, 2)
Пример #10
0
    def test_ambiguous_transcript(self):
        """Run the transcriptome stats task on 'test-transcript-N.fa'.

        Asserts that ``run_task`` returns 0 and that the JSON written to the
        output file contains ``n_ambiguous == 1`` and ``N == 1``.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('test-transcript-N.fa', td) as transcript:

                    output_fn = os.path.join(td, 'test')
                    tsk = tasks.get_transcriptome_stats_task(
                        transcript, output_fn)
                    stat = run_task(tsk)

                    # assertEquals is a deprecated alias that was removed in
                    # Python 3.12; assertEqual is the supported name.
                    self.assertEqual(stat, 0)

                    # Debug output: contents of the working directory.
                    print(os.listdir(td))
                    with open(output_fn) as fp:
                        results = json.load(fp)

                    self.assertIn('n_ambiguous', results)
                    self.assertEqual(results['n_ambiguous'], 1)

                    self.assertIn('N', results)
                    self.assertEqual(results['N'], 1)
                    print(results)
Пример #11
0
    def test_ambiguous_transcript(self):
        """Check the stats task output for 'test-transcript-N.fa'.

        Expects ``run_task`` to return 0, then loads the JSON output file and
        expects both the ``n_ambiguous`` and ``N`` entries to equal 1.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('test-transcript-N.fa', td) as transcript:

                    output_fn = os.path.join(td, 'test')
                    tsk = tasks.get_transcriptome_stats_task(transcript,
                                                             output_fn)
                    stat = run_task(tsk)

                    # Use assertEqual: the assertEquals alias is deprecated
                    # and no longer exists in Python 3.12+.
                    self.assertEqual(stat, 0)

                    # Debug output: contents of the working directory.
                    print(os.listdir(td))
                    with open(output_fn) as fp:
                        results = json.load(fp)

                    self.assertIn('n_ambiguous', results)
                    self.assertEqual(results['n_ambiguous'], 1)

                    self.assertIn('N', results)
                    self.assertEqual(results['N'], 1)
                    print(results)