示例#1
0
 def _assign_taxonomy(self, extracted_reads, assignment_method):
     """Assign taxonomy to the extracted reads by running GraftM per package.

     A 'graftm_aligns' directory is created inside the working directory
     (os.mkdir raises if it already exists). One GraftM command is built for
     each package yielded by extracted_reads.each_package_wise() that has at
     least one truthy entry in its tmp_grafts; the .name of each such entry
     is passed to --forward. The commands are then run through
     extern.run_many with num_threads=1.

     Parameters
     ----------
     extracted_reads:
         object whose each_package_wise() yields
         (singlem_package, sample_names, tmp_grafts) triples
     assignment_method:
         value interpolated after --assignment_method

     Returns
     -------
     SingleMPipeTaxonomicAssignmentResult constructed from the output base
     directory.
     """
     align_base = os.path.join(self._working_directory, 'graftm_aligns')
     os.mkdir(align_base)

     # Adjacent literals are concatenated into one %-format template.
     command_template = (
         "%s "
         "--threads %i "
         "--forward %s "
         "--graftm_package %s "
         "--output_directory %s/%s "
         "--max_samples_for_krona 0 "
         "--assignment_method %s")

     graftm_commands = []
     for singlem_package, _sample_names, tmp_grafts in extracted_reads.each_package_wise():
         read_paths = [graft.name for graft in tmp_grafts if graft]
         if not read_paths:
             # Nothing to assign for this package.
             continue
         graftm_commands.append(command_template % (
             self._graftm_command_prefix(singlem_package.is_protein_package()),
             self._num_threads,
             ' '.join(read_paths),
             singlem_package.graftm_package_path(),
             align_base,
             singlem_package.graftm_package_basename(),
             assignment_method))

     # Each command already receives self._num_threads, so they are run one
     # at a time.
     extern.run_many(graftm_commands, num_threads=1)
     logging.info("Finished running taxonomic assignment with graftm")
     return SingleMPipeTaxonomicAssignmentResult(align_base)
示例#2
0
    def _align(self, search_result):
        """Run GraftM in --search_only mode for every package in the database.

        A 'graftm_separates' directory is created inside the working
        directory (os.mkdir raises if it already exists). One command is
        built per protein package, using the values of
        search_result.protein_hit_paths() as input files, followed by one
        command per nucleotide package, using the second element of each
        pair from search_result.direction_corrected_nucleotide_read_files().
        All commands are executed via extern.run_many with
        num_threads=self._num_threads.

        Returns a SingleMPipeAlignSearchResult built from the output base
        directory and search_result.samples_with_hits().
        """
        separates_base = os.path.join(self._working_directory, 'graftm_separates')
        os.mkdir(separates_base)
        logging.info("Running separate alignments in GraftM..")

        # Options appended directly after the GraftM command prefix.
        option_template = (
            "--threads %i "
            "--forward %s "
            "--graftm_package %s --output_directory %s/%s "
            "--search_only")

        def build_command(singlem_package, hit_files, is_protein):
            prefix = self._graftm_command_prefix(is_protein)
            options = option_template % (
                # Use 1 thread since it is most likely better to parallelise
                # processes with extern.
                1,
                ' '.join(hit_files),
                singlem_package.graftm_package_path(),
                separates_base,
                os.path.basename(singlem_package.graftm_package_path()))
            return prefix + options

        # Commands for aligning protein packages.
        graftm_commands = [
            build_command(package, search_result.protein_hit_paths().values(), True)
            for package in self._singlem_package_database.protein_packages()]

        # Commands for aligning nucleotide packages.
        for package in self._singlem_package_database.nucleotide_packages():
            corrected_files = []
            for _, read_file in search_result.direction_corrected_nucleotide_read_files():
                corrected_files.append(read_file)
            graftm_commands.append(build_command(package, corrected_files, False))

        extern.run_many(graftm_commands, num_threads=self._num_threads)
        return SingleMPipeAlignSearchResult(
            separates_base, search_result.samples_with_hits())
示例#3
0
    def _assign_taxonomy(self, extracted_reads, assignment_method):
        """Assign taxonomy to the extracted reads by running GraftM per package.

        A 'graftm_aligns' directory is created inside the working directory
        (os.mkdir raises if it already exists). For every package yielded by
        extracted_reads.each_package_wise(), each readset with at least one
        sequence is written to its own temporary FASTA file, and a single
        GraftM command covering all of that package's files is queued. The
        queued commands are run through extern.run_many with num_threads=1.

        As a side effect, each readset that was written out gets a
        ``tmpfile_basename`` attribute holding the temporary file's basename
        without the ".fasta" suffix.

        The temporary files are always closed (and thereby removed, since
        NamedTemporaryFile is used with its default delete-on-close
        behaviour), even when writing sequences or running GraftM raises.

        Parameters
        ----------
        extracted_reads:
            object whose each_package_wise() yields
            (singlem_package, readsets) pairs
        assignment_method:
            value interpolated after --assignment_method

        Returns
        -------
        SingleMPipeTaxonomicAssignmentResult constructed from the output base
        directory.
        """
        graftm_align_directory_base = os.path.join(self._working_directory,
                                                   'graftm_aligns')
        os.mkdir(graftm_align_directory_base)
        commands = []
        fasta_suffix = ".fasta"
        # Every temporary file is registered here immediately after creation
        # so that the finally clause can close it whatever happens later.
        open_tmp_files = []
        try:
            for singlem_package, readsets in extracted_reads.each_package_wise():
                tmpnames = []
                for readset in readsets:
                    if len(readset.sequences) > 0:
                        tmp = tempfile.NamedTemporaryFile(
                            prefix='singlem.%s' % readset.sample_name,
                            suffix=fasta_suffix)
                        open_tmp_files.append(tmp)
                        # Record basename (remove .fasta) so that the graftm
                        # output file is recorded for later on in pipe.
                        readset.tmpfile_basename = os.path.basename(
                            tmp.name[:-len(fasta_suffix)])
                        seqio = SequenceIO()
                        seqio.write_fasta(readset.sequences, tmp)
                        # Make sure the data is on disk before GraftM reads it.
                        tmp.flush()
                        tmpnames.append(tmp.name)

                if tmpnames:
                    cmd = "%s "\
                          "--threads %i "\
                          "--forward %s "\
                          "--graftm_package %s "\
                          "--output_directory %s/%s "\
                          "--max_samples_for_krona 0 "\
                          "--assignment_method %s" % (
                              self._graftm_command_prefix(singlem_package.is_protein_package()),
                              self._num_threads,
                              ' '.join(tmpnames),
                              singlem_package.graftm_package_path(),
                              graftm_align_directory_base,
                              singlem_package.graftm_package_basename(),
                              assignment_method)
                    commands.append(cmd)

            # Run each one at a time serially so that the number of threads
            # is respected, to save RAM as one DB needs to be loaded at once,
            # and so fewer open files are needed, so that the open file count
            # limit is eased.
            extern.run_many(commands, num_threads=1)
        finally:
            for tmp in open_tmp_files:
                tmp.close()
        logging.info("Finished running taxonomic assignment with GraftM")
        return SingleMPipeTaxonomicAssignmentResult(
            graftm_align_directory_base)
示例#4
0
 def test_multi_with_exception(self):
     """A failing command in run_many raises ExternCalledProcessError.

     The exception text must name the failing command and include its
     STDERR and (empty) STDOUT.
     """
     expected_message = 'Command cat /notafile returned non-zero exit status 1.\nSTDERR was: cat: /notafile: No such file or directory\nSTDOUT was: '
     failing_batch = ['seq 2', 'cat /notafile']
     with self.assertRaises(ExternCalledProcessError) as ex:
         extern.run_many(failing_batch)
     observed_message = str(ex.exception)
     self.assertEqual(expected_message, observed_message)
示例#5
0
 def test_multi_with_many_threads(self):
     """run_many with 10 threads returns each command's stdout in input order."""
     repeats = 50
     commands = ['seq 2', 'seq 3 4'] * repeats
     expected_outputs = ['1\n2\n', '3\n4\n'] * repeats
     observed_outputs = extern.run_many(commands, num_threads=10)
     self.assertEqual(expected_outputs, observed_outputs)
示例#6
0
 def test_multi_hello_world(self):
     """run_many with a single command returns a one-element list of its stdout."""
     observed_outputs = extern.run_many(['seq 2'], num_threads=1)
     self.assertEqual(['1\n2\n'], observed_outputs)