Пример #1
0
    def test_fasta_sorting(self):
        """ Should sort fasta seqs from largest to smallest in outfile

        Since a fasta file has to be passed to the app controller for uclust,
        a temporary fasta file is created, and the raw fasta seqs supplied
        in this module are written to it.  This file is sent to the app
        controller, and the resulting sorted file is compared to the expected
        results to ensure proper function of uclust as called by this app
        controller."""

        test_app = Uclust({'--tmpdir': self.tmpdir})

        test_app_res = test_app(
            data={
                '--mergesort': self.tmp_unsorted_fasta_filepath,
                '--output': self.tmp_sorted_fasta_filepath
            })

        # try/finally guarantees the result files are cleaned up even when
        # the assertion below fails.
        try:
            # Re-open the sorted output by name; the context manager closes
            # the handle as soon as the lines have been collected.
            with open(test_app_res['Output'].name, "U") as sorted_fasta:
                sorted_fasta_actual = [line.strip() for line in sorted_fasta]
            sorted_fasta_expected = [
                line.strip() for line in sorted_dna_seqs if line
            ]

            self.assertEqual(sorted_fasta_actual, sorted_fasta_expected)
        finally:
            test_app_res.cleanUp()
Пример #2
0
    def test_clustering_fasta_filepath(self):
        """ Should create clusters in uclust format from sorted fasta file

        Since a fasta file has to be passed to the app controller for uclust,
        a temporary fasta file is created, and the sorted seqs supplied
        in this module are written to it.  This file is sent to the app
        controller, and the resulting uclust file is compared to the expected
        results to ensure proper function of uclust as called by this app
        controller."""

        test_app = Uclust({'--id': 0.9}, HALT_EXEC=False)
        test_app_res = test_app(
            data={
                '--input': self.tmp_sorted_fasta_filepath,
                '--uc': self.tmp_uc_filepath
            })

        # try/finally guarantees the result files are cleaned up even when
        # the assertion below fails.
        try:
            # compare the actual and expect uc files, ignoring comment lines;
            # the context manager closes the handle once the lines are read
            with open(test_app_res['ClusterFile'].name, "U") as uc_file:
                uc_file_actual = [
                    line.strip() for line in uc_file
                    if not line.startswith('#')
                ]
            uc_file_expected = [
                line.strip() for line in uc_dna_clusters
                if not line.startswith('#')
            ]

            self.assertEqual(uc_file_actual, uc_file_expected)
        finally:
            test_app_res.cleanUp()
Пример #3
0
def seqSort(infasta, outfasta, tmpdir):
    """Run the Uclust controller in --mergesort mode on a fasta file.

    infasta: path passed to uclust as --mergesort
    outfasta: path passed to uclust as --output
    tmpdir: directory used both as the --tmpdir parameter and as the
     controller's TmpDir

    If the run reports a non-zero exit status, its stderr is copied to
    this process's stderr and the process exits with that status.
    Otherwise the 'Output' result handle is closed.
    """
    sorter = Uclust({'--tmpdir': tmpdir}, HALT_EXEC=False, TmpDir=tmpdir)
    outcome = sorter(data={'--mergesort': infasta, '--output': outfasta})

    run_failed = outcome['ExitStatus'] != 0
    if run_failed:
        # Relay the tool's own error output before bailing out.
        error_text = outcome['StdErr'].read()
        sys.stderr.write(error_text)
        sys.exit(outcome['ExitStatus'])

    outcome['Output'].close()
Пример #4
0
def search(infasta, inlib, outclust, pid, rev, tmpdir):
    """Run the Uclust controller against a library, writing a .uc file.

    infasta: path passed to uclust as --input
    inlib: path passed to uclust as --lib (with --libonly switched on)
    outclust: path passed to uclust as --uc
    pid: value passed to uclust as --id
    rev: if truthy, --rev is switched on as well
    tmpdir: directory handed to the controller as TmpDir

    If the run reports a non-zero exit status, its stderr is copied to
    this process's stderr and the process exits with that status.
    Otherwise the 'ClusterFile' result handle is closed.
    """
    options = {'--id': pid, '--lib': inlib, '--libonly': True}
    if rev:
        options.update({'--rev': True})

    searcher = Uclust(options, HALT_EXEC=False, TmpDir=tmpdir)
    outcome = searcher({'--input': infasta, '--uc': outclust})

    run_failed = outcome['ExitStatus'] != 0
    if run_failed:
        # Relay the tool's own error output before bailing out.
        error_text = outcome['StdErr'].read()
        sys.stderr.write(error_text)
        sys.exit(outcome['ExitStatus'])

    outcome['ClusterFile'].close()
Пример #5
0
    def test_parameter_availability(self):
        """ Often used parameters are accessible

            Basic sanity check: each commonly used parameter is looked up
            on a fresh Uclust controller and switched on.

        """
        controller = Uclust()
        # Looking up a parameter that is not accessible raises a KeyError,
        # so simply turning each one on is the whole test. Each entry is
        # (parameter name, positional arguments for .on()).
        commonly_used = (
            ('--allhits', ()),
            ('--libonly', ()),
            ('--maxaccepts', (42,)),
            ('--maxrejects', (42,)),
            ('--rev', ()),
        )
        for flag, on_args in commonly_used:
            controller.Parameters[flag].on(*on_args)
Пример #6
0
    def __call__(self,
                 seq_path,
                 result_path=None,
                 uc_path=None,
                 log_path=None,
                 HALT_EXEC=False):
        """Returns mapping of each seq to (tax, consensus fraction, n)

        Results:
        If result_path is specified, the results will be written to file
         as tab-separated lines of:
          query_id <tab> tax <tab> consensus fraction <tab> n
         and None is returned.
        If result_path is None (default), the results will be returned
         as a dict of:
          {'query_id': (tax, consensus fraction, n)}
        In both cases, the values are:
         tax: the consensus taxonomy assignment
         consensus fraction: the fraction of the assignments for the
          query that contained the lowest level tax assignment that is
          included in tax (e.g., if the assignment goes to genus level,
          this will be the fraction of assignments that had the consensus
          genus assignment)
         n: the number of assignments that were considered when constructing
          the consensus

        Parameters:
        seq_path: path to file of query sequences
        result_path: path where results should be written. If None (default),
         returns results as a dict
        uc_path: path where .uc file should be saved. If None (default), the
         .uc output goes to a temporary file and its contents are appended
         to the log.
        log_path: path where run log should be written. If None (default), no
         log file is written.
        HALT_EXEC: debugging parameter. If passed, will exit just before the
         uclust command is issued, and will print the command that would have
         been called to stdout.
        """

        # initialize the logger
        logger = self._get_logger(log_path)
        logger.info(str(self))

        # set the user-defined parameters
        params = {
            '--id': self.Params['similarity'],
            '--maxaccepts': self.Params['max_accepts']
        }

        # initialize the application controller object
        app = Uclust(params, HALT_EXEC=HALT_EXEC)

        # Configure for consensus taxonomy assignment
        app.Parameters['--rev'].on()
        app.Parameters['--lib'].on(self.Params['reference_sequences_fp'])
        app.Parameters['--libonly'].on()
        app.Parameters['--allhits'].on()

        if uc_path is None:
            # Keep a reference to the temporary file object for the rest of
            # the call so the file backing uc_path is not removed early.
            uc = NamedTemporaryFile(prefix='UclustConsensusTaxonAssigner_',
                                    suffix='.uc',
                                    dir=get_qiime_temp_dir())
            uc_path = uc.name
            store_uc_in_log = True
        else:
            store_uc_in_log = False

        app_result = app({'--input': seq_path, '--uc': uc_path})
        result = self._uc_to_assignment(app_result['ClusterFile'])
        if result_path is not None:
            # if the user provided a result_path, write the
            # results to file; the context manager closes the handle even
            # if formatting or writing one of the records fails
            with open(result_path, 'w') as of:
                for seq_id, (assignment, consensus_fraction, n) in \
                        result.items():
                    assignment_str = ';'.join(assignment)
                    of.write('%s\t%s\t%1.2f\t%d\n' %
                             (seq_id, assignment_str, consensus_fraction, n))
            result = None
            logger.info('Result path: %s' % result_path)
        else:
            # If no result_path was provided, the result dict is
            # returned as-is.
            logger.info('Result path: None, returned as dict.')

        if store_uc_in_log:
            # This is a little hackish, but we don't have a good way
            # to pass the uc_path value right now through the
            # assign_taxonomy.py script, so writing the contents to the
            # user-specified log file (since this is being stored for logging
            # purposes).
            # Rewind first: the handle is read again here after having been
            # passed to _uc_to_assignment above.
            app_result['ClusterFile'].seek(0)
            logger.info('\n.uc file contents:\n')
            for line in app_result['ClusterFile']:
                logger.info(line.strip())

        return result