Exemplo n.º 1
0
    def set_parameters(self, args):
        """Validate and set parameters.

        Copies every attribute of ``args`` onto this instance, resolves the
        input and output locations, loads settings through ``get_config`` and
        normalizes the resulting values.

        Parameters
        ----------
        args : object
            command-line arguments; its attributes are read with ``vars``
            (e.g., an ``argparse.Namespace``)

        Raises
        ------
        ValueError
            input path is neither a file nor a directory, no input data are
            found under it, or ``input_cov`` ends up below 50
        """
        # mirror all supplied arguments as attributes of this instance
        for name, value in vars(args).items():
            setattr(self, name, value)

        # build the sample ID -> file path mapping from the input location
        if self.input:
            path = self.input
            if isfile(path):
                # a single file is one sample
                found = {file2id(path): path}
            elif isdir(path):
                # a directory contributes every 'tsv' entry reported by
                # id2file_map, joined onto the directory path
                found = {}
                for sample, fname in id2file_map(path, ext='tsv').items():
                    found[sample] = join(path, fname)
            else:
                message = 'Invalid input data file or directory: {}.'
                raise ValueError(message.format(path))
            self.input_map = found
            if not found:
                message = 'No input data are found under: {}.'
                raise ValueError(message.format(path))

        # make sure the output directory exists, then index its 'tsv' content
        makedirs(self.output, exist_ok=True)
        self.prev_map = id2file_map(self.output, 'tsv')

        # load configurations; 'evalue' is the only entry given a type, and
        # underscores are stripped from the attribute name to form the
        # configuration key
        get_config(self, 'evalue', 'analyze.evalue', float)
        sections = (
            ('analyze', ('maxhits', 'identity', 'coverage')),
            ('grouping', ('input_cov', 'self_rank', 'close_size',
                          'distal_top')),
            ('predict', ('weighted', 'outliers', 'orphans', 'bandwidth',
                         'bw_steps', 'low_part', 'noise', 'fixed',
                         'silhouette', 'self_low')),
        )
        for section, names in sections:
            for name in names:
                entry = '{}.{}'.format(section, name.replace('_', ''))
                get_config(self, name, entry)

        # normalize boolean switches (a missing attribute is passed as None)
        for flag in ('weighted', 'orphans', 'self_low'):
            current = getattr(self, flag, None)
            setattr(self, flag, arg2bool(current))

        # truthy values below 1 are taken as fractions and scaled to percent
        for metric in ('input_cov', 'noise', 'fixed', 'distal_top'):
            amount = getattr(self, metric)
            if not amount or not amount < 1:
                continue
            setattr(self, metric, amount * 100)

        # pre-compute the factor derived from distal top so that it is not
        # recalculated later
        self.match_th = 1 - self.distal_top / 100

        # force coverage >= 50 to ensure that candidates are sequential
        # NOTE(review): "interence" in the message below is presumably a typo
        # for "inference"; the text is kept verbatim because callers or tests
        # may match on it.
        coverage = self.input_cov or 0
        if coverage < 50:
            raise ValueError('Taxonomy coverage for auto-interence must be at '
                             'least 50%.')
Exemplo n.º 2
0
 def test_file2id(self):
     """Check the ID that ``file2id`` returns for several file names."""
     # (file name, expected ID) pairs, checked in order; the first mismatch
     # fails the test
     cases = (
         ('sample.fna', 'sample'),
         ('sample.fna.gz', 'sample'),
         ('/path/to/sample.fna', 'sample'),
         ('sample', 'sample'),
         ('', ''),
     )
     for fname, expected in cases:
         self.assertEqual(file2id(fname), expected)