def test_rna_fasta_format_id_starts_with_space(self):
    """Expect a ValidationError for the id-starts-with-space fixture."""
    fasta_path = self.get_data_path(
        'dna-sequences-id-starts-with-space.fasta')
    rna_fasta = RNAFASTAFormat(fasta_path, mode='r')

    # Pattern the raised validation message has to match.
    expected_error = '1 starts with a space'
    with self.assertRaisesRegex(ValidationError, expected_error):
        rna_fasta.validate()
def test_rna_fasta_format_consecutive_IDs(self):
    """Expect a ValidationError for the consecutive-ids fixture."""
    fasta_path = self.get_data_path('dna-sequences-consecutive-ids.fasta')
    rna_fasta = RNAFASTAFormat(fasta_path, mode='r')

    # Pattern the raised validation message has to match.
    expected_error = 'consecutive descriptions.*1'
    with self.assertRaisesRegex(ValidationError, expected_error):
        rna_fasta.validate()
def test_rna_fasta_format_invalid_characters(self):
    """Expect a ValidationError for the not-rna-sequences FASTA fixture."""
    fasta_path = self.get_data_path('not-rna-sequences.fasta')
    rna_fasta = RNAFASTAFormat(fasta_path, mode='r')

    # Pattern the raised validation message has to match.
    expected_error = ("Invalid character '1' "
                      ".*0 on line 2")
    with self.assertRaisesRegex(ValidationError, expected_error):
        rna_fasta.validate()
def test_rna_fasta_format_empty_file(self):
    """A file holding only a newline must validate without raising."""
    empty_path = os.path.join(self.temp_dir.name, 'empty')
    with open(empty_path, 'w') as handle:
        handle.write('\n')

    rna_fasta = RNAFASTAFormat(empty_path, mode='r')
    # No assertion needed: the test passes as long as validate() returns.
    rna_fasta.validate()
def test_cull_seqs_rna_default_params(self):
    """Run cull_seqs with default parameters on an RNA FASTA fixture."""
    # Load the RNA fixture and view it through DNAIterator, the view that
    # is handed to cull_seqs.
    fixture_path = self.get_data_path('cleanseq-test-1-rna.fasta')
    rna_fasta = RNAFASTAFormat(fixture_path, mode='r')
    input_seqs = rna_fasta.view(DNAIterator)

    culled = cull_seqs(input_seqs)

    # Only these two sequence IDs are expected in the output.
    observed_ids = set(
        record.metadata['id'] for record in culled.view(DNAIterator))
    expected_ids = {'Ambig2', 'cleanseq'}
    self.assertEqual(observed_ids, expected_ids)
def get_silva_data(ctx,
                   version='138.1',
                   target='SSURef_NR99',
                   include_species_labels=False,
                   rank_propagation=True,
                   ranks=None,
                   download_sequences=True):
    """Download SILVA data and parse its taxonomy.

    Parameters
    ----------
    ctx
        Pipeline context; used to look up the ``parse_silva_taxonomy``
        action of the ``rescript`` plugin.
    version, target
        Forwarded to ``_assemble_silva_data_urls`` to build the download
        queries.
    include_species_labels, rank_propagation, ranks
        Forwarded unchanged to ``parse_silva_taxonomy``.
    download_sequences
        Forwarded to ``_assemble_silva_data_urls``. When falsy, an artifact
        imported from an empty ``RNAFASTAFormat`` is returned in place of
        downloaded sequences.

    Returns
    -------
    tuple
        ``(sequences, taxonomy)``.
    """
    # Fetch the raw SILVA files.
    print('Downloading raw files may take some time... get some coffee.')
    silva_urls = _assemble_silva_data_urls(
        version, target, download_sequences)
    silva_files = _retrieve_data_from_silva(silva_urls)

    # Build the taxonomy from the downloaded tree / map / ranks files.
    taxonomy_action = ctx.get_action('rescript', 'parse_silva_taxonomy')
    (taxonomy,) = taxonomy_action(
        taxonomy_tree=silva_files['taxonomy tree'],
        taxonomy_map=silva_files['taxonomy map'],
        taxonomy_ranks=silva_files['taxonomy ranks'],
        include_species_labels=include_species_labels,
        ranks=ranks,
        rank_propagation=rank_propagation)

    if download_sequences:
        return silva_files['sequences'], taxonomy

    # Sequences were skipped, so an empty sequence file is output instead.
    empty_sequences = qiime2.Artifact.import_data(
        'FeatureData[RNASequence]', RNAFASTAFormat())
    return empty_sequences, taxonomy
def test_rna_fasta_format_no_id(self):
    """Expect a ValidationError for the no-id fixture."""
    fasta_path = self.get_data_path('dna-sequences-no-id.fasta')
    rna_fasta = RNAFASTAFormat(fasta_path, mode='r')

    # Pattern the raised validation message has to match.
    expected_error = '1.*missing an ID'
    with self.assertRaisesRegex(ValidationError, expected_error):
        rna_fasta.validate()
def test_rna_fasta_format_duplicate_ids(self):
    """Expect a ValidationError for the duplicate-ids fixture."""
    fasta_path = self.get_data_path(
        'rna-sequences-with-duplicate-ids.fasta')
    rna_fasta = RNAFASTAFormat(fasta_path, mode='r')

    # Pattern the raised validation message has to match.
    expected_error = '6.*duplicate.*1'
    with self.assertRaisesRegex(ValidationError, expected_error):
        rna_fasta.validate()
def test_rna_fasta_format_bom_fails(self):
    """Expect a ValidationError for the bom-fails fixture."""
    fasta_path = self.get_data_path('dna-with-bom-fails.fasta')
    rna_fasta = RNAFASTAFormat(fasta_path, mode='r')

    # Pattern the raised validation message has to match.
    expected_error = 'First line'
    with self.assertRaisesRegex(ValidationError, expected_error):
        rna_fasta.validate()
def test_rna_fasta_format_corrupt_characters(self):
    """Expect a ValidationError for the corrupt-characters fixture."""
    fasta_path = self.get_data_path(
        'dna-sequences-corrupt-characters.fasta')
    rna_fasta = RNAFASTAFormat(fasta_path, mode='r')

    # Pattern the raised validation message has to match.
    expected_error = 'utf-8.*2'
    with self.assertRaisesRegex(ValidationError, expected_error):
        rna_fasta.validate()
def test_rna_fasta_format_missing_initial_ID(self):
    """Expect a ValidationError for the first-line-not-id fixture."""
    fasta_path = self.get_data_path(
        'dna-sequences-first-line-not-id.fasta')
    rna_fasta = RNAFASTAFormat(fasta_path, mode='r')

    # Pattern the raised validation message has to match.
    expected_error = 'First line'
    with self.assertRaisesRegex(ValidationError, expected_error):
        rna_fasta.validate()
def test_rna_fasta_format_validate_negative(self):
    """Expect a ValidationError for the not-rna-sequences fixture."""
    fixture_path = self.get_data_path('not-rna-sequences')
    rna_fasta = RNAFASTAFormat(fixture_path, mode='r')

    # Pattern the raised validation message has to match.
    expected_error = 'RNAFASTA'
    with self.assertRaisesRegex(ValidationError, expected_error):
        rna_fasta.validate()
def test_rna_fasta_format_bom_passes(self):
    """The bom-passes fixture must validate without raising."""
    fasta_path = self.get_data_path('rna-with-bom-passes.fasta')
    rna_fasta = RNAFASTAFormat(fasta_path, mode='r')

    # No assertion needed: the test passes as long as validate() returns.
    rna_fasta.validate()
def test_rna_fasta_format_validate_positive(self):
    """The rna-sequences fixture must validate without raising."""
    fasta_path = self.get_data_path('rna-sequences.fasta')
    rna_fasta = RNAFASTAFormat(fasta_path, mode='r')

    # No assertion needed: the test passes as long as validate() returns.
    rna_fasta.validate()