def _insert_preprocessed_data(study, params, prep_template, slq_out,
                              **kwargs):
    """Inserts the preprocessed data to the database

    Parameters
    ----------
    study : Study
        The study to preprocess
    params : BaseParameters
        The parameters to use for preprocessing
    prep_template : PrepTemplate
        The prep template to use for the preprocessing
    slq_out : str
        Path to the split_libraries_fastq.py output directory
    kwargs : ignored
        Necessary to include to support execution via moi.

    Raises
    ------
    ValueError
        If the preprocessed output directory does not contain all the
        expected files
    """
    from os.path import exists, join
    from functools import partial
    from qiita_db.data import PreprocessedData

    # The filepaths that we are interested in are:
    #   1) seqs.fna -> demultiplexed fasta file
    #   2) seqs.fastq -> demultiplexed fastq file (optional)
    #   3) seqs.demux -> demultiplexed HDF5 file
    #   4) split_library_log.txt -> log file
    path_builder = partial(join, slq_out)
    fasta_fp = path_builder('seqs.fna')
    fastq_fp = path_builder('seqs.fastq')
    demux_fp = path_builder('seqs.demux')
    log_fp = path_builder('split_library_log.txt')

    # Check that all the mandatory files exist; the fastq file is optional
    if not (exists(fasta_fp) and exists(demux_fp) and exists(log_fp)):
        raise ValueError("The output directory %s does not contain all the "
                         "expected files." % slq_out)

    filepaths = [(fasta_fp, "preprocessed_fasta"),
                 (demux_fp, "preprocessed_demux"),
                 (log_fp, "log")]
    if exists(fastq_fp):
        filepaths.append((fastq_fp, "preprocessed_fastq"))

    PreprocessedData.create(study, params._table, params.id, filepaths,
                            prep_template)

    # Change the prep_template status to success
    prep_template.preprocessing_status = 'success'
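# A minimal usage sketch for _insert_preprocessed_data, assuming the caller
# already holds Study/PrepTemplate/parameter objects. The module paths, the
# PreprocessedIlluminaParams class, the object ids, and the output path below
# are illustrative assumptions, not values taken from the code above.
def _example_insert_preprocessed_data():
    from qiita_db.study import Study
    from qiita_db.metadata_template import PrepTemplate
    from qiita_db.parameters import PreprocessedIlluminaParams

    # The output directory must already contain seqs.fna, seqs.demux and
    # split_library_log.txt; seqs.fastq is picked up only if present.
    _insert_preprocessed_data(Study(1), PreprocessedIlluminaParams(1),
                              PrepTemplate(1), '/path/to/slq_output')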
def test_create_error_data_type(self):
    with self.assertRaises(IncompetentQiitaDeveloperError):
        PreprocessedData.create(
            self.study, "preprocessed_sequence_illumina_params",
            self.params_id, self.filepaths, data_type="Metabolomics")
    with self.assertRaises(IncompetentQiitaDeveloperError):
        PreprocessedData.create(
            self.study, "preprocessed_sequence_illumina_params",
            self.params_id, self.filepaths, data_type="Metabolomics",
            prep_template=self.prep_template)
def test_create_error_dynamic_table(self):
    """Raises an error if the preprocessed_params_table does not exist"""
    with self.assertRaises(IncompetentQiitaDeveloperError):
        PreprocessedData.create(self.study, "foo", self.params_id,
                                self.filepaths, data_type="18S")
    with self.assertRaises(IncompetentQiitaDeveloperError):
        PreprocessedData.create(self.study, "preprocessed_foo",
                                self.params_id, self.filepaths,
                                data_type="18S")
    with self.assertRaises(IncompetentQiitaDeveloperError):
        PreprocessedData.create(self.study, "foo_params", self.params_id,
                                self.filepaths, data_type="18S")
    with self.assertRaises(IncompetentQiitaDeveloperError):
        PreprocessedData.create(self.study, "preprocessed_foo_params",
                                self.params_id, self.filepaths,
                                data_type="18S")
def test_processing_status_setter_valueerror(self):
    """Raises an error if the processing status is not recognized"""
    ppd = PreprocessedData.create(self.study, self.params_table,
                                  self.params_id, self.filepaths,
                                  data_type="18S")
    with self.assertRaises(ValueError):
        ppd.processing_status = 'not a valid state'
def test_create_data_type_only(self):
    # Check that the returned object has the correct id
    obs = PreprocessedData.create(self.study, self.params_table,
                                  self.params_id, self.filepaths,
                                  data_type="18S")
    self.assertEqual(obs.id, 3)

    # Check that the preprocessed data have been correctly added to the DB
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.preprocessed_data WHERE "
        "preprocessed_data_id=3")
    # preprocessed_data_id, preprocessed_params_table,
    # preprocessed_params_id, submitted_to_insdc_status,
    # ebi_submission_accession, ebi_study_accession, data_type_id,
    # link_filepaths_status, vamps_status, processing_status
    exp = [[3, "preprocessed_sequence_illumina_params", 1,
            'not submitted', None, None, 2, 'idle', 'not submitted',
            'not_processed']]
    self.assertEqual(obs, exp)

    # Check that the preprocessed data has been linked with its study
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.study_preprocessed_data WHERE "
        "preprocessed_data_id=3")
    exp = [[1, 3]]
    self.assertEqual(obs, exp)

    # Check that the files have been copied to the right location
    exp_fna_fp = join(self.db_test_ppd_dir,
                      "3_%s" % basename(self.fna_fp))
    self.assertTrue(exists(exp_fna_fp))
    self._clean_up_files.append(exp_fna_fp)

    exp_qual_fp = join(self.db_test_ppd_dir,
                       "3_%s" % basename(self.qual_fp))
    self.assertTrue(exists(exp_qual_fp))
    self._clean_up_files.append(exp_qual_fp)

    # Check that the filepaths have been correctly added to the DB
    obs_id = self.conn_handler.execute_fetchone(
        "SELECT count(1) FROM qiita.filepath")[0]
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.filepath WHERE filepath_id=%d or "
        "filepath_id=%d" % (obs_id - 1, obs_id))
    exp_fna_fp = "3_%s" % basename(self.fna_fp)
    exp_qual_fp = "3_%s" % basename(self.qual_fp)
    # filepath_id, path, filepath_type_id, checksum,
    # checksum_algorithm_id, data_directory_id
    exp = [[obs_id - 1, exp_fna_fp, 4, '852952723', 1, 3],
           [obs_id, exp_qual_fp, 5, '852952723', 1, 3]]
    self.assertEqual(obs, exp)

    # Check that the preprocessed data have been correctly
    # linked with the filepaths
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.preprocessed_filepath WHERE "
        "preprocessed_data_id=3")
    # preprocessed_data_id, filepath_id
    self.assertEqual(obs, [[3, obs_id - 1], [3, obs_id]])
def test_ebi_study_accession(self):
    new = PreprocessedData.create(
        self.study, self.params_table, self.params_id, self.filepaths,
        prep_template=self.prep_template,
        ebi_submission_accession=self.ebi_submission_accession,
        ebi_study_accession=self.ebi_study_accession)
    new.ebi_study_accession = 'EBI12345-DD'
    self.assertEqual(new.ebi_study_accession, 'EBI12345-DD')
def test_set_ebi_submission_accession(self):
    new = PreprocessedData.create(
        self.study, self.params_table, self.params_id, self.filepaths,
        raw_data=self.raw_data,
        ebi_submission_accession=self.ebi_submission_accession,
        ebi_study_accession=self.ebi_study_accession)
    new.ebi_submission_accession = 'EBI12345-CC'
    self.assertEqual(new.ebi_submission_accession, 'EBI12345-CC')
def test_processing_status(self):
    """processing_status works correctly"""
    # Existing preprocessed data starts as not processed
    ppd = PreprocessedData(1)
    self.assertEqual(ppd.processing_status, 'not_processed')

    # Newly created preprocessed data also starts as not processed
    ppd = PreprocessedData.create(self.study, self.params_table,
                                  self.params_id, self.filepaths,
                                  data_type="18S")
    self.assertEqual(ppd.processing_status, 'not_processed')
def test_insert_processed_data_target_gene(self):
    fd, fna_fp = mkstemp(suffix='_seqs.fna')
    close(fd)
    fd, qual_fp = mkstemp(suffix='_seqs.qual')
    close(fd)
    filepaths = [
        (fna_fp, convert_to_id('preprocessed_fasta', 'filepath_type')),
        (qual_fp, convert_to_id('preprocessed_fastq', 'filepath_type'))]

    preprocessed_data = PreprocessedData.create(
        Study(1), "preprocessed_sequence_illumina_params", 1, filepaths,
        data_type="18S")

    params = ProcessedSortmernaParams(1)
    pick_dir = mkdtemp()
    path_builder = partial(join, pick_dir)
    db_path_builder = partial(join, get_mountpoint('processed_data')[0][1])

    # Create a placeholder for the otu table
    with open(path_builder('otu_table.biom'), 'w') as f:
        f.write('\n')

    # Create a placeholder for the picked otus directory
    mkdir(path_builder('sortmerna_picked_otus'))

    # Create the log file
    fd, fp = mkstemp(dir=pick_dir, prefix='log_', suffix='.txt')
    close(fd)
    with open(fp, 'w') as f:
        f.write('\n')

    _insert_processed_data_target_gene(preprocessed_data, params, pick_dir)

    new_id = get_count('qiita.processed_data')

    # Check that the files have been copied
    db_files = [db_path_builder("%s_otu_table.biom" % new_id),
                db_path_builder("%s_sortmerna_picked_otus" % new_id),
                db_path_builder("%s_%s" % (new_id, basename(fp)))]
    for db_fp in db_files:
        self.assertTrue(exists(db_fp))

    # Check that a new processed data entry has been created
    self.assertTrue(self.conn_handler.execute_fetchone(
        "SELECT EXISTS(SELECT * FROM qiita.processed_data WHERE "
        "processed_data_id=%s)", (new_id,))[0])
def test_create_error(self):
    """Raises an error if the preprocessed_params_table does not exist"""
    with self.assertRaises(IncompetentQiitaDeveloperError):
        PreprocessedData.create(self.study, "foo", self.params_id,
                                self.filepaths)
    with self.assertRaises(IncompetentQiitaDeveloperError):
        PreprocessedData.create(self.study, "preprocessed_foo",
                                self.params_id, self.filepaths)
    with self.assertRaises(IncompetentQiitaDeveloperError):
        PreprocessedData.create(self.study, "foo_params", self.params_id,
                                self.filepaths)
    with self.assertRaises(IncompetentQiitaDeveloperError):
        PreprocessedData.create(self.study, "preprocessed_foo_params",
                                self.params_id, self.filepaths)
def test_processing_status_setter(self):
    """Able to update the processing status"""
    ppd = PreprocessedData.create(self.study, self.params_table,
                                  self.params_id, self.filepaths,
                                  data_type="18S")
    self.assertEqual(ppd.processing_status, 'not_processed')
    ppd.processing_status = 'processing'
    self.assertEqual(ppd.processing_status, 'processing')
    ppd.processing_status = 'processed'
    self.assertEqual(ppd.processing_status, 'processed')
    state = 'failed: some error message'
    ppd.processing_status = state
    self.assertEqual(ppd.processing_status, state)
def test_create(self):
    """Correctly creates all the rows in the DB for preprocessed data"""
    # Check that the returned object has the correct id
    obs = PreprocessedData.create(self.study, self.params_table,
                                  self.params_id, self.filepaths,
                                  raw_data=self.raw_data)
    self.assertEqual(obs.id, 3)

    # Check that the preprocessed data have been correctly added to the DB
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.preprocessed_data WHERE "
        "preprocessed_data_id=3")
    # preprocessed_data_id, preprocessed_params_table,
    # preprocessed_params_id, submitted_to_insdc
    exp = [[3, "preprocessed_sequence_illumina_params", 1, False]]
    self.assertEqual(obs, exp)

    # Check that the preprocessed data has been linked with its study
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.study_preprocessed_data WHERE "
        "preprocessed_data_id=3")
    exp = [[1, 3]]
    self.assertEqual(obs, exp)

    # Check that the files have been copied to the right location
    exp_fna_fp = join(self.db_test_ppd_dir,
                      "3_%s" % basename(self.fna_fp))
    self.assertTrue(exists(exp_fna_fp))
    self._clean_up_files.append(exp_fna_fp)

    exp_qual_fp = join(self.db_test_ppd_dir,
                       "3_%s" % basename(self.qual_fp))
    self.assertTrue(exists(exp_qual_fp))
    self._clean_up_files.append(exp_qual_fp)

    # Check that the filepaths have been correctly added to the DB
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.filepath WHERE filepath_id=10 or "
        "filepath_id=11")
    # filepath_id, path, filepath_type_id, checksum,
    # checksum_algorithm_id
    exp = [[10, exp_fna_fp, 4, '852952723', 1],
           [11, exp_qual_fp, 5, '852952723', 1]]
    self.assertEqual(obs, exp)

    # Check that the preprocessed data have been correctly
    # linked with the filepaths
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.preprocessed_filepath WHERE "
        "preprocessed_data_id=3")
    # preprocessed_data_id, filepath_id
    self.assertEqual(obs, [[3, 10], [3, 11]])
def test_create(self):
    """Correctly creates all the rows in the DB for preprocessed data"""
    # Check that the returned object has the correct id
    obs = PreprocessedData.create(
        self.study, self.params_table, self.params_id, self.filepaths,
        prep_template=self.prep_template,
        ebi_submission_accession=self.ebi_submission_accession,
        ebi_study_accession=self.ebi_study_accession)
    self.assertEqual(obs.id, 3)

    # Check that the preprocessed data have been correctly added to the DB
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.preprocessed_data WHERE "
        "preprocessed_data_id=3")
    # preprocessed_data_id, preprocessed_params_table,
    # preprocessed_params_id, submitted_to_insdc_status,
    # ebi_submission_accession, ebi_study_accession, data_type_id,
    # link_filepaths_status, vamps_status, processing_status
    exp = [[3, "preprocessed_sequence_illumina_params", 1,
            'not submitted', "EBI123456-A", "EBI123456-B", 2, 'idle',
            'not submitted', 'not_processed']]
    self.assertEqual(obs, exp)

    # Check that the preprocessed data has been linked with its study
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.study_preprocessed_data WHERE "
        "preprocessed_data_id=3")
    exp = [[1, 3]]
    self.assertEqual(obs, exp)

    # Check that the files have been copied to the right location
    exp_fna_fp = join(self.db_test_ppd_dir,
                      "3_%s" % basename(self.fna_fp))
    self.assertTrue(exists(exp_fna_fp))
    self._clean_up_files.append(exp_fna_fp)

    exp_qual_fp = join(self.db_test_ppd_dir,
                       "3_%s" % basename(self.qual_fp))
    self.assertTrue(exists(exp_qual_fp))
    self._clean_up_files.append(exp_qual_fp)

    # Check that the filepaths have been correctly added to the DB
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.filepath WHERE filepath_id=17 or "
        "filepath_id=18")
    exp_fna_fp = "3_%s" % basename(self.fna_fp)
    exp_qual_fp = "3_%s" % basename(self.qual_fp)
    # filepath_id, path, filepath_type_id, checksum,
    # checksum_algorithm_id, data_directory_id
    exp = [[17, exp_fna_fp, 4, '852952723', 1, 3],
           [18, exp_qual_fp, 5, '852952723', 1, 3]]
    self.assertEqual(obs, exp)
def test_create_data_type_only(self):
    # Check that the returned object has the correct id
    obs = PreprocessedData.create(self.study, self.params_table,
                                  self.params_id, self.filepaths,
                                  data_type="18S")
    self.assertEqual(obs.id, 3)

    # Check that the preprocessed data have been correctly added to the DB
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.preprocessed_data WHERE "
        "preprocessed_data_id=3")
    # preprocessed_data_id, preprocessed_params_table,
    # preprocessed_params_id, submitted_to_insdc,
    # ebi_submission_accession, ebi_study_accession, data_type_id
    exp = [[3, "preprocessed_sequence_illumina_params", 1, False, None,
            None, 2]]
    self.assertEqual(obs, exp)

    # Check that the preprocessed data has been linked with its study
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.study_preprocessed_data WHERE "
        "preprocessed_data_id=3")
    exp = [[1, 3]]
    self.assertEqual(obs, exp)

    # Check that the files have been copied to the right location
    exp_fna_fp = join(self.db_test_ppd_dir,
                      "3_%s" % basename(self.fna_fp))
    self.assertTrue(exists(exp_fna_fp))
    self._clean_up_files.append(exp_fna_fp)

    exp_qual_fp = join(self.db_test_ppd_dir,
                       "3_%s" % basename(self.qual_fp))
    self.assertTrue(exists(exp_qual_fp))
    self._clean_up_files.append(exp_qual_fp)

    # Check that the filepaths have been correctly added to the DB
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.filepath WHERE filepath_id=15 or "
        "filepath_id=16")
    # filepath_id, path, filepath_type_id, checksum,
    # checksum_algorithm_id
    exp = [[15, exp_fna_fp, 4, '852952723', 1],
           [16, exp_qual_fp, 5, '852952723', 1]]
    self.assertEqual(obs, exp)

    # Check that the preprocessed data have been correctly
    # linked with the filepaths
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.preprocessed_filepath WHERE "
        "preprocessed_data_id=3")
    # preprocessed_data_id, filepath_id
    self.assertEqual(obs, [[3, 15], [3, 16]])
def test_create(self):
    """Correctly creates all the rows in the DB for preprocessed data"""
    # Check that the returned object has the correct id
    obs = PreprocessedData.create(
        self.study, self.params_table, self.params_id, self.filepaths,
        raw_data=self.raw_data,
        ebi_submission_accession=self.ebi_submission_accession,
        ebi_study_accession=self.ebi_study_accession)
    self.assertEqual(obs.id, 3)

    # Check that the preprocessed data have been correctly added to the DB
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.preprocessed_data WHERE "
        "preprocessed_data_id=3")
    # preprocessed_data_id, preprocessed_params_table,
    # preprocessed_params_id, submitted_to_insdc,
    # ebi_submission_accession, ebi_study_accession, data_type_id
    exp = [[3, "preprocessed_sequence_illumina_params", 1, False,
            "EBI123456-A", "EBI123456-B", 2]]
    self.assertEqual(obs, exp)

    # Check that the preprocessed data has been linked with its study
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.study_preprocessed_data WHERE "
        "preprocessed_data_id=3")
    exp = [[1, 3]]
    self.assertEqual(obs, exp)

    # Check that the files have been copied to the right location
    exp_fna_fp = join(self.db_test_ppd_dir,
                      "3_%s" % basename(self.fna_fp))
    self.assertTrue(exists(exp_fna_fp))
    self._clean_up_files.append(exp_fna_fp)

    exp_qual_fp = join(self.db_test_ppd_dir,
                       "3_%s" % basename(self.qual_fp))
    self.assertTrue(exists(exp_qual_fp))
    self._clean_up_files.append(exp_qual_fp)

    # Check that the filepaths have been correctly added to the DB
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.filepath WHERE filepath_id=15 or "
        "filepath_id=16")
    # filepath_id, path, filepath_type_id, checksum,
    # checksum_algorithm_id
    exp = [[15, exp_fna_fp, 4, '852952723', 1],
           [16, exp_qual_fp, 5, '852952723', 1]]
    self.assertEqual(obs, exp)
def test_delete_advanced(self):
    # Test that preprocessed data cannot be deleted once it has been
    # submitted to EBI or VAMPS
    ppd = PreprocessedData.create(
        self.study, self.params_table, self.params_id, self.filepaths,
        prep_template=self.prep_template)

    # fails due to VAMPS submission
    ppd.update_vamps_status('success')
    with self.assertRaises(QiitaDBStatusError):
        PreprocessedData.delete(ppd.id)
    ppd.update_vamps_status('failed')

    # fails due to EBI submission
    ppd = PreprocessedData(1)
    with self.assertRaises(QiitaDBStatusError):
        PreprocessedData.delete(ppd.id)
def test_status(self):
    ppd = PreprocessedData(1)
    self.assertEqual(ppd.status, 'private')

    # Since the status is inferred from the processed data, change the
    # status of the processed data so we can check how it changes in the
    # preprocessed data
    pd = ProcessedData(1)
    pd.status = 'public'
    self.assertEqual(ppd.status, 'public')

    # Check that new preprocessed data has sandbox as status since no
    # processed data exists for it
    ppd = PreprocessedData.create(self.study, self.params_table,
                                  self.params_id, self.filepaths,
                                  data_type="16S")
    self.assertEqual(ppd.status, 'sandbox')
def write_demux_files(self, prep_template, generate_hdf5=True):
    """Writes a demux test file to avoid duplication of code"""
    fna_fp = join(self.temp_dir, 'seqs.fna')
    demux_fp = join(self.temp_dir, 'demux.seqs')
    if generate_hdf5:
        with open(fna_fp, 'w') as f:
            f.write(FASTA_EXAMPLE)
        with File(demux_fp, "w") as f:
            to_hdf5(fna_fp, f)
    else:
        with open(demux_fp, 'w') as f:
            f.write('')

    # 6 is the filepath type id for "preprocessed_demux" files
    ppd = PreprocessedData.create(Study(1),
                                  "preprocessed_sequence_illumina_params",
                                  1, [(demux_fp, 6)], prep_template)
    return ppd
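# A hypothetical sketch of how write_demux_files might be consumed by a test;
# the test name and assertion are illustrative assumptions, not part of the
# original suite (a new preprocessed data starts as 'not_processed').
def test_demux_roundtrip(self):
    ppd = self.write_demux_files(PrepTemplate(1))
    # the helper returns a freshly created PreprocessedData object
    self.assertEqual(ppd.processing_status, 'not_processed')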
def test_delete_basic(self):
    """Correctly deletes a preprocessed data"""
    # testing regular delete
    ppd = PreprocessedData.create(
        self.study, self.params_table, self.params_id, self.filepaths,
        prep_template=self.prep_template)
    PreprocessedData.delete(ppd.id)

    # testing that the deleted preprocessed data can't be instantiated
    with self.assertRaises(QiitaDBUnknownIDError):
        PreprocessedData(ppd.id)
    # and, for completeness, testing that delete raises an error if the
    # ID doesn't exist
    with self.assertRaises(QiitaDBUnknownIDError):
        PreprocessedData.delete(ppd.id)

    # testing that preprocessed data whose status is not sandbox cannot
    # be deleted
    with self.assertRaises(QiitaDBStatusError):
        PreprocessedData.delete(1)
def test_create_data_type_only(self):
    # Check that the returned object has the correct id
    new_id = get_count('qiita.preprocessed_data') + 1
    obs = PreprocessedData.create(self.study, self.params_table,
                                  self.params_id, self.filepaths,
                                  data_type="18S")
    self.assertEqual(obs.id, new_id)

    # Check that all the information is initialized correctly
    self.assertEqual(obs.processed_data, [])
    self.assertEqual(obs.prep_template, [])
    self.assertEqual(obs.study, self.study.id)
    self.assertEqual(obs.data_type(), "18S")
    self.assertEqual(obs.data_type(ret_id=True),
                     convert_to_id("18S", "data_type"))
    self.assertEqual(obs.submitted_to_vamps_status(), "not submitted")
    self.assertEqual(obs.processing_status, "not_processed")
    self.assertEqual(obs.status, "sandbox")
    self.assertEqual(obs.preprocessing_info,
                     (self.params_table, self.params_id))
def generate_new_study_with_preprocessed_data(self):
    """Creates a new study up to the preprocessed data for testing"""
    # ignoring warnings generated when adding templates
    simplefilter("ignore")
    info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 3,
        "number_samples_promised": 3,
        "study_alias": "Test EBI",
        "study_description": "Study for testing EBI",
        "study_abstract": "Study for testing EBI",
        "emp_person_id": StudyPerson(2),
        "principal_investigator_id": StudyPerson(3),
        "lab_person_id": StudyPerson(1)}
    study = Study.create(User('*****@*****.**'), "Test EBI study",
                         [1], info)
    metadata_dict = {
        'Sample1': {'collection_timestamp': datetime(2015, 6, 1, 7, 0, 0),
                    'physical_specimen_location': 'location1',
                    'taxon_id': 9606,
                    'scientific_name': 'homo sapiens',
                    'Description': 'Test Sample 1'},
        'Sample2': {'collection_timestamp': datetime(2015, 6, 2, 7, 0, 0),
                    'physical_specimen_location': 'location1',
                    'taxon_id': 9606,
                    'scientific_name': 'homo sapiens',
                    'Description': 'Test Sample 2'},
        'Sample3': {'collection_timestamp': datetime(2015, 6, 3, 7, 0, 0),
                    'physical_specimen_location': 'location1',
                    'taxon_id': 9606,
                    'scientific_name': 'homo sapiens',
                    'Description': 'Test Sample 3'}}
    metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
    SampleTemplate.create(metadata, study)
    metadata_dict = {
        'Sample1': {'primer': 'GTGCCAGCMGCCGCGGTAA',
                    'barcode': 'CGTAGAGCTCTC',
                    'center_name': 'KnightLab',
                    'platform': 'ILLUMINA',
                    'instrument_model': 'Illumina MiSeq',
                    'library_construction_protocol': 'Protocol ABC',
                    'experiment_design_description': "Random value 1"},
        'Sample2': {'primer': 'GTGCCAGCMGCCGCGGTAA',
                    'barcode': 'CGTAGAGCTCTA',
                    'center_name': 'KnightLab',
                    'platform': 'ILLUMINA',
                    'instrument_model': 'Illumina MiSeq',
                    'library_construction_protocol': 'Protocol ABC',
                    'experiment_design_description': "Random value 2"},
        'Sample3': {'primer': 'GTGCCAGCMGCCGCGGTAA',
                    'barcode': 'CGTAGAGCTCTT',
                    'center_name': 'KnightLab',
                    'platform': 'ILLUMINA',
                    'instrument_model': 'Illumina MiSeq',
                    'library_construction_protocol': 'Protocol ABC',
                    'experiment_design_description': "Random value 3"}}
    metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
    pt = PrepTemplate.create(metadata, study, "16S", 'Metagenomics')
    fna_fp = join(self.temp_dir, 'seqs.fna')
    demux_fp = join(self.temp_dir, 'demux.seqs')
    with open(fna_fp, 'w') as f:
        f.write(FASTA_EXAMPLE_2.format(study.id))
    with File(demux_fp, 'w') as f:
        to_hdf5(fna_fp, f)
    ppd = PreprocessedData.create(
        study, "preprocessed_sequence_illumina_params", 1,
        [(demux_fp, 6)], pt)
    return ppd
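# A hypothetical sketch of how generate_new_study_with_preprocessed_data
# might be consumed; the test name and assertions are illustrative
# assumptions based on the helpers above (the prep template is created with
# data type "16S", and new preprocessed data starts in the sandbox status).
def test_new_study_fixture(self):
    ppd = self.generate_new_study_with_preprocessed_data()
    self.assertEqual(ppd.data_type(), "16S")
    self.assertEqual(ppd.status, 'sandbox')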