def test_submitted_to_insdc_status(self): """submitted_to_insdc_status works correctly""" # False case pd = PreprocessedData(1) self.assertEqual(pd.submitted_to_insdc_status(), 'submitting') # True case pd = PreprocessedData(2) self.assertEqual(pd.submitted_to_insdc_status(), 'not submitted')
def test_is_submitted_to_insdc(self): """is_submitted_to_insdc works correctly""" # False case pd = PreprocessedData(1) self.assertTrue(pd.is_submitted_to_insdc()) # True case pd = PreprocessedData(2) self.assertFalse(pd.is_submitted_to_insdc())
def post(self, preprocessed_data_id):
    """Queue an EBI submission job for the given preprocessed data

    Parameters
    ----------
    preprocessed_data_id : str
        The preprocessed data identifier from the URL

    Raises
    ------
    HTTPError
        403 if the user is not an admin or the submission type is not
        'ADD' or 'MODIFY'
    """
    user = self.current_user
    # make sure user is admin and can therefore actually submit to EBI
    if user.level != 'admin':
        raise HTTPError(403, "User %s cannot submit to EBI!" % user.id)
    submission_type = self.get_argument('submission_type')
    if submission_type not in ['ADD', 'MODIFY']:
        raise HTTPError(403, "User: %s, %s is not a recognized submission "
                        "type" % (user.id, submission_type))
    msg = ''
    msg_level = 'success'
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    state = preprocessed_data.submitted_to_insdc_status()
    if state == 'submitting':
        msg = "Cannot resubmit! Current state is: %s" % state
        msg_level = 'danger'
    elif state == 'success' and submission_type == "ADD":
        msg = "Cannot resubmit! Current state is: %s, use MODIFY" % state
        msg_level = 'danger'
    else:
        channel = user.id
        job_id = submit(channel, submit_to_ebi, int(preprocessed_data_id),
                        submission_type)
        self.render('compute_wait.html', job_id=job_id,
                    title='EBI Submission',
                    completion_redirect='/compute_complete/%s' % job_id)
        # BUG FIX: the wait page has already been rendered; falling
        # through to display_template would render a second response,
        # which Tornado rejects
        return

    return self.display_template(preprocessed_data_id, msg, msg_level)
def post(self, preprocessed_data_id):
    """Queue a VAMPS submission job for the given preprocessed data

    Parameters
    ----------
    preprocessed_data_id : str
        The preprocessed data identifier from the URL

    Raises
    ------
    HTTPError
        403 if the user is not an admin
    """
    # make sure user is admin and can therefore actually submit to VAMPS
    if self.current_user.level != 'admin':
        raise HTTPError(403, "User %s cannot submit to VAMPS!" %
                        self.current_user.id)
    msg = ''
    msg_level = 'success'
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    state = preprocessed_data.submitted_to_vamps_status()
    demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
             if ftype == 'preprocessed_demux']
    demux_length = len(demux)
    if state in ('submitting', 'success'):
        msg = "Cannot resubmit! Current state is: %s" % state
        msg_level = 'danger'
    elif demux_length != 1:
        # BUG FIX: the original applied "% state" to a message with no
        # %s placeholder, raising TypeError at runtime
        msg = "The study doesn't have demux files or has too many"
        msg_level = 'danger'
    else:
        channel = self.current_user.id
        job_id = submit(channel, submit_to_VAMPS,
                        int(preprocessed_data_id))
        self.render('compute_wait.html', job_id=job_id,
                    title='VAMPS Submission',
                    completion_redirect='/compute_complete/%s' % job_id)
        # BUG FIX: the wait page has already been rendered; rendering
        # the submission template as well would be a double response
        return

    return self.display_template(preprocessed_data_id, msg, msg_level)
def test_get_process_target_gene_cmd(self):
    """The closed-reference OTU picking command is built correctly"""
    ppd = PreprocessedData(1)
    sortmerna_params = ProcessedSortmernaParams(1)
    obs_cmd, obs_output_dir = _get_process_target_gene_cmd(
        ppd, sortmerna_params)

    _, ref_dir = get_mountpoint('reference')[0]
    _, preprocessed_dir = get_mountpoint('preprocessed_data')[0]

    exp_cmd = ("pick_closed_reference_otus.py -i {}1_seqs.fna -r "
               "{}GreenGenes_13_8_97_otus.fasta -o {} -p placeholder -t "
               "{}GreenGenes_13_8_97_otu_taxonomy.txt".format(
                   preprocessed_dir, ref_dir, obs_output_dir, ref_dir))

    obs_tokens = obs_cmd.split()
    exp_tokens = exp_cmd.split()
    self.assertEqual(len(obs_tokens), len(exp_tokens))

    # Compare token by token, skipping the value that follows '-p'
    # (the parameters file path is generated at runtime)
    skip_next = False
    for obs_tok, exp_tok in zip(obs_tokens, exp_tokens):
        if skip_next:
            skip_next = False
            continue
        if obs_tok == '-p':
            skip_next = True
        else:
            self.assertEqual(obs_tok, exp_tok)
def render(self, study):
    """Render the preprocessed-data tab for the given study."""
    available = []
    for ppd_id in study.preprocessed_data():
        available.append((ppd_id, PreprocessedData(ppd_id)))
    return self.render_string(
        "study_description_templates/preprocessed_data_tab.html",
        available_preprocessed_data=available,
        study_id=study.id)
def test_get_filepaths(self):
    """Correctly returns the filepaths to the preprocessed files"""
    ppd = PreprocessedData(1)
    expected = [
        (join(self.db_test_ppd_dir, '1_seqs.fna'),
         "preprocessed_sequences"),
        (join(self.db_test_ppd_dir, '1_seqs.qual'),
         "preprocessed_sequences_qual")]
    self.assertEqual(ppd.get_filepaths(), expected)
def submit_VAMPS(preprocessed_data_id):
    """Submit preprocessed data to VAMPS

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocessed data id

    Returns
    -------
    bool
        True if VAMPS acknowledged the upload, False otherwise

    Raises
    ------
    ValueError
        If the data is already submitted or mid-submission
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    status = preprocessed_data.submitted_to_vamps_status()
    if status in ('submitting', 'success'):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    preprocessed_data.update_vamps_status('submitting')

    # Generating a tgz with the metadata and the preprocessed fasta
    targz_folder = mkdtemp(prefix=qiita_config.working_dir)
    targz_fp = join(targz_folder, '%d_%d_%d.tgz' % (study.id,
                                                    prep_template.id,
                                                    preprocessed_data.id))
    targz = taropen(targz_fp, mode='w:gz')

    # adding sample/prep
    samp_fp = join(targz_folder, 'sample_metadata.txt')
    sample_template.to_file(samp_fp)
    targz.add(samp_fp, arcname='sample_metadata.txt')
    prep_fp = join(targz_folder, 'prep_metadata.txt')
    prep_template.to_file(prep_fp)
    targz.add(prep_fp, arcname='prep_metadata.txt')

    # adding preprocessed data
    for _, fp, fp_type in preprocessed_data.get_filepaths():
        if fp_type == 'preprocessed_fasta':
            targz.add(fp, arcname='preprocessed_fasta.fna')

    targz.close()

    # submitting
    # BUG FIX: the format string had only three %s placeholders for four
    # arguments (the password placeholder was missing), which raised
    # "TypeError: not all arguments converted during string formatting"
    cmd = ("curl -F user=%s -F pass=%s -F uploadFile=@%s -F "
           "press=UploadFile %s" % (qiita_config.vamps_user,
                                    qiita_config.vamps_pass,
                                    targz_fp, qiita_config.vamps_url))
    obs, _, _ = system_call(cmd)

    # VAMPS replies with this exact page on a successful upload
    exp = ("<html>\n<head>\n<title>Process Uploaded File</title>\n</head>\n"
           "<body>\n</body>\n</html>")

    if obs != exp:
        preprocessed_data.update_vamps_status('failure')
        return False
    else:
        preprocessed_data.update_vamps_status('success')
        return True
def test_link_filepaths_status_setter(self):
    """The link_filepaths_status setter persists every valid status"""
    ppd = PreprocessedData(1)
    self.assertEqual(ppd.link_filepaths_status, 'idle')
    # each valid transition is stored and read back
    for new_status in ('linking', 'unlinking', 'failed: error'):
        ppd._set_link_filepaths_status(new_status)
        self.assertEqual(ppd.link_filepaths_status, new_status)
def test_get_filepaths(self):
    """Correctly returns the filepaths to the preprocessed files"""
    ppd = PreprocessedData(1)
    obs = ppd.get_filepaths()
    # (filepath id, filename, filepath type) triplets from the test DB
    exp = [(fp_id, join(self.db_test_ppd_dir, fname), fp_type)
           for fp_id, fname, fp_type in
           [(5, '1_seqs.fna', "preprocessed_fasta"),
            (6, '1_seqs.qual', "preprocessed_fastq"),
            (7, '1_seqs.demux', "preprocessed_demux")]]
    self.assertEqual(obs, exp)
def test_processing_status(self): """processing_status works correctly""" # Processed case ppd = PreprocessedData(1) self.assertEqual(ppd.processing_status, 'not_processed') # not processed case ppd = PreprocessedData.create(self.study, self.params_table, self.params_id, self.filepaths, data_type="18S") self.assertEqual(ppd.processing_status, 'not_processed')
def test_update_preprocessed_data_from_cmd(self):
    """update_preprocessed_data_from_cmd replaces files and checksums"""
    exp_ppd = PreprocessedData(Study(1).preprocessed_data()[0])
    exp_fps = exp_ppd.get_filepaths()

    # The original paths must exist, but they're not included in the test
    # so create them here
    for _, fp, _ in exp_fps:
        with open(fp, 'w') as f:
            f.write("")

    # the log file will be added as a brand-new filepath row
    next_fp_id = get_count('qiita.filepath') + 1
    exp_fps.append(
        (next_fp_id,
         join(self.db_ppd_dir, "%s_split_library_log.txt" % exp_ppd.id),
         'log'))

    ppd = update_preprocessed_data_from_cmd(self.test_slo, 1)

    # Check that the modified preprocessed data is the correct one
    self.assertEqual(ppd.id, exp_ppd.id)

    # Check that the filepaths returned are correct
    # We need to sort the list returned from the db because the ordering
    # on that list is based on db modification time, rather than id
    obs_fps = sorted(ppd.get_filepaths())
    self.assertEqual(obs_fps, exp_fps)

    # Check that the checksums have been updated
    sql = "SELECT checksum FROM qiita.filepath WHERE filepath_id=%s"

    # Checksum of the fasta file
    obs_checksum = self.conn_handler.execute_fetchone(
        sql, (obs_fps[0][0], ))[0]
    self.assertEqual(obs_checksum, '3532748626')

    # Checksum of the fastq file
    obs_checksum = self.conn_handler.execute_fetchone(
        sql, (obs_fps[1][0], ))[0]
    self.assertEqual(obs_checksum, '2958832064')

    # Checksum of the demux file
    # The checksum is generated dynamically, so the checksum changes
    # We are going to test that the checksum is not the one that was
    # before, which corresponds to an empty file
    obs_checksum = self.conn_handler.execute_fetchone(
        sql, (obs_fps[2][0], ))[0]
    self.assertTrue(isinstance(obs_checksum, str))
    self.assertNotEqual(obs_checksum, '852952723')
    self.assertTrue(len(obs_checksum) > 0)

    # Checksum of the log file
    obs_checksum = self.conn_handler.execute_fetchone(
        sql, (obs_fps[3][0], ))[0]
    self.assertEqual(obs_checksum, '626839734')
def test_update_preprocessed_data_from_cmd_ppd(self):
    """update_preprocessed_data_from_cmd works with an explicit ppd id"""
    exp_ppd = PreprocessedData(2)

    # build the expected (id, path, type) triplets; the ids are allocated
    # sequentially starting from the current filepath count
    next_fp_id = get_count('qiita.filepath') + 1
    exp_fps = []
    path_builder = partial(join, self.db_ppd_dir)
    suffix_types = [("seqs.fna", "preprocessed_fasta"),
                    ("seqs.fastq", "preprocessed_fastq"),
                    ("seqs.demux", "preprocessed_demux"),
                    ("split_library_log.txt", "log")]
    for id_, vals in enumerate(suffix_types, start=next_fp_id):
        suffix, fp_type = vals
        exp_fps.append(
            (id_, path_builder("%s_%s" % (exp_ppd.id, suffix)), fp_type))

    ppd = update_preprocessed_data_from_cmd(self.test_slo, 1, 2)

    # Check that the modified preprocessed data is the correct one
    self.assertEqual(ppd.id, exp_ppd.id)

    # Check that the filepaths returned are correct
    # We need to sort the list returned from the db because the ordering
    # on that list is based on db modification time, rather than id
    obs_fps = sorted(ppd.get_filepaths())
    self.assertEqual(obs_fps, exp_fps)

    # Check that the checksums have been updated
    sql = "SELECT checksum FROM qiita.filepath WHERE filepath_id=%s"

    # Checksum of the fasta file
    obs_checksum = self.conn_handler.execute_fetchone(
        sql, (obs_fps[0][0], ))[0]
    self.assertEqual(obs_checksum, '3532748626')

    # Checksum of the fastq file
    obs_checksum = self.conn_handler.execute_fetchone(
        sql, (obs_fps[1][0], ))[0]
    self.assertEqual(obs_checksum, '2958832064')

    # Checksum of the demux file
    # The checksum is generated dynamically, so the checksum changes
    # We are going to test that the checksum is not the one that was
    # before, which corresponds to an empty file
    obs_checksum = self.conn_handler.execute_fetchone(
        sql, (obs_fps[2][0], ))[0]
    self.assertTrue(isinstance(obs_checksum, str))
    self.assertNotEqual(obs_checksum, '852952723')
    self.assertTrue(len(obs_checksum) > 0)

    # Checksum of the log file
    obs_checksum = self.conn_handler.execute_fetchone(
        sql, (obs_fps[3][0], ))[0]
    self.assertEqual(obs_checksum, '626839734')
def test_vamps_status(self):
    """VAMPS status can be read, updated, and validated"""
    ppd = PreprocessedData(1)

    # initial value in the test database
    self.assertEqual(ppd.submitted_to_vamps_status(), 'not submitted')

    # a valid transition is persisted and read back
    ppd.update_vamps_status('failed')
    self.assertEqual(ppd.submitted_to_vamps_status(), 'failed')

    # an unrecognized status is rejected
    with self.assertRaises(ValueError):
        ppd.update_vamps_status('not a valid status')
def setUp(self):
    """Create the fixtures shared by the processed-data tests."""
    self.preprocessed_data = PreprocessedData(1)
    self.params_table = "processed_params_uclust"
    self.params_id = 1

    # a minimal (non-empty) biom file on disk
    fd, self.biom_fp = mkstemp(suffix='_table.biom')
    close(fd)
    with open(self.biom_fp, "w") as f:
        f.write("\n")
    self.filepaths = [(self.biom_fp, 6)]

    self.date = datetime(2014, 5, 29, 12, 24, 51)
    self.db_test_pd_dir = join(get_db_files_base_dir(), 'processed_data')
    self._clean_up_files = []
def processor(preprocessed_data_id, param_id, param_constructor):
    """Dispatch the processor work

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocessed data to process
    param_id : int
        The id of the parameter set to use
    param_constructor : callable
        Constructor for the parameter object (called with param_id)

    Returns
    -------
    The StudyProcessor result, or None if processing failed (in which
    case the failure is recorded on the preprocessed data's
    processing_status)
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    params = param_constructor(param_id)

    sp = StudyProcessor()
    try:
        process_out = sp(preprocessed_data, params)
    except Exception as e:
        # BUG FIX: format_exception_only expects (type, value); the old
        # call passed (value, exc_info-tuple), producing a garbled
        # error message
        error_msg = ''.join(format_exception_only(type(e), e))
        preprocessed_data.processing_status = "failed: %s" % error_msg
        process_out = None

    return process_out
def _get_template_variables(self, preprocessed_data_id, callback):
    """Generates all the variables needed to render the template

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocessed data identifier
    callback : function
        The callback function to call with the results once the
        processing is done

    Raises
    ------
    HTTPError
        If the preprocessed data does not have a log file
    """
    # Get the objects and check user privileges
    ppd = PreprocessedData(preprocessed_data_id)
    study = Study(ppd.study)
    check_access(self.current_user, study, raise_error=True)

    # Get the return address
    back_button_path = self.get_argument(
        'back_button_path',
        '/study/description/%d?top_tab=preprocessed_data_tab&sub_tab=%s'
        % (study.id, preprocessed_data_id))

    # Get all the filepaths attached to the preprocessed data
    files_tuples = ppd.get_filepaths()

    # Group the files by filepath type
    files = defaultdict(list)
    for _, fp, fpt in files_tuples:
        files[fpt].append(fp)

    try:
        log_path = files['log'][0]
    except (KeyError, IndexError):
        # BUG FIX: `files` is a defaultdict, so a missing 'log' key
        # yields an empty list and the [0] raises IndexError, not
        # KeyError — the original except clause never fired and the
        # IndexError escaped unhandled
        raise HTTPError(500, "Log file not found in preprocessed data %s"
                        % preprocessed_data_id)

    with open(log_path, 'U') as f:
        contents = f.read()
        # present the log as HTML: newlines -> <br/>, tabs -> nbsp run
        contents = contents.replace('\n', '<br/>')
        contents = contents.replace('\t', '&nbsp;&nbsp;&nbsp;&nbsp;')

    title = 'Preprocessed Data: %d' % preprocessed_data_id

    callback((title, contents, back_button_path))
def test_update_insdc_status(self):
    """Able to update insdc status"""
    ppd = PreprocessedData(1)
    self.assertEqual(ppd.submitted_to_insdc_status(), 'submitting')

    # a plain status update, no accessions required
    ppd.update_insdc_status('failed')
    self.assertEqual(ppd.submitted_to_insdc_status(), 'failed')

    # success requires both study and submission accessions
    ppd.update_insdc_status('success', 'foo', 'bar')
    self.assertEqual(ppd.submitted_to_insdc_status(), 'success')
    self.assertEqual(ppd.ebi_study_accession, 'foo')
    self.assertEqual(ppd.ebi_submission_accession, 'bar')

    # invalid state name is rejected
    with self.assertRaises(ValueError):
        ppd.update_insdc_status('not valid state')

    # success with only one accession is rejected
    with self.assertRaises(ValueError):
        ppd.update_insdc_status('success', 'only one accession')
def display_template(self, preprocessed_data_id, msg, msg_level):
    """Simple function to avoid duplication of code

    Renders the EBI submission page for the given preprocessed data,
    showing `msg` with bootstrap level `msg_level` plus summary stats.

    Raises
    ------
    HTTPError
        404 if the preprocessed data does not exist; 403 if the current
        user is not an admin
    """
    preprocessed_data_id = int(preprocessed_data_id)
    try:
        preprocessed_data = PreprocessedData(preprocessed_data_id)
    except QiitaDBUnknownIDError:
        raise HTTPError(404, "PreprocessedData %d does not exist!" %
                        preprocessed_data_id)
    else:
        # only admins may reach the EBI submission page
        user = self.current_user
        if user.level != 'admin':
            raise HTTPError(403, "No permissions of admin, "
                            "get/EBISubmitHandler: %s!" % user.id)

    prep_template = PrepTemplate(preprocessed_data.prep_template)
    sample_template = SampleTemplate(preprocessed_data.study)
    study = Study(preprocessed_data.study)

    stats = [('Number of samples', len(prep_template)),
             ('Number of metadata headers',
              len(sample_template.metadata_headers()))]

    # collect the demultiplexed file(s) attached to this data; exactly
    # one is expected for a valid submission
    demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
             if ftype == 'preprocessed_demux']
    demux_length = len(demux)

    if not demux_length:
        msg = ("Study does not appear to have demultiplexed "
               "sequences associated")
        msg_level = 'danger'
    elif demux_length > 1:
        msg = ("Study appears to have multiple demultiplexed files!")
        msg_level = 'danger'
    elif demux_length == 1:
        # a single demux file: add its sequence count to the stats
        demux_file = demux[0]
        demux_file_stats = demux_stats(demux_file)
        stats.append(('Number of sequences', demux_file_stats.n))
        msg_level = 'success'

    self.render('ebi_submission.html', study_title=study.title,
                stats=stats, message=msg, study_id=study.id,
                level=msg_level,
                preprocessed_data_id=preprocessed_data_id,
                investigation_type=prep_template.investigation_type)
def submit_EBI(preprocessed_data_id, action, send, fastq_dir_fp=None):
    """Submit a preprocessed data to EBI

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocesssed data id
    action : %s
        The action to perform with this data
    send : bool
        True to actually send the files
    fastq_dir_fp : str, optional
        The fastq filepath

    Returns
    -------
    tuple of (str, str)
        The study accession and the submission accession, or
        (None, None) when ``send`` is False

    Raises
    ------
    ValueError
        If the data is already submitted/submitting, or the
        investigation type is not recognized by the ENA ontology
    IOError
        If the submission output directory already exists
    ComputeError
        If EBI rejects the submission
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    preprocessed_data_id_str = str(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    investigation_type = None
    new_investigation_type = None

    status = preprocessed_data.submitted_to_insdc_status()
    if status in ('submitting', 'success'):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    if send:
        # If we intend actually to send the files, then change the status
        # in the database
        preprocessed_data.update_insdc_status('submitting')

    # we need to figure out whether the investigation type is a known one
    # or if we have to submit a "new_investigation_type" to EBI
    current_type = prep_template.investigation_type
    ena_ontology = Ontology(convert_to_id('ENA', 'ontology'))
    if current_type in ena_ontology.terms:
        investigation_type = current_type
    elif current_type in ena_ontology.user_defined_terms:
        investigation_type = 'Other'
        new_investigation_type = current_type
    else:
        # This should never happen
        # BUG FIX: the message contained a '%s' placeholder but the
        # offending type was never interpolated into it
        raise ValueError("Unrecognized investigation type: '%s'. This term "
                         "is neither one of the official terms nor one of "
                         "the user-defined terms in the ENA ontology"
                         % current_type)

    if fastq_dir_fp is not None:
        # If the user specifies a FASTQ directory, use it
        # Set demux_samples to None so that MetadataTemplate.to_file will
        # put all samples in the template files
        demux_samples = None
    else:
        # If the user does not specify a FASTQ directory, create one and
        # re-serialize the per-sample FASTQs from the demux file
        fastq_dir_fp = mkdtemp(prefix=qiita_config.working_dir)
        demux = [path for _, path, ftype in
                 preprocessed_data.get_filepaths()
                 if ftype == 'preprocessed_demux'][0]

        # Keep track of which files were actually in the demux file so
        # that we can write those rows to the prep and samples templates
        demux_samples = set()

        with open_file(demux) as demux_fh:
            for samp, iterator in to_per_sample_ascii(
                    demux_fh, list(sample_template)):
                demux_samples.add(samp)
                sample_fp = join(fastq_dir_fp, "%s.fastq.gz" % samp)
                with gzopen(sample_fp, 'w') as fh:
                    for record in iterator:
                        fh.write(record)

    output_dir = fastq_dir_fp + '_submission'

    samp_fp = join(fastq_dir_fp, 'sample_metadata.txt')
    prep_fp = join(fastq_dir_fp, 'prep_metadata.txt')

    sample_template.to_file(samp_fp, demux_samples)
    prep_template.to_file(prep_fp, demux_samples)

    # Get specific output directory and set filepaths
    get_output_fp = partial(join, output_dir)
    study_fp = get_output_fp('study.xml')
    sample_fp = get_output_fp('sample.xml')
    experiment_fp = get_output_fp('experiment.xml')
    run_fp = get_output_fp('run.xml')
    submission_fp = get_output_fp('submission.xml')

    if not isdir(output_dir):
        makedirs(output_dir)
    else:
        raise IOError('The output folder already exists: %s' % output_dir)

    with open(samp_fp, 'U') as st, open(prep_fp, 'U') as pt:
        submission = EBISubmission.from_templates_and_per_sample_fastqs(
            preprocessed_data_id_str, study.title,
            study.info['study_abstract'], investigation_type, st, pt,
            fastq_dir_fp, new_investigation_type=new_investigation_type,
            pmids=study.pmids)

    submission.write_all_xml_files(study_fp, sample_fp, experiment_fp,
                                   run_fp, submission_fp, action)

    if send:
        submission.send_sequences()
        study_accession, submission_accession = submission.send_xml()

        if study_accession is None or submission_accession is None:
            preprocessed_data.update_insdc_status('failed')
            raise ComputeError("EBI Submission failed!")
        else:
            preprocessed_data.update_insdc_status('success',
                                                  study_accession,
                                                  submission_accession)
    else:
        study_accession, submission_accession = None, None

    return study_accession, submission_accession
def test_processed_data(self):
    """Correctly returns the processed data id"""
    obs = PreprocessedData(1).processed_data
    self.assertEqual(obs, [1])
def test_prep_template(self):
    """Correctly returns the prep template"""
    obs = PreprocessedData(1).prep_template
    self.assertEqual(obs, 1)
def test_ebi_ebi_study_accession(self):
    """Correctly returns the ebi_study_accession"""
    obs = PreprocessedData(1).ebi_study_accession
    self.assertEqual(obs, 'EBI123456-BB')
def test_study(self):
    """Correctly returns the study"""
    obs = PreprocessedData(1).study
    self.assertEqual(obs, 1)
def test_raw_data(self):
    """Correctly returns the raw data"""
    obs = PreprocessedData(1).raw_data
    self.assertEqual(obs, 1)
def test_link_filepaths_status(self):
    """link_filepaths_status defaults to idle"""
    obs = PreprocessedData(1).link_filepaths_status
    self.assertEqual(obs, 'idle')
def test_link_filepaths_status_setter_error(self):
    """Setting an unrecognized link status raises ValueError"""
    with self.assertRaises(ValueError):
        PreprocessedData(1)._set_link_filepaths_status(
            'not a valid status')