def test_get_filetypes(self):
    """Check get_filetypes with its default key and with key="filetype_id"."""
    # Expected result of the default call: filetype name -> integer id
    name_to_id = {
        "SFF": 1,
        "FASTA_Sanger": 2,
        "FASTQ": 3,
        "FASTA": 4,
        "per_sample_FASTQ": 5,
    }
    self.assertEqual(get_filetypes(), name_to_id)

    # With key="filetype_id" the expected mapping is the exact inverse
    id_to_name = dict((ft_id, name) for name, ft_id in name_to_id.items())
    self.assertEqual(get_filetypes(key="filetype_id"), id_to_name)
def test_get_filetypes(self):
    """Check get_filetypes with its default key and with key='filetype_id'."""
    # Expected result of the default call: filetype name -> integer id
    by_name = {'FASTA': 1, 'FASTQ': 2, 'SPECTRA': 3}
    self.assertEqual(get_filetypes(), by_name)

    # Swapping keys and values gives the expectation for key='filetype_id'
    by_id = {}
    for name, ft_id in by_name.items():
        by_id[ft_id] = name
    self.assertEqual(get_filetypes(key='filetype_id'), by_id)
def test_get_filetypes(self):
    """Check get_filetypes with its default key and with key='filetype_id'."""
    # Expected result of the default call: filetype name -> integer id
    expected_by_name = {
        'SFF': 1,
        'FASTA-Sanger': 2,
        'FASTQ': 3,
        'FASTA': 4,
    }
    observed_by_name = get_filetypes()
    self.assertEqual(observed_by_name, expected_by_name)

    # The id-keyed call is expected to return the inverted mapping
    observed_by_id = get_filetypes(key='filetype_id')
    expected_by_id = dict(
        (ft_id, name) for name, ft_id in expected_by_name.items())
    self.assertEqual(observed_by_id, expected_by_id)
def test_get_filetypes(self):
    """Check get_filetypes with its default key and with key='filetype_id'."""
    # Expected result of the default call: filetype name -> integer id
    name_to_id = {
        'SFF': 1,
        'FASTA_Sanger': 2,
        'FASTQ': 3,
        'FASTA': 4,
        'per_sample_FASTQ': 5,
    }
    self.assertEqual(get_filetypes(), name_to_id)

    # With key='filetype_id' the expected mapping is the exact inverse
    id_to_name = dict(zip(name_to_id.values(), name_to_id.keys()))
    self.assertEqual(get_filetypes(key='filetype_id'), id_to_name)
def test_get_preprocess_fastq_cmd_per_sample_FASTQ_failure(self):
    """_get_preprocess_fastq_cmd must raise ValueError for this raw data.

    The raw data is created as 'per_sample_FASTQ' with one forward-seqs
    file and one barcodes file attached to a single-sample prep template.
    """
    sample_row = {'run_prefix': "sample1_failure",
                  'primer': 'A',
                  'barcode': 'A',
                  'center_name': 'ANL',
                  'platform': 'ILLUMINA',
                  'library_construction_protocol': 'A',
                  'experiment_design_description': 'A'}
    md_template = pd.DataFrame.from_dict({'SKB8.640193': sample_row},
                                         orient='index')
    prep_template = PrepTemplate.create(md_template, Study(1), '16S')

    # This part should fail
    # Create the placeholder files and register them for cleanup
    created = []
    for fname in ('sample1_failure.fastq',
                  'sample1_failure.barcodes.fastq.gz'):
        fp = self.path_builder(fname)
        with open(fp, 'w') as fh:
            fh.write('\n')
        self.files_to_remove.append(fp)
        created.append(fp)
    seqs_fp, barcodes_fp = created

    forward_filepath_id = convert_to_id('raw_forward_seqs', 'filepath_type')
    barcode_filepath_id = convert_to_id('raw_barcodes', 'filepath_type')
    fps = [(seqs_fp, forward_filepath_id),
           (barcodes_fp, barcode_filepath_id)]

    filetype_id = get_filetypes()['per_sample_FASTQ']
    raw_data = RawData.create(filetype_id, [prep_template], fps)

    # Pick the first parameter set with the expected name
    candidates = [p for p in PreprocessedIlluminaParams.iter()
                  if p.name == 'per sample FASTQ defaults']
    params = candidates[0]

    self.assertRaises(ValueError, _get_preprocess_fastq_cmd,
                      raw_data, prep_template, params)
def get(self, study_id):
    """Render 'study_description.html' for the study ``study_id``.

    The template receives the names found in the study folder (an empty
    list when the folder does not exist) and the raw filetype labels
    derived from the ``raw_*`` keys of get_filetypes().
    """
    study_dir = get_study_fp(study_id)
    files = list(listdir(study_dir)) if exists(study_dir) else []

    # 'raw_forward_seqs' -> 'forward seqs': drop the prefix, join with spaces
    raw_labels = []
    for name in get_filetypes().keys():
        if name.startswith('raw_'):
            raw_labels.append(' '.join(name.split('_')[1:]))

    # NOTE(review): the 'max_upoad_size' spelling is kept as-is; it is both
    # the keyword handed to the template and the attribute read from
    # qiita_config, so renaming it needs a coordinated change.
    self.render('study_description.html',
                user=self.current_user,
                study_info=Study(study_id).info,
                study_id=study_id,
                files=files,
                max_upoad_size=qiita_config.max_upoad_size,
                filetypes=raw_labels)
def render(self, study):
    """Return the rendered raw data tab for ``study``.

    Passes to 'raw_data_tab.html' the filetype items sorted by their second
    element, the sorted items of get_raw_data_from_other_studies for the
    current user, and one ``(id, filetype, raw_data)`` tuple per raw data
    object of the study.
    """
    current_user = self.current_user

    filetype_items = list(viewitems(get_filetypes()))
    filetype_items.sort(key=itemgetter(1))

    other_rd_items = list(
        viewitems(get_raw_data_from_other_studies(current_user, study)))
    other_rd_items.sort()

    raw_data_rows = []
    for rd in get_raw_data(study.raw_data()):
        raw_data_rows.append((rd.id, rd.filetype, rd))

    return self.render_string(
        "study_description_templates/raw_data_tab.html",
        filetypes=filetype_items,
        other_studies_rd=other_rd_items,
        available_raw_data=raw_data_rows,
        study=study)
def render(self, study):
    """Return the rendered raw data tab for ``study``.

    The template gets the filetype items ordered by their second element,
    the sorted raw data items reported by get_raw_data_from_other_studies
    for the current user, and ``(id, filetype, raw_data)`` tuples for the
    study's own raw data.
    """
    requester = self.current_user

    template_args = {}
    template_args['filetypes'] = sorted(
        viewitems(get_filetypes()), key=itemgetter(1))
    template_args['other_studies_rd'] = sorted(
        viewitems(get_raw_data_from_other_studies(requester, study)))

    rows = []
    for raw_data in get_raw_data(study.raw_data()):
        row = (raw_data.id, raw_data.filetype, raw_data)
        rows.append(row)
    template_args['available_raw_data'] = rows
    template_args['study'] = study

    return self.render_string(
        "study_description_templates/raw_data_tab.html", **template_args)
def test_get_preprocess_fastq_cmd_per_sample_FASTQ(self):
    """Check the command built for a two-sample per_sample_FASTQ raw data.

    Two forward-seqs files (one per run_prefix) are attached to the raw
    data and the generated split_libraries_fastq.py command is compared
    against the expected string.
    """
    def sample_row(run_prefix):
        # Key order mirrors the literal rows this helper replaces
        return {'run_prefix': run_prefix,
                'primer': 'A',
                'barcode': 'A',
                'center_name': 'ANL',
                'platform': 'ILLUMINA',
                'instrument_model': 'Illumina MiSeq',
                'library_construction_protocol': 'A',
                'experiment_design_description': 'A'}

    metadata_dict = {'SKB8.640193': sample_row("sample1"),
                     'SKD8.640184': sample_row("sample2")}
    md_template = pd.DataFrame.from_dict(metadata_dict, orient='index')
    prep_template = PrepTemplate.create(md_template, Study(1), '16S')

    # Create the placeholder files and register them for cleanup
    created = []
    for fname in ('sample1.fastq', 'sample2.fastq.gz'):
        fp = self.path_builder(fname)
        with open(fp, 'w') as fh:
            fh.write('\n')
        self.files_to_remove.append(fp)
        created.append(fp)

    filepath_id = convert_to_id('raw_forward_seqs', 'filepath_type')
    fps = [(fp, filepath_id) for fp in created]

    filetype_id = get_filetypes()['per_sample_FASTQ']
    raw_data = RawData.create(filetype_id, [prep_template], fps)

    # Pick the first parameter set with the expected name
    candidates = [p for p in PreprocessedIlluminaParams.iter()
                  if p.name == 'per sample FASTQ defaults']
    params = candidates[0]

    obs_cmd, obs_output_dir = _get_preprocess_fastq_cmd(
        raw_data, prep_template, params)

    # The second element of each sorted get_filepaths() entry is the path
    sorted_paths = []
    for _, fp, _ in sorted(raw_data.get_filepaths()):
        sorted_paths.append(fp)
    raw_fps = ','.join(sorted_paths)

    cmd_template = (
        "split_libraries_fastq.py --store_demultiplexed_fastq -i "
        "{} --sample_ids 1.SKB8.640193,1.SKD8.640184 -o {} --barcode_type "
        "not-barcoded --max_bad_run_length 3 --max_barcode_errors 1.5 "
        "--min_per_read_length_fraction 0.75 --phred_quality_threshold 3 "
        "--sequence_max_n 0")
    exp_cmd = cmd_template.format(raw_fps, obs_output_dir)

    self.assertEqual(obs_cmd, exp_cmd)
def render(self, study, full_access):
    """Return the rendered raw data tab for ``study``.

    Without ``full_access`` only raw data whose status for the study is
    'public' is listed. Each listed entry is a tuple
    ``(id, filetype, raw_data, STATUS_STYLER[status])``.
    """
    current_user = self.current_user

    filetype_items = list(viewitems(get_filetypes()))
    filetype_items.sort(key=itemgetter(1))

    other_rd_items = sorted(viewitems(
        get_raw_data_from_other_studies(current_user, study)))

    visible_raw_data = []
    for rd in get_raw_data(study.raw_data()):
        # Skip non-public raw data for users without full access
        if not full_access and rd.status(study) != 'public':
            continue
        visible_raw_data.append(
            (rd.id, rd.filetype, rd, STATUS_STYLER[rd.status(study)]))

    return self.render_string(
        "study_description_templates/raw_data_tab.html",
        filetypes=filetype_items,
        other_studies_rd=other_rd_items,
        available_raw_data=visible_raw_data,
        study=study,
        full_access=full_access)
def test_get_filetypes_fail(self):
    """get_filetypes must raise QiitaDBColumnError for key='invalid'."""
    self.assertRaises(QiitaDBColumnError, get_filetypes, key='invalid')
def render(self, study, prep_template, full_access, ena_terms,
           user_defined_terms):
    """Return the rendered prep template info tab.

    Gathers the download links for the prep template and QIIME mapping
    files, the available filetypes and uploaded files, and, when the prep
    template has raw data attached, the preprocessing options and whether
    preprocessing can be launched. Everything is handed to
    'prep_template_info_tab.html'.

    Raises
    ------
    NotImplementedError
        If the attached raw data has a filetype other than 'SFF', 'FASTA',
        'FASTQ' or 'per_sample_FASTQ'.
    """
    user = self.current_user
    is_local_request = is_localhost(self.request.headers['host'])

    template_fps = []
    qiime_fps = []
    # Unfortunately, both the prep template and the qiime mapping files
    # have the sample type. The way to differentiate them is if we have
    # the substring 'qiime' in the basename
    for id_, fp in prep_template.get_filepaths():
        is_qiime = 'qiime' in basename(fp)
        label = 'Qiime mapping' if is_qiime else 'Prep template'
        link = download_link_or_path(is_local_request, fp, id_, label)
        if is_qiime:
            qiime_fps.append(link)
        else:
            template_fps.append(link)

    # Since get_filepaths returns the paths sorted from newest to oldest,
    # the first in both list is the latest one
    current_template_fp = template_fps[0]
    current_qiime_fp = qiime_fps[0]

    show_old_templates = len(template_fps) > 1
    old_templates = template_fps[1:] if show_old_templates else None

    show_old_qiime_fps = len(qiime_fps) > 1
    old_qiime_fps = qiime_fps[1:] if show_old_qiime_fps else None

    filetype_rows = ((ft, ft_id, fp_type_by_ft[ft])
                     for ft, ft_id in viewitems(get_filetypes()))
    filetypes = sorted(filetype_rows, key=itemgetter(1))

    files = [f for _, f in get_files_from_uploads_folders(str(study.id))]

    other_studies_rd = sorted(viewitems(_get_accessible_raw_data(user)))

    # A prep template can be modified if its status is sandbox
    is_editable = prep_template.status == 'sandbox'

    raw_data_id = prep_template.raw_data
    preprocess_options = []
    preprocessed_data = None
    show_preprocess_btn = True
    no_preprocess_msg = None

    if raw_data_id:
        rd = RawData(raw_data_id)
        rd_ft = rd.filetype

        # If the prep template has a raw data associated, it can be
        # preprocessed. Retrieve the pre-processing parameters
        if rd_ft in ('SFF', 'FASTA'):
            param_iter = Preprocessed454Params.iter()
        elif rd_ft == 'FASTQ':
            param_iter = [
                pip for pip in PreprocessedIlluminaParams.iter()
                if pip.values['barcode_type'] != 'not-barcoded']
        elif rd_ft == 'per_sample_FASTQ':
            param_iter = [
                pip for pip in PreprocessedIlluminaParams.iter()
                if pip.values['barcode_type'] == 'not-barcoded']
        else:
            raise NotImplementedError(
                "Pre-processing of %s files currently not supported."
                % rd_ft)

        # One (id, name, html description) entry per parameter set
        for param in param_iter:
            lines = ("<b>%s:</b> %s" % (k, v)
                     for k, v in viewitems(param.values))
            preprocess_options.append(
                (param.id, param.name, '<br>'.join(lines)))

        preprocessed_data = prep_template.preprocessed_data

        # Check if the template have all the required columns for
        # preprocessing
        raw_data_files = rd.get_filepaths()
        if len(raw_data_files) == 0:
            show_preprocess_btn = False
            no_preprocess_msg = (
                "Preprocessing disabled because there are no files "
                "linked with the Raw Data")
        elif prep_template.data_type() in TARGET_GENE_DATA_TYPES:
            raw_forward_fps = [fp for _, fp, ftype in raw_data_files
                               if ftype == 'raw_forward_seqs']
            if len(raw_forward_fps) > 1:
                key = 'demultiplex_multiple'
            else:
                key = 'demultiplex'
            missing_cols = prep_template.check_restrictions(
                [PREP_TEMPLATE_COLUMNS_TARGET_GENE[key]])

            # per_sample_FASTQ only needs 'run_prefix'; every other
            # filetype needs all the restricted columns
            if rd_ft == 'per_sample_FASTQ':
                show_preprocess_btn = 'run_prefix' not in missing_cols
            else:
                show_preprocess_btn = len(missing_cols) == 0

            if show_preprocess_btn:
                no_preprocess_msg = None
            else:
                no_preprocess_msg = (
                    "Preprocessing disabled due to missing columns in "
                    "the prep template: %s" % ', '.join(missing_cols))

    preprocessing_status = prep_template.preprocessing_status

    return self.render_string(
        "study_description_templates/prep_template_info_tab.html",
        pt_id=prep_template.id,
        study_id=study.id,
        raw_data=raw_data_id,
        current_template_fp=current_template_fp,
        current_qiime_fp=current_qiime_fp,
        show_old_templates=show_old_templates,
        old_templates=old_templates,
        show_old_qiime_fps=show_old_qiime_fps,
        old_qiime_fps=old_qiime_fps,
        filetypes=filetypes,
        files=files,
        other_studies_rd=other_studies_rd,
        prep_template=prep_template,
        study=study,
        ena_terms=ena_terms,
        user_defined_terms=user_defined_terms,
        investigation_type=prep_template.investigation_type,
        is_editable=is_editable,
        preprocess_options=preprocess_options,
        preprocessed_data=preprocessed_data,
        preprocessing_status=preprocessing_status,
        show_preprocess_btn=show_preprocess_btn,
        no_preprocess_msg=no_preprocess_msg)
def display_template(self, study, msg, msg_level, tab_to_display=""):
    """Collect the study page data and render 'study_description.html'.

    Written as a generator: the raw data, the prep templates and the raw
    data from other studies are each obtained through ``yield Task(...)``.
    Most select options are pre-built here as HTML ``<option>`` strings so
    the template does not have to loop over them.
    """
    option = '<option value="%s">%s</option>'

    # Check if the request came from a local source
    host = self.request.headers['host']
    is_local_request = 'localhost' in host or '127.0.0.1' in host

    # getting raw filepath_ types
    fts = []
    for k in get_filepath_types():
        if k.startswith('raw_'):
            label = k.split('_', 1)[1].replace('_', ' ')
            fts.append(option % (label, label))

    user = User(self.current_user)

    # getting the RawData and its prep templates
    available_raw_data = yield Task(self.get_raw_data, study.raw_data())
    available_prep_templates = yield Task(self.get_prep_templates,
                                          available_raw_data)

    # set variable holding if we have files attached to all raw data or not
    raw_files = bool(available_raw_data)
    for r in available_raw_data:
        if not r.get_filepaths():
            raw_files = False

    # set variable holding if we have all prep templates or not
    prep_templates = bool(available_prep_templates)
    for _, val in viewitems(available_prep_templates):
        if not val:
            prep_templates = False

    # other general vars, note that we create the select options here
    # so we do not have to loop several times over them in the template
    data_types = [
        option % (v, k)
        for k, v in sorted(viewitems(get_data_types()), key=itemgetter(1))]
    filetypes = [
        option % (v, k)
        for k, v in sorted(viewitems(get_filetypes()), key=itemgetter(1))]

    other_studies_rd = yield Task(self.get_raw_data_from_other_studies,
                                  user, study)
    other_studies_rd = [option % (k, "id: %d, study: %s" % (k, v))
                        for k, v in viewitems(other_studies_rd)]

    ontology = Ontology(convert_to_id('ENA', 'ontology'))

    # make "Other" show at the bottom of the drop down menu
    ena_terms = [option % (v, v)
                 for v in sorted(ontology.terms) if v != 'Other']
    ena_terms.append('<option value="Other">Other</option>')

    # New Type is for users to add a new user-defined investigation type
    user_defined_terms = ontology.user_defined_terms + ['New Type']

    princ_inv = StudyPerson(study.info['principal_investigator_id'])
    pi_link = study_person_linkifier((princ_inv.email, princ_inv.name))

    sample_templates = []
    if SampleTemplate.exists(study.id):
        sample_templates = SampleTemplate(study.id).get_filepaths()

    self.render(
        'study_description.html',
        user=self.current_user,
        study_title=study.title,
        study_info=study.info,
        study_id=study.id,
        filetypes=''.join(filetypes),
        user_level=user.level,
        data_types=''.join(data_types),
        available_raw_data=available_raw_data,
        available_prep_templates=available_prep_templates,
        ste=SampleTemplate.exists(study.id),
        study_status=study.status,
        filepath_types=''.join(fts),
        ena_terms=''.join(ena_terms),
        tab_to_display=tab_to_display,
        level=msg_level,
        message=msg,
        prep_templates=prep_templates,
        raw_files=raw_files,
        can_upload=check_access(user, study, no_public=True),
        other_studies_rd=''.join(other_studies_rd),
        user_defined_terms=user_defined_terms,
        files=get_files_from_uploads_folders(str(study.id)),
        is_public=study.status == 'public',
        pmids=", ".join([pubmed_linkifier([pmid]) for pmid in study.pmids]),
        principal_investigator=pi_link,
        is_local_request=is_local_request,
        sample_templates=sample_templates)