Exemplo n.º 1
0
    def test_get_preprocess_fastq_cmd(self):
        # Checks the command generated for raw data 1 and prep template 1
        # using the 'per sample FASTQ defaults' parameter set
        raw_data = RawData(1)
        matching = [p for p in PreprocessedIlluminaParams.iter()
                    if p.name == 'per sample FASTQ defaults']
        params = matching[0]
        prep_template = PrepTemplate(1)
        obs_cmd, obs_output_dir = _get_preprocess_fastq_cmd(
            raw_data, prep_template, params)

        raw_dir = join(self.db_dir, 'raw_data')
        seqs_fp = join(raw_dir, '1_s_G1_L001_sequences.fastq.gz')
        bc_fp = join(raw_dir, '1_s_G1_L001_sequences_barcodes.fastq.gz')

        # Expected text up to (and including) the "-m " flag
        exp_head = ("split_libraries_fastq.py --store_demultiplexed_fastq -i "
                    "{} -b {} "
                    "-m ").format(seqs_fp, bc_fp)
        # Expected text that follows the mapping file path
        exp_tail = (
            "-o {0} --barcode_type not-barcoded --max_bad_run_length 3 "
            "--max_barcode_errors 1.5 --min_per_read_length_fraction 0.75 "
            "--phred_quality_threshold 3 --sequence_max_n 0").format(
                obs_output_dir)

        # The mapping file path that follows "-m " cannot be known in
        # advance, so the observed command is compared in two pieces: the
        # text before that path and the text after it
        head_len = len(exp_head)
        obs_head = obs_cmd[:head_len]
        # Drop the first space-delimited token of the remainder, i.e. the
        # mapping file path itself
        obs_tail = obs_cmd[head_len:].split(" ", 1)[1]

        self.assertEqual(obs_head, exp_head)
        self.assertEqual(obs_tail, exp_tail)
Exemplo n.º 2
0
    def test_get_preprocess_fastq_cmd_per_sample_FASTQ_failure(self):
        # Building the command must raise ValueError for a per_sample_FASTQ
        # raw data that has a barcodes file attached
        metadata_dict = {
            'SKB8.640193': {'run_prefix': "sample1_failure", 'primer': 'A',
                            'barcode': 'A', 'center_name': 'ANL',
                            'platform': 'ILLUMINA',
                            'library_construction_protocol': 'A',
                            'experiment_design_description': 'A'}}
        md_template = pd.DataFrame.from_dict(metadata_dict, orient='index')
        prep_template = PrepTemplate.create(md_template, Study(1), '16S')

        # This part should fail
        # Create the two placeholder files (one newline each) and register
        # them for removal
        created = []
        for fname in ('sample1_failure.fastq',
                      'sample1_failure.barcodes.fastq.gz'):
            fp = self.path_builder(fname)
            with open(fp, 'w') as fh:
                fh.write('\n')
            self.files_to_remove.append(fp)
            created.append(fp)

        forward_filepath_id = convert_to_id('raw_forward_seqs',
                                            'filepath_type')
        barcode_filepath_id = convert_to_id('raw_barcodes', 'filepath_type')
        fps = list(zip(created, (forward_filepath_id, barcode_filepath_id)))

        filetype_id = get_filetypes()['per_sample_FASTQ']
        raw_data = RawData.create(filetype_id, [prep_template], fps)
        matching = [p for p in PreprocessedIlluminaParams.iter()
                    if p.name == 'per sample FASTQ defaults']
        params = matching[0]

        self.assertRaises(ValueError, _get_preprocess_fastq_cmd,
                          raw_data, prep_template, params)
Exemplo n.º 3
0
    def render(self, prep, study_id, is_editable, ena_terms,
               study_status, user_defined_terms):
        """Render the panel that describes a single prep template.

        Parameters
        ----------
        prep : object
            The prep template to show. Its ``id``, ``status``,
            ``data_type()``, ``raw_data``, ``get_filepaths()``,
            ``investigation_type``, ``preprocessed_data`` and
            ``preprocessing_status`` are read.
        study_id, is_editable, ena_terms, study_status, user_defined_terms
            Forwarded unchanged to the template.

        Returns
        -------
        object
            Whatever ``self.render_string`` returns for
            ``prep_template_panel.html``.

        Raises
        ------
        ValueError
            If the filetype of the linked raw data is not SFF, FASTA or
            FASTQ.
        """
        # Check if the request came from a local source
        local_request = self._is_local()

        pt_id = prep.id
        class1, class2, color = STATUS_STYLER[prep.status]
        dtype = prep.data_type()
        raw_data = RawData(prep.raw_data)
        prep_fps = prep.get_filepaths()
        inv_type = prep.investigation_type
        ppd = prep.preprocessed_data
        ppd_status = prep.preprocessing_status

        # The available parameter sets depend on the raw data filetype
        if raw_data.filetype in ('SFF', 'FASTA'):
            param_iter = Preprocessed454Params.iter()
        elif raw_data.filetype == 'FASTQ':
            param_iter = PreprocessedIlluminaParams.iter()
        else:
            raise ValueError("Don't know what to do but this exception will "
                             "never actually get shown anywhere because why "
                             "would you want to see tracebacks?")

        # One (id, name, html description) tuple per parameter set
        options = []
        for param in param_iter:
            described = '<br>'.join(
                ["<b>%s:</b> %s" % (key, value)
                 for key, value in viewitems(param.values)])
            options.append((param.id, param.name, described))

        # Unfortunately, both the prep template and the qiime mapping files
        # have the same type. The way to differentiate them is if we have
        # the substring 'qiime' in the basename
        labelled_fps = []
        for fp_id, fp in prep_fps:
            if 'qiime' in basename(fp):
                label = "Qiime mapping"
            else:
                label = "Prep template"
            labelled_fps.append((fp_id, fp, label))

        return self.render_string(
            "study_description_templates/prep_template_panel.html",
            prep_id=pt_id,
            status_class1=class1,
            status_class2=class2,
            status_color=color,
            data_type=dtype,
            filepaths=labelled_fps,
            investigation_type=inv_type,
            preprocessed_data=ppd,
            preprocessing_status=ppd_status,
            study_id=study_id,
            is_local_request=local_request,
            is_editable=is_editable,
            ena_terms=ena_terms,
            study_status=study_status,
            user_defined_terms=user_defined_terms,
            preprocess_options=options)
Exemplo n.º 4
0
    def render(self, prep, study_id, is_editable, ena_terms, study_status,
               user_defined_terms):
        """Render the panel that describes a single prep template.

        Parameters
        ----------
        prep : object
            The prep template to show. Its ``id``, ``data_type()``,
            ``raw_data``, ``get_filepaths()``, ``investigation_type``,
            ``preprocessed_data`` and ``preprocessing_status`` are read.
        study_id, is_editable, ena_terms, study_status, user_defined_terms
            Forwarded unchanged to the template.

        Returns
        -------
        object
            Whatever ``self.render_string`` returns for
            ``prep_template_panel.html``.

        Raises
        ------
        ValueError
            If the filetype of the linked raw data is not SFF, FASTA or
            FASTQ.
        """
        # Check if the request came from a local source
        local_request = self._is_local()

        pt_id = prep.id
        dtype = prep.data_type()
        raw_data = RawData(prep.raw_data)
        prep_fps = prep.get_filepaths()
        inv_type = prep.investigation_type
        ppd = prep.preprocessed_data
        ppd_status = prep.preprocessing_status

        # The available parameter sets depend on the raw data filetype
        if raw_data.filetype in ('SFF', 'FASTA'):
            param_iter = Preprocessed454Params.iter()
        elif raw_data.filetype == 'FASTQ':
            param_iter = PreprocessedIlluminaParams.iter()
        else:
            raise ValueError("Don't know what to do but this exception will "
                             "never actually get shown anywhere because why "
                             "would you want to see tracebacks?")

        # One (id, name, html description) tuple per parameter set
        options = []
        for param in param_iter:
            described = '<br>'.join(
                ["<b>%s:</b> %s" % (key, value)
                 for key, value in viewitems(param.values)])
            options.append((param.id, param.name, described))

        # Unfortunately, both the prep template and the qiime mapping files
        # have the same type. The way to differentiate them is if we have
        # the substring 'qiime' in the basename
        labelled_fps = []
        for fp_id, fp in prep_fps:
            if 'qiime' in basename(fp):
                label = "Qiime mapping"
            else:
                label = "Prep template"
            labelled_fps.append((fp_id, fp, label))

        return self.render_string(
            "study_description_templates/prep_template_panel.html",
            prep_id=pt_id,
            data_type=dtype,
            filepaths=labelled_fps,
            investigation_type=inv_type,
            preprocessed_data=ppd,
            preprocessing_status=ppd_status,
            study_id=study_id,
            is_local_request=local_request,
            is_editable=is_editable,
            ena_terms=ena_terms,
            study_status=study_status,
            user_defined_terms=user_defined_terms,
            preprocess_options=options)
Exemplo n.º 5
0
    def test_get_preprocess_fastq_cmd_per_sample_FASTQ(self):
        # Checks the full command generated for a per_sample_FASTQ raw data
        # holding one forward-reads file per sample
        metadata_dict = {
            'SKB8.640193': {'run_prefix': "sample1", 'primer': 'A',
                            'barcode': 'A', 'center_name': 'ANL',
                            'platform': 'ILLUMINA',
                            'instrument_model': 'Illumina MiSeq',
                            'library_construction_protocol': 'A',
                            'experiment_design_description': 'A'},
            'SKD8.640184': {'run_prefix': "sample2", 'primer': 'A',
                            'barcode': 'A', 'center_name': 'ANL',
                            'platform': 'ILLUMINA',
                            'instrument_model': 'Illumina MiSeq',
                            'library_construction_protocol': 'A',
                            'experiment_design_description': 'A'}}
        md_template = pd.DataFrame.from_dict(metadata_dict, orient='index')
        prep_template = PrepTemplate.create(md_template, Study(1), '16S')

        # Create the two placeholder files (one newline each) and register
        # them for removal
        created = []
        for fname in ('sample1.fastq', 'sample2.fastq.gz'):
            fp = self.path_builder(fname)
            with open(fp, 'w') as fh:
                fh.write('\n')
            self.files_to_remove.append(fp)
            created.append(fp)

        filepath_id = convert_to_id('raw_forward_seqs', 'filepath_type')
        fps = [(fp, filepath_id) for fp in created]

        filetype_id = get_filetypes()['per_sample_FASTQ']
        raw_data = RawData.create(filetype_id, [prep_template], fps)
        matching = [p for p in PreprocessedIlluminaParams.iter()
                    if p.name == 'per sample FASTQ defaults']
        params = matching[0]

        obs_cmd, obs_output_dir = _get_preprocess_fastq_cmd(
            raw_data, prep_template, params)

        # The expected input list is the raw data's filepaths, taken in
        # sorted order and joined with commas
        stored_fps = sorted(raw_data.get_filepaths())
        raw_fps = ','.join([fp for _, fp, _ in stored_fps])
        exp_template = (
            "split_libraries_fastq.py --store_demultiplexed_fastq -i "
            "{} --sample_ids 1.SKB8.640193,1.SKD8.640184 -o {} --barcode_type "
            "not-barcoded --max_bad_run_length 3 --max_barcode_errors 1.5 "
            "--min_per_read_length_fraction 0.75 --phred_quality_threshold 3 "
            "--sequence_max_n 0")
        exp_cmd = exp_template.format(raw_fps, obs_output_dir)
        self.assertEqual(obs_cmd, exp_cmd)
Exemplo n.º 6
0
    def render(self, study, prep_template, full_access, ena_terms,
               user_defined_terms):
        """Render the information tab of a prep template.

        Parameters
        ----------
        study : object
            The study being shown. Its ``id`` is read here and the object
            itself is forwarded to the template.
        prep_template : object
            The prep template to describe. Its filepaths, status, raw data
            id, data type, preprocessed data, preprocessing status and
            investigation type are read, and the object itself is forwarded
            to the template.
        full_access : object
            Not used in the body of this method.
        ena_terms, user_defined_terms
            Forwarded unchanged to the template.

        Returns
        -------
        object
            Whatever ``self.render_string`` returns for
            ``prep_template_info_tab.html``.

        Raises
        ------
        IndexError
            If the prep template has no prep template file or no Qiime
            mapping file among its filepaths.
        NotImplementedError
            If the linked raw data has a filetype other than SFF, FASTA,
            FASTQ or per_sample_FASTQ.
        """
        user = self.current_user
        is_local_request = is_localhost(self.request.headers['host'])

        # Unfortunately, both the prep template and the qiime mapping files
        # have the same type. The way to differentiate them is if we have
        # the substring 'qiime' in the basename
        links_by_label = {'Qiime mapping': [], 'Prep template': []}
        for fp_id, fp in prep_template.get_filepaths():
            label = ('Qiime mapping' if 'qiime' in basename(fp)
                     else 'Prep template')
            links_by_label[label].append(
                download_link_or_path(is_local_request, fp, fp_id, label))
        template_fps = links_by_label['Prep template']
        qiime_fps = links_by_label['Qiime mapping']

        # Since get_filepaths returns the paths sorted from newest to oldest,
        # the first in both lists is the latest one
        current_template_fp = template_fps[0]
        current_qiime_fp = qiime_fps[0]

        # Anything after the first entry is an older version; an empty
        # remainder is reported as None
        old_templates = template_fps[1:] or None
        show_old_templates = old_templates is not None
        old_qiime_fps = qiime_fps[1:] or None
        show_old_qiime_fps = old_qiime_fps is not None

        # (filetype, filetype id, filepath types) triples ordered by id
        filetypes = [(ft, ft_id, fp_type_by_ft[ft])
                     for ft, ft_id in viewitems(get_filetypes())]
        filetypes.sort(key=itemgetter(1))
        files = [f for _, f in get_files_from_uploads_folders(str(study.id))]

        other_studies_rd = sorted(viewitems(
            _get_accessible_raw_data(user)))

        # A prep template can be modified if its status is sandbox
        is_editable = prep_template.status == 'sandbox'

        raw_data_id = prep_template.raw_data
        preprocess_options = []
        preprocessed_data = None
        show_preprocess_btn = True
        no_preprocess_msg = None
        if raw_data_id:
            rd = RawData(raw_data_id)
            rd_ft = rd.filetype

            # If the prep template has a raw data associated, it can be
            # preprocessed. Retrieve the pre-processing parameters
            if rd_ft in ('SFF', 'FASTA'):
                param_iter = Preprocessed454Params.iter()
            elif rd_ft in ('FASTQ', 'per_sample_FASTQ'):
                # per_sample_FASTQ only gets the 'not-barcoded' parameter
                # sets; plain FASTQ gets all the others
                want_not_barcoded = rd_ft == 'per_sample_FASTQ'
                param_iter = [
                    pip for pip in PreprocessedIlluminaParams.iter()
                    if (pip.values['barcode_type'] == 'not-barcoded') ==
                    want_not_barcoded]
            else:
                raise NotImplementedError(
                    "Pre-processing of %s files currently not supported."
                    % rd_ft)

            # One (id, name, html description) tuple per parameter set
            for param in param_iter:
                described = '<br>'.join(
                    ["<b>%s:</b> %s" % (key, value)
                     for key, value in viewitems(param.values)])
                preprocess_options.append((param.id, param.name, described))
            preprocessed_data = prep_template.preprocessed_data

            # Check if the template has all the required columns for
            # preprocessing
            raw_data_files = rd.get_filepaths()
            if len(raw_data_files) == 0:
                show_preprocess_btn = False
                no_preprocess_msg = (
                    "Preprocessing disabled because there are no files "
                    "linked with the Raw Data")
            elif prep_template.data_type() in TARGET_GENE_DATA_TYPES:
                # More than one forward reads file selects the
                # 'demultiplex_multiple' restriction
                n_forward = sum(1 for _, _, ftype in raw_data_files
                                if ftype == 'raw_forward_seqs')
                if n_forward > 1:
                    key = 'demultiplex_multiple'
                else:
                    key = 'demultiplex'
                missing_cols = prep_template.check_restrictions(
                    [PREP_TEMPLATE_COLUMNS_TARGET_GENE[key]])

                # per_sample_FASTQ only needs the run_prefix column; any
                # other filetype needs every restricted column
                show_preprocess_btn = (
                    'run_prefix' not in missing_cols
                    if rd_ft == 'per_sample_FASTQ'
                    else len(missing_cols) == 0)

                if not show_preprocess_btn:
                    no_preprocess_msg = (
                        "Preprocessing disabled due to missing columns in "
                        "the prep template: %s" % ', '.join(missing_cols))

        preprocessing_status = prep_template.preprocessing_status

        return self.render_string(
            "study_description_templates/prep_template_info_tab.html",
            pt_id=prep_template.id,
            study_id=study.id,
            raw_data=raw_data_id,
            current_template_fp=current_template_fp,
            current_qiime_fp=current_qiime_fp,
            show_old_templates=show_old_templates,
            old_templates=old_templates,
            show_old_qiime_fps=show_old_qiime_fps,
            old_qiime_fps=old_qiime_fps,
            filetypes=filetypes,
            files=files,
            other_studies_rd=other_studies_rd,
            prep_template=prep_template,
            study=study,
            ena_terms=ena_terms,
            user_defined_terms=user_defined_terms,
            investigation_type=prep_template.investigation_type,
            is_editable=is_editable,
            preprocess_options=preprocess_options,
            preprocessed_data=preprocessed_data,
            preprocessing_status=preprocessing_status,
            show_preprocess_btn=show_preprocess_btn,
            no_preprocess_msg=no_preprocess_msg)
Exemplo n.º 7
0
    def test_iter(self):
        # The objects yielded by iter() must match the expected parameter
        # sets position by position (compared through their ids)
        obs = list(PreprocessedIlluminaParams.iter())
        exp = [PreprocessedIlluminaParams(1)]

        # zip() stops at the shorter input, so without this check an empty
        # (or too short) result from iter() would make the loop below run
        # fewer comparisons than expected and the test would pass vacuously
        self.assertGreaterEqual(len(obs), len(exp))

        for o, e in zip(obs, exp):
            self.assertEqual(o.id, e.id)
Exemplo n.º 8
0
    def render(self, prep, study_id, is_editable, ena_terms,
               study_status, user_defined_terms, raw_data_files):
        """Render the panel that describes a single prep template.

        Parameters
        ----------
        prep : object
            The prep template to show. Its ``id``, ``status``,
            ``data_type()``, ``raw_data``, ``get_filepaths()``,
            ``investigation_type``, ``preprocessed_data``,
            ``preprocessing_status`` and ``check_restrictions()`` are used.
        study_id, is_editable, ena_terms, study_status, user_defined_terms
            Forwarded unchanged to the template.
        raw_data_files : sized collection
            Only its length is used, to choose which column restriction is
            checked.

        Returns
        -------
        object
            Whatever ``self.render_string`` returns for
            ``prep_template_panel.html``.

        Raises
        ------
        ValueError
            If the filetype of the linked raw data is not SFF, FASTA or
            FASTQ.
        """
        # Check if the request came from a local source
        local_request = self._is_local()

        pt_id = prep.id
        class1, class2, color = STATUS_STYLER[prep.status]
        dtype = prep.data_type()
        raw_data = RawData(prep.raw_data)
        prep_fps = prep.get_filepaths()
        inv_type = prep.investigation_type
        ppd = prep.preprocessed_data
        ppd_status = prep.preprocessing_status

        # The available parameter sets depend on the raw data filetype
        if raw_data.filetype in ('SFF', 'FASTA'):
            param_iter = Preprocessed454Params.iter()
        elif raw_data.filetype == 'FASTQ':
            param_iter = PreprocessedIlluminaParams.iter()
        else:
            raise ValueError("Don't know what to do but this exception will "
                             "never actually get shown anywhere because why "
                             "would you want to see tracebacks?")

        # One (id, name, html description) tuple per parameter set
        options = []
        for param in param_iter:
            described = '<br>'.join(
                ["<b>%s:</b> %s" % (key, value)
                 for key, value in viewitems(param.values)])
            options.append((param.id, param.name, described))

        # Unfortunately, both the prep template and the qiime mapping files
        # have the same type. The way to differentiate them is if we have
        # the substring 'qiime' in the basename
        labelled_fps = []
        for fp_id, fp in prep_fps:
            if 'qiime' in basename(fp):
                label = "Qiime mapping"
            else:
                label = "Prep template"
            labelled_fps.append((fp_id, fp, label))

        # Check if the template has all the required columns for
        # preprocessing. Data types outside TARGET_GENE_DATA_TYPES are not
        # checked and keep the button enabled
        show_btn = True
        disabled_msg = None
        if prep.data_type() in TARGET_GENE_DATA_TYPES:
            if len(raw_data_files) > 2:
                key = 'demultiplex_multiple'
            else:
                key = 'demultiplex'
            missing = prep.check_restrictions(
                [PREP_TEMPLATE_COLUMNS_TARGET_GENE[key]])
            if len(missing) > 0:
                show_btn = False
                disabled_msg = (
                    "Preprocessing disabled due to missing columns in the "
                    "prep template: %s" % ', '.join(missing))

        return self.render_string(
            "study_description_templates/prep_template_panel.html",
            prep_id=pt_id,
            status_class1=class1,
            status_class2=class2,
            status_color=color,
            data_type=dtype,
            filepaths=labelled_fps,
            investigation_type=inv_type,
            preprocessed_data=ppd,
            preprocessing_status=ppd_status,
            study_id=study_id,
            is_local_request=local_request,
            is_editable=is_editable,
            ena_terms=ena_terms,
            study_status=study_status,
            user_defined_terms=user_defined_terms,
            preprocess_options=options,
            show_preprocess_btn=show_btn,
            no_preprocess_msg=disabled_msg)