Пример #1
0
def create_sample_template(fp, study, is_mapping_file, data_type=None):
    """Creates a sample template

    Parameters
    ----------
    fp : str
        The file path to the template file
    study : qiita_db.study.Study
        The study to add the sample template to
    is_mapping_file : bool
        Whether `fp` contains a mapping file or a sample template
    data_type : str, optional
        If `is_mapping_file` is True, the data type of the prep template to be
        created

    Returns
    -------
    dict of {str: str}
        A dict of the form {'status': str, 'message': str}. 'status' is
        'success' (empty message), 'warning' (the message holds the distinct
        warning texts, one per line, in the order they were first raised) or
        'danger' (the message is the text of the exception raised)

    Notes
    -----
    `fp` is removed only when the template creation succeeds; if an exception
    is raised the file is left in place.
    """
    # The imports need to be in here because this code is executed in
    # the ipython workers
    import warnings
    from os import remove
    from qiita_db.metadata_template.sample_template import SampleTemplate
    from qiita_db.metadata_template.util import load_template_to_dataframe
    from qiita_ware.metadata_pipeline import (
        create_templates_from_qiime_mapping_file)

    status = 'success'
    msg = ''
    try:
        with warnings.catch_warnings(record=True) as warns:
            if is_mapping_file:
                create_templates_from_qiime_mapping_file(fp, study,
                                                         data_type)
            else:
                SampleTemplate.create(load_template_to_dataframe(fp),
                                      study)
            remove(fp)

            # join all the warning messages into one. Note that this
            # info will be ignored if an exception is raised
            if warns:
                # Drop repeated messages but keep the order in which they
                # were raised; going through a plain set() would make the
                # order of the lines shown to the user arbitrary
                seen = set()
                unique_msgs = []
                for w in warns:
                    text = str(w.message)
                    if text not in seen:
                        seen.add(text)
                        unique_msgs.append(text)
                msg = '\n'.join(unique_msgs)
                status = 'warning'
    except Exception as e:
        # Some error occurred while processing the sample template
        # Show the error to the user so they can fix the template
        status = 'danger'
        msg = str(e)

    return {'status': status, 'message': msg}
Пример #2
0
def create_sample_template(fp, study, is_mapping_file, data_type=None):
    """Creates a sample template

    Parameters
    ----------
    fp : str
        The file path to the template file
    study : qiita_db.study.Study
        The study to add the sample template to
    is_mapping_file : bool
        Whether `fp` contains a mapping file or a sample template
    data_type : str, optional
        If `is_mapping_file` is True, the data type of the prep template to be
        created

    Returns
    -------
    dict of {str: str}
        A dict of the form {'status': str, 'message': str}. The status is
        'success' when nothing was reported, 'warning' when warnings were
        recorded (the message lists each distinct warning once, one per
        line, in first-raised order) and 'danger' when an exception was
        raised (the message is the exception text)

    Notes
    -----
    The file at `fp` is deleted after a successful creation only; it is kept
    when the processing raises.
    """
    # The imports need to be in here because this code is executed in
    # the ipython workers
    import warnings
    from os import remove
    from qiita_db.metadata_template.sample_template import SampleTemplate
    from qiita_db.metadata_template.util import load_template_to_dataframe
    from qiita_ware.metadata_pipeline import (
        create_templates_from_qiime_mapping_file)

    status = 'success'
    msg = ''
    try:
        with warnings.catch_warnings(record=True) as warns:
            if is_mapping_file:
                create_templates_from_qiime_mapping_file(fp, study,
                                                         data_type)
            else:
                SampleTemplate.create(load_template_to_dataframe(fp),
                                      study)
            remove(fp)

            # join all the warning messages into one. Note that this
            # info will be ignored if an exception is raised
            if warns:
                # De-duplicate while preserving the emission order so the
                # resulting message is deterministic (a set() has no
                # guaranteed iteration order)
                already_reported = set()
                ordered_texts = []
                for recorded in warns:
                    warning_text = str(recorded.message)
                    if warning_text in already_reported:
                        continue
                    already_reported.add(warning_text)
                    ordered_texts.append(warning_text)
                msg = '\n'.join(ordered_texts)
                status = 'warning'
    except Exception as e:
        # Some error occurred while processing the sample template
        # Show the error to the user so they can fix the template
        status = 'danger'
        msg = str(e)

    return {'status': status, 'message': msg}
Пример #3
0
    def test_delete_sample_template(self):
        """Run the delete_sample_template job on a failing and a passing study
        """
        # Error case: the job on study 1 is expected to end in 'error' with
        # a message about associated prep templates
        failing_job = self._create_job('delete_sample_template', {'study': 1})
        private_task(failing_job.id)
        self.assertEqual(failing_job.status, 'error')
        expected_error = ("Sample template cannot be erased because there are "
                          "prep templates associated")
        self.assertIn(expected_error, failing_job.log.msg)

        # Success case: a brand new study that only has a sample template
        study_info = {
            "timeseries_type_id": '1',
            "metadata_complete": 'true',
            "mixs_compliant": 'true',
            "number_samples_collected": 25,
            "number_samples_promised": 28,
            "study_alias": "TDST",
            "study_description": "Test delete sample template",
            "study_abstract": "Test delete sample template",
            "principal_investigator_id": StudyPerson(1)
        }
        owner = User('*****@*****.**')
        fresh_study = Study.create(
            owner, "Delete Sample Template test", study_info)

        sample_row = {
            'physical_specimen_location': 'location1',
            'physical_specimen_remaining': 'true',
            'dna_extracted': 'true',
            'sample_type': 'type1',
            'collection_timestamp': '2014-05-29 12:24:15',
            'host_subject_id': 'NotIdentified',
            'Description': 'Test Sample 1',
            'latitude': '42.42',
            'longitude': '41.41',
            'taxon_id': '9606',
            'scientific_name': 'h**o sapiens'
        }
        sample_md = pd.DataFrame.from_dict(
            {'Sample1': sample_row}, orient='index', dtype=str)
        SampleTemplate.create(sample_md, fresh_study)

        passing_job = self._create_job(
            'delete_sample_template', {'study': fresh_study.id})
        private_task(passing_job.id)
        self.assertEqual(passing_job.status, 'success')
        # The sample template must be gone once the job has finished
        self.assertFalse(SampleTemplate.exists(fresh_study.id))
Пример #4
0
    def test_get_lat_longs_EMP_portal(self):
        """With the EMP portal active, only the new sample's coordinates are
        expected back from get_lat_longs
        """
        study_info = {
            'timeseries_type_id': 1,
            'lab_person_id': None,
            'principal_investigator_id': 3,
            'metadata_complete': False,
            'mixs_compliant': True,
            'study_description': 'desc',
            'study_alias': 'alias',
            'study_abstract': 'abstract'}

        owner = User('*****@*****.**')
        emp_study = Study.create(owner, 'test_study_1', efo=[1],
                                 info=study_info)
        # Make the new study visible through the EMP portal
        Portal('EMP').add_studies([emp_study.id])

        sample_row = {
            'physical_specimen_location': 'location1',
            'physical_specimen_remaining': True,
            'dna_extracted': True,
            'sample_type': 'type1',
            'collection_timestamp': datetime(2014, 5, 29, 12, 24, 51),
            'host_subject_id': 'NotIdentified',
            'Description': 'Test Sample 4',
            'str_column': 'Value for sample 4',
            'int_column': 4,
            'latitude': 42.42,
            'longitude': 41.41,
            'taxon_id': 9606,
            'scientific_name': 'h**o sapiens'}

        sample_md = pd.DataFrame.from_dict({'my.sample': sample_row},
                                           orient='index')
        SampleTemplate.create(sample_md, emp_study)

        # Switch the active portal before querying
        qiita_config.portal = 'EMP'

        self.assertItemsEqual(get_lat_longs(), [[42.42, 41.41]])
Пример #5
0
    def test_delete_sample_template(self):
        """Cover the error and the success outcome of delete_sample_template
        """
        command = 'delete_sample_template'

        # Error case
        err_job = self._create_job(command, {'study': 1})
        private_task(err_job.id)
        self.assertEqual(err_job.status, 'error')
        self.assertIn(
            "Sample template cannot be erased because there are "
            "prep templates associated",
            err_job.log.msg)

        # Success case
        new_study_info = {
            "timeseries_type_id": '1',
            "metadata_complete": 'true',
            "mixs_compliant": 'true',
            "number_samples_collected": 25,
            "number_samples_promised": 28,
            "study_alias": "TDST",
            "study_description": "Test delete sample template",
            "study_abstract": "Test delete sample template",
            "principal_investigator_id": StudyPerson(1),
        }
        new_study = Study.create(
            User('*****@*****.**'), "Delete Sample Template test",
            new_study_info)

        # A single-sample template is enough to have something to delete
        rows = {
            'Sample1': {
                'physical_specimen_location': 'location1',
                'physical_specimen_remaining': 'true',
                'dna_extracted': 'true',
                'sample_type': 'type1',
                'collection_timestamp': '2014-05-29 12:24:15',
                'host_subject_id': 'NotIdentified',
                'Description': 'Test Sample 1',
                'latitude': '42.42',
                'longitude': '41.41',
                'taxon_id': '9606',
                'scientific_name': 'h**o sapiens',
            },
        }
        SampleTemplate.create(
            pd.DataFrame.from_dict(rows, orient='index', dtype=str),
            new_study)

        ok_job = self._create_job(command, {'study': new_study.id})
        private_task(ok_job.id)
        self.assertEqual(ok_job.status, 'success')
        self.assertFalse(SampleTemplate.exists(new_study.id))
Пример #6
0
    def test_sample_template_handler_patch_request(self):
        """Walk through the error paths and the remove/replace operations"""
        user = User('*****@*****.**')
        # Short local alias for the function under test
        patch = sample_template_handler_patch_request

        # Test user doesn't have access
        with self.assertRaisesRegexp(
                HTTPError, 'User does not have access to study'):
            patch(User('*****@*****.**'), "remove",
                  "/1/columns/season_environment/")

        # Test study doesn't exist
        with self.assertRaisesRegexp(HTTPError, 'Study does not exist'):
            patch(user, "remove", "/10000/columns/season_environment/")

        # Test sample template doesn't exist
        new_study = self._create_study('Patching test')
        no_template_regex = (
            "Study %s doesn't have sample information" % new_study.id)
        with self.assertRaisesRegexp(HTTPError, no_template_regex):
            patch(user, "remove",
                  "/%s/columns/season_environment/" % new_study.id)

        # Test wrong operation value
        unsupported_regex = ('Operation add not supported. Current supported '
                             'operations: remove.')
        with self.assertRaisesRegexp(HTTPError, unsupported_regex):
            patch(user, 'add', '/1/columns/season_environment')

        # Test wrong path parameter < 2
        with self.assertRaisesRegexp(HTTPError, 'Incorrect path parameter'):
            patch(user, 'ignored', '1')

        # TESTS FOR OPERATION: remove
        # Test wrong path parameter
        with self.assertRaisesRegexp(HTTPError, 'Incorrect path parameter'):
            patch(user, 'remove', '/1/season_environment/')

        # Add sample information to the new study so we can delete one column
        # without affecting the other tests
        two_col_row = {'col1': 'val1', 'col2': 'val2'}
        sample_md = pd.DataFrame.from_dict(
            {'Sample1': two_col_row}, orient='index', dtype=str)
        st = SampleTemplate.create(sample_md, new_study)

        # Test success
        remove_path = "/%s/columns/col2/" % new_study.id
        obs = patch(user, "remove", remove_path)
        self.assertEqual(obs.keys(), ['job'])
        job_info = r_client.get('sample_template_%s' % new_study.id)
        self.assertIsNotNone(job_info)

        # Wait until the job is done
        wait_for_processing_job(loads(job_info)['job_id'])
        self.assertNotIn('col2', st.categories())

        # TESTS FOR OPERATION: replace
        # Test incorrect path parameter with replace
        with self.assertRaisesRegexp(HTTPError, 'Incorrect path parameter'):
            patch(user, "replace", "/1/")

        # Test attribute not found
        with self.assertRaisesRegexp(HTTPError, 'Attribute name not found'):
            patch(user, "replace", "/1/name")

        # Test missing value
        missing_value_regex = ('Value is required when updating sample '
                               'information')
        with self.assertRaisesRegexp(HTTPError, missing_value_regex):
            patch(user, "replace", "/1/data")

        # Test file doesn't exist
        with self.assertRaisesRegexp(HTTPError, 'Filepath not found'):
            patch(user, "replace", "/1/data", req_value='DoesNotExist')

        # Test success
        obs = patch(user, "replace", "/1/data",
                    req_value='uploaded_file.txt')
        self.assertEqual(obs.keys(), ['job'])
        job_info = r_client.get('sample_template_1')
        self.assertIsNotNone(job_info)

        # Wait until the job is done
        wait_for_processing_job(loads(job_info)['job_id'])
Пример #7
0
    def generate_new_study_with_preprocessed_data(self):
        """Build a new study with templates and a "Demultiplexed" artifact

        Returns
        -------
        The object returned by `Artifact.create` for the demultiplexed
        sequences attached to the newly created prep template
        """
        study_info = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "number_samples_collected": 3,
            "number_samples_promised": 3,
            "study_alias": "Test EBI",
            "study_description": "Study for testing EBI",
            "study_abstract": "Study for testing EBI",
            "emp_person_id": StudyPerson(2),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1)
        }
        study = Study.create(
            User('*****@*****.**'), "Test EBI study", study_info)

        # Sample information: the rows only differ in the collection day
        # and in the description
        sample_rows = {}
        for sample_id, day, description in (
                ('Sample1', 1, 'Test Sample 1'),
                ('Sample2', 2, 'Test Sample 2'),
                ('Sample3', 3, 'Test Sample 3')):
            sample_rows[sample_id] = {
                'collection_timestamp': datetime(2015, 6, day, 7, 0, 0),
                'physical_specimen_location': 'location1',
                'taxon_id': 9606,
                'scientific_name': 'h**o sapiens',
                'Description': description
            }
        sample_md = pd.DataFrame.from_dict(
            sample_rows, orient='index', dtype=str)
        SampleTemplate.create(sample_md, study)

        # Prep information: the rows only differ in the barcode and in the
        # experiment design description
        prep_rows = {}
        for sample_id, barcode, design in (
                ('Sample1', 'CGTAGAGCTCTC', "Random value 1"),
                ('Sample2', 'CGTAGAGCTCTA', "Random value 2"),
                ('Sample3', 'CGTAGAGCTCTT', "Random value 3")):
            prep_rows[sample_id] = {
                'primer': 'GTGCCAGCMGCCGCGGTAA',
                'barcode': barcode,
                'center_name': 'KnightLab',
                'platform': 'ILLUMINA',
                'instrument_model': 'Illumina MiSeq',
                'library_construction_protocol': 'Protocol ABC',
                'experiment_design_description': design
            }
        prep_md = pd.DataFrame.from_dict(
            prep_rows, orient='index', dtype=str)
        pt = PrepTemplate.create(prep_md, study, "16S", 'Metagenomics')

        # Write the FASTA example and convert it into the demux file
        fna_fp = join(self.temp_dir, 'seqs.fna')
        demux_fp = join(self.temp_dir, 'demux.seqs')
        with open(fna_fp, 'w') as fna_fh:
            fna_fh.write(FASTA_EXAMPLE_2.format(study.id))
        with File(demux_fp, 'w') as demux_fh:
            to_hdf5(fna_fp, demux_fh)

        return Artifact.create(
            [(demux_fp, 6)], "Demultiplexed", prep_template=pt)
Пример #8
0
    def generate_new_study_with_preprocessed_data(self):
        """Creates a new study up to the processed data for testing

        Warnings raised while the study, the templates and the artifact are
        created are ignored, but only for the duration of this call: the
        warning filters that were active before the call are restored when
        it returns.

        Returns
        -------
        The object returned by `Artifact.create` for the "Demultiplexed"
        artifact attached to the new prep template
        """
        # Imported here so the block does not rely on the module importing
        # `catch_warnings` next to `simplefilter`
        from warnings import catch_warnings

        # ignoring warnings generated when adding templates. The filter is
        # scoped with catch_warnings() because a bare simplefilter("ignore")
        # would keep muting warnings for everything that runs afterwards
        with catch_warnings():
            simplefilter("ignore")
            info = {
                "timeseries_type_id": 1,
                "metadata_complete": True,
                "mixs_compliant": True,
                "number_samples_collected": 3,
                "number_samples_promised": 3,
                "study_alias": "Test EBI",
                "study_description": "Study for testing EBI",
                "study_abstract": "Study for testing EBI",
                "emp_person_id": StudyPerson(2),
                "principal_investigator_id": StudyPerson(3),
                "lab_person_id": StudyPerson(1)
            }
            study = Study.create(
                User('*****@*****.**'), "Test EBI study", [1], info)
            metadata_dict = {
                'Sample1': {
                    'collection_timestamp': datetime(2015, 6, 1, 7, 0, 0),
                    'physical_specimen_location': 'location1',
                    'taxon_id': 9606,
                    'scientific_name': 'h**o sapiens',
                    'Description': 'Test Sample 1'},
                'Sample2': {
                    'collection_timestamp': datetime(2015, 6, 2, 7, 0, 0),
                    'physical_specimen_location': 'location1',
                    'taxon_id': 9606,
                    'scientific_name': 'h**o sapiens',
                    'Description': 'Test Sample 2'},
                'Sample3': {
                    'collection_timestamp': datetime(2015, 6, 3, 7, 0, 0),
                    'physical_specimen_location': 'location1',
                    'taxon_id': 9606,
                    'scientific_name': 'h**o sapiens',
                    'Description': 'Test Sample 3'}
            }
            metadata = pd.DataFrame.from_dict(metadata_dict, orient='index',
                                              dtype=str)
            SampleTemplate.create(metadata, study)
            metadata_dict = {
                'Sample1': {
                    'primer': 'GTGCCAGCMGCCGCGGTAA',
                    'barcode': 'CGTAGAGCTCTC',
                    'center_name': 'KnightLab',
                    'platform': 'ILLUMINA',
                    'instrument_model': 'Illumina MiSeq',
                    'library_construction_protocol': 'Protocol ABC',
                    'experiment_design_description': "Random value 1"},
                'Sample2': {
                    'primer': 'GTGCCAGCMGCCGCGGTAA',
                    'barcode': 'CGTAGAGCTCTA',
                    'center_name': 'KnightLab',
                    'platform': 'ILLUMINA',
                    'instrument_model': 'Illumina MiSeq',
                    'library_construction_protocol': 'Protocol ABC',
                    'experiment_design_description': "Random value 2"},
                'Sample3': {
                    'primer': 'GTGCCAGCMGCCGCGGTAA',
                    'barcode': 'CGTAGAGCTCTT',
                    'center_name': 'KnightLab',
                    'platform': 'ILLUMINA',
                    'instrument_model': 'Illumina MiSeq',
                    'library_construction_protocol': 'Protocol ABC',
                    'experiment_design_description': "Random value 3"},
            }
            metadata = pd.DataFrame.from_dict(metadata_dict, orient='index',
                                              dtype=str)
            pt = PrepTemplate.create(metadata, study, "16S", 'Metagenomics')
            fna_fp = join(self.temp_dir, 'seqs.fna')
            demux_fp = join(self.temp_dir, 'demux.seqs')
            with open(fna_fp, 'w') as f:
                f.write(FASTA_EXAMPLE_2.format(study.id))
            with File(demux_fp, 'w') as f:
                to_hdf5(fna_fp, f)

            ppd = Artifact.create(
                [(demux_fp, 6)], "Demultiplexed", prep_template=pt)

        return ppd
Пример #9
0
    def test_delete_study(self):
        """delete_study: error on study 1, success on a brand new study"""
        # as samples have been submitted to EBI, this will fail
        ebi_job = self._create_job('delete_study', {'study': 1})
        private_task(ebi_job.id)
        self.assertEqual(ebi_job.status, 'error')
        self.assertIn("Artifact 2 has been submitted to EBI",
                      ebi_job.log.msg)
        # making sure the analysis, first thing to delete, still exists
        self.assertTrue(Analysis.exists(1))

        study_info = {
            "timeseries_type_id": '1',
            "metadata_complete": 'true',
            "mixs_compliant": 'true',
            "study_alias": "FCM",
            "study_description": ("Microbiome of people who eat nothing but "
                                  "fried chicken"),
            "study_abstract": ("Exploring how a high fat diet changes the "
                               "gut microbiome"),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1)
        }
        # The timestamp is appended to the title on every run
        new_study = Study.create(
            User('*****@*****.**'), "Fried Chicken Microbiome %s" % time(),
            study_info)

        # adding tags
        new_study.update_tags(User('*****@*****.**'), ['my new tag!'])

        # creating a sample information file: three identical rows
        sample_rows = {}
        for sample_id in ('Sample1', 'Sample2', 'Sample3'):
            sample_rows[sample_id] = {
                'physical_specimen_location': 'location1',
                'taxon_id': '9606',
                'scientific_name': 'h**o sapiens'
            }
        sample_md = pd.DataFrame.from_dict(sample_rows, orient='index')
        SampleTemplate.create(sample_md, new_study)

        # creating a preparation information file
        prep_row = {
            'center_name': 'ANL',
            'target_subfragment': 'V4',
            'center_project_name': 'Test Project'
        }
        prep_md = pd.DataFrame.from_dict(
            {'Sample1': prep_row}, orient='index', dtype=str)
        PrepTemplate.create(prep_md, new_study, '16S')

        delete_job = self._create_job('delete_study',
                                      {'study': new_study.id})
        private_task(delete_job.id)
        self.assertEqual(delete_job.status, 'success')

        # making sure the study doesn't exist
        with self.assertRaises(QiitaDBUnknownIDError):
            Study(new_study.id)
Пример #10
0
    def test_sample_template_handler_patch_request(self):
        """Check the patch request errors plus the remove/replace successes
        """
        user = User('*****@*****.**')

        # Test user doesn't have access
        with self.assertRaisesRegexp(
                HTTPError, 'User does not have access to study'):
            sample_template_handler_patch_request(
                User('*****@*****.**'),
                "remove",
                "/1/columns/season_environment/")

        # Test study doesn't exist
        with self.assertRaisesRegexp(HTTPError, 'Study does not exist'):
            sample_template_handler_patch_request(
                user, "remove", "/10000/columns/season_environment/")

        # Test sample template doesn't exist
        new_study = self._create_study('Patching test')
        missing_st = "Study %s doesn't have sample information" % new_study.id
        with self.assertRaisesRegexp(HTTPError, missing_st):
            sample_template_handler_patch_request(
                user,
                "remove",
                "/%s/columns/season_environment/" % new_study.id)

        # Requests that must be rejected, as (operation, path, error regex):
        #  - wrong operation value
        #  - wrong path parameter < 2
        #  - operation remove with a wrong path parameter
        rejected_requests = [
            ('add', '/1/columns/season_environment',
             'Operation add not supported. Current supported '
             'operations: remove.'),
            ('ignored', '1', 'Incorrect path parameter'),
            ('remove', '/1/season_environment/', 'Incorrect path parameter'),
        ]
        for operation, path, error_regex in rejected_requests:
            with self.assertRaisesRegexp(HTTPError, error_regex):
                sample_template_handler_patch_request(user, operation, path)

        # Add sample information to the new study so we can delete one column
        # without affecting the other tests
        columns = {'col1': 'val1', 'col2': 'val2'}
        md = pd.DataFrame.from_dict({'Sample1': columns},
                                    orient='index', dtype=str)
        st = SampleTemplate.create(md, new_study)

        # Test success of the remove operation
        obs = sample_template_handler_patch_request(
            user, "remove", "/%s/columns/col2/" % new_study.id)
        self.assertEqual(obs.keys(), ['job'])
        redis_key = 'sample_template_%s' % new_study.id
        job_info = r_client.get(redis_key)
        self.assertIsNotNone(job_info)

        # Wait until the job is done
        wait_for_processing_job(loads(job_info)['job_id'])
        self.assertNotIn('col2', st.categories())

        # Rejected replace requests, as (path, error regex):
        #  - incorrect path parameter
        #  - attribute not found
        #  - missing value
        rejected_replacements = [
            ("/1/", 'Incorrect path parameter'),
            ("/1/name", 'Attribute name not found'),
            ("/1/data",
             'Value is required when updating sample '
             'information'),
        ]
        for path, error_regex in rejected_replacements:
            with self.assertRaisesRegexp(HTTPError, error_regex):
                sample_template_handler_patch_request(user, "replace", path)

        # Test file doesn't exist
        with self.assertRaisesRegexp(HTTPError, 'Filepath not found'):
            sample_template_handler_patch_request(
                user, "replace", "/1/data", req_value='DoesNotExist')

        # Test success of the replace operation
        obs = sample_template_handler_patch_request(
            user, "replace", "/1/data", req_value='uploaded_file.txt')
        self.assertEqual(obs.keys(), ['job'])
        job_info = r_client.get('sample_template_1')
        self.assertIsNotNone(job_info)

        # Wait until the job is done
        wait_for_processing_job(loads(job_info)['job_id'])
Пример #11
0
    def process_sample_template(self, study, user, callback):
        """Process a sample template from the POST method

        The file named by the "sample_template" request argument is looked up
        in the study's folder of the "uploads" mountpoint, loaded either as a
        QIIME mapping file or as a sample template, and removed once it has
        been processed successfully.

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object (not used by this method)
        callback : function
            The callback function to call with the results once the processing
            is done. It receives a single 5-tuple
            (message, message level, None, None, None)

        Raises
        ------
        HTTPError
            If the sample template file does not exist
        """
        # Both arguments are mandatory at this point; if any is missing
        # tornado raises its own error
        sample_template = self.get_argument('sample_template')
        data_type = self.get_argument('data_type')

        # Location of the sample template inside the uploads folder
        # NOTE(review): `sample_template` comes straight from the request and
        # is joined into the path without validation - confirm callers or
        # tornado prevent names such as '../x'
        _mp_id, uploads_dir = get_mountpoint("uploads")[0]
        template_fp = join(uploads_dir, str(study.id), sample_template)

        if not exists(template_fp):
            # The file does not exist, fail nicely
            raise HTTPError(404, "This file doesn't exist: %s" % template_fp)

        # Outcome reported when nothing goes wrong
        msg = "The sample template '%s' has been added" % sample_template
        msg_level = "success"
        is_mapping_file = looks_like_qiime_mapping_file(template_fp)

        try:
            if is_mapping_file and not data_type:
                raise ValueError("Please, choose a data type if uploading a "
                                 "QIIME mapping file")

            with warnings.catch_warnings(record=True) as caught:
                if not is_mapping_file:
                    template_df = load_template_to_dataframe(template_fp)
                    SampleTemplate.create(template_df, study)
                else:
                    create_templates_from_qiime_mapping_file(
                        template_fp, study, int(data_type))
                remove(template_fp)

                # Any recorded warning replaces the success message. This
                # is overwritten below if an exception is raised
                if caught:
                    msg = '; '.join(
                        convert_text_html(str(w.message)) for w in caught)
                    msg_level = 'warning'

        except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                QiitaDBDuplicateError, IOError, ValueError, KeyError,
                CParserError, QiitaDBDuplicateHeaderError,
                QiitaDBError, QiitaWareError) as e:
            # Some error occurred while processing the sample template
            # Show the error to the user so they can fix the template
            if is_mapping_file:
                action = 'parsing the QIIME mapping file'
            else:
                action = 'parsing the sample template'
            raw_msg = html_error_message % (action, basename(template_fp),
                                            str(e))
            msg = convert_text_html(raw_msg)
            msg_level = "danger"

        callback((msg, msg_level, None, None, None))
Пример #12
0
def create_templates_from_qiime_mapping_file(fp, study, data_type):
    """Creates a sample template and a prep template from qiime mapping file

    The mapping file is loaded with '#SampleID' as index, the QIIME specific
    columns are renamed, and the columns are then split in two groups: the
    ones that belong to the prep template and the rest, which go to the
    sample template.

    Parameters
    ----------
    fp : str or file-like object
        Path to the QIIME mapping file
    study : Study
        The study to which the sample template belongs to
    data_type : str or int
        The data_type of the prep_template

    Returns
    -------
    (SampleTemplate, PrepTemplate)
        The templates created from the QIIME mapping file

    Raises
    ------
    QiitaWareError
        If any of the QIIME columns that are renamed is missing from the
        mapping file
    """
    qiime_map = load_template_to_dataframe(fp, index='#SampleID')

    # There are a few columns in the QIIME mapping file that are special and
    # we know how to deal with them
    rename_cols = {
        'BarcodeSequence': 'barcode',
        'LinkerPrimerSequence': 'primer',
        'Description': 'description',
    }

    # ReverseLinkerPrimer is only renamed when the file provides it
    if 'ReverseLinkerPrimer' in qiime_map:
        rename_cols['ReverseLinkerPrimer'] = 'reverselinkerprimer'

    missing = set(rename_cols).difference(qiime_map.columns)
    if missing:
        # Sort the names so the error message is the same on every run
        raise QiitaWareError(
            "Error generating the templates from the QIIME mapping file. "
            "Missing QIIME mapping file columns: %s"
            % ', '.join(sorted(missing)))

    qiime_map.rename(columns=rename_cols, inplace=True)

    # Fix the casing in the columns that we control
    qiime_map.columns = [
        c.lower() if c.lower() in CONTROLLED_COLS else c
        for c in qiime_map.columns
    ]

    # Figure out which columns belong to the prep template
    def _col_iterator(restriction_set):
        # Yield every column name listed by every restriction in the set
        for restriction in restriction_set.values():
            for col in restriction.columns.keys():
                yield col

    pt_cols = set(_col_iterator(PREP_TEMPLATE_COLUMNS))

    # The data type can be provided as an id; the check below needs its name
    data_type_str = (convert_from_id(data_type, "data_type")
                     if isinstance(data_type, int) else data_type)

    if data_type_str in TARGET_GENE_DATA_TYPES:
        pt_cols.update(_col_iterator(PREP_TEMPLATE_COLUMNS_TARGET_GENE))
        pt_cols.add('reverselinkerprimer')

    qiime_cols = set(qiime_map.columns)
    pt_cols = qiime_cols.intersection(pt_cols)
    st_cols = qiime_cols.difference(pt_cols)

    # pandas does not accept sets as indexers: select the columns through
    # sorted lists, which also makes the column order deterministic
    st_md = qiime_map.loc[:, sorted(st_cols)]
    pt_md = qiime_map.loc[:, sorted(pt_cols)]

    return (SampleTemplate.create(st_md, study),
            PrepTemplate.create(pt_md, study, data_type))
Пример #13
0
def create_templates_from_qiime_mapping_file(fp, study, data_type):
    """Creates a sample template and a prep template from qiime mapping file

    The mapping file is loaded with '#SampleID' as index, the QIIME specific
    columns are renamed, and the columns are then split in two groups: the
    ones that belong to the prep template and the rest, which go to the
    sample template.

    Parameters
    ----------
    fp : str or file-like object
        Path to the QIIME mapping file
    study : Study
        The study to which the sample template belongs to
    data_type : str or int
        The data_type of the prep_template

    Returns
    -------
    (SampleTemplate, PrepTemplate)
        The templates created from the QIIME mapping file

    Raises
    ------
    QiitaWareError
        If any of the QIIME columns that are renamed is missing from the
        mapping file
    """
    qiime_map = load_template_to_dataframe(fp, index='#SampleID')

    # There are a few columns in the QIIME mapping file that are special and
    # we know how to deal with them
    rename_cols = {
        'BarcodeSequence': 'barcode',
        'LinkerPrimerSequence': 'primer',
        'Description': 'description',
    }

    # ReverseLinkerPrimer is only renamed when the file provides it
    if 'ReverseLinkerPrimer' in qiime_map:
        rename_cols['ReverseLinkerPrimer'] = 'reverselinkerprimer'

    missing = set(rename_cols).difference(qiime_map.columns)
    if missing:
        # Sort the names so the error message is the same on every run
        raise QiitaWareError(
            "Error generating the templates from the QIIME mapping file. "
            "Missing QIIME mapping file columns: %s"
            % ', '.join(sorted(missing)))

    qiime_map.rename(columns=rename_cols, inplace=True)

    # Fix the casing in the columns that we control
    qiime_map.columns = [c.lower() if c.lower() in CONTROLLED_COLS else c
                         for c in qiime_map.columns]

    # Figure out which columns belong to the prep template
    def _col_iterator(restriction_set):
        # Yield every column name listed by every restriction in the set
        for restriction in viewvalues(restriction_set):
            for col in viewkeys(restriction.columns):
                yield col

    pt_cols = set(_col_iterator(PREP_TEMPLATE_COLUMNS))

    # The data type can be provided as an id; the check below needs its name
    data_type_str = (convert_from_id(data_type, "data_type")
                     if isinstance(data_type, int) else data_type)

    if data_type_str in TARGET_GENE_DATA_TYPES:
        pt_cols.update(_col_iterator(PREP_TEMPLATE_COLUMNS_TARGET_GENE))
        pt_cols.add('reverselinkerprimer')

    qiime_cols = set(qiime_map.columns)
    pt_cols = qiime_cols.intersection(pt_cols)
    st_cols = qiime_cols.difference(pt_cols)

    # DataFrame.ix is no longer available in pandas; these are label based
    # selections, so use .loc. Sorted lists are passed because sets are not
    # accepted as indexers, and they make the column order deterministic
    st_md = qiime_map.loc[:, sorted(st_cols)]
    pt_md = qiime_map.loc[:, sorted(pt_cols)]

    return (SampleTemplate.create(st_md, study),
            PrepTemplate.create(pt_md, study, data_type))
Пример #14
0
    def generate_new_study_with_preprocessed_data(self):
        """Creates a new study up to the processed data for testing

        A study is created together with a sample template and a 16S prep
        template for three samples. A FASTA file and its demultiplexed
        version are written under `self.temp_dir`, and the latter is
        registered as a "Demultiplexed" artifact of the prep template.

        Returns
        -------
        Artifact
            The artifact returned by `Artifact.create` for the demultiplexed
            file
        """
        info = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "number_samples_collected": 3,
            "number_samples_promised": 3,
            "study_alias": "Test EBI",
            "study_description": "Study for testing EBI",
            "study_abstract": "Study for testing EBI",
            "emp_person_id": StudyPerson(2),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1),
        }
        # NOTE(review): the user id below looks redacted in this copy of the
        # code - confirm the intended test user before relying on it
        study = Study.create(User("*****@*****.**"), "Test EBI study", [1], info)

        sample_ids = ["Sample1", "Sample2", "Sample3"]

        # The samples only differ in the collection day and the description,
        # both of which follow the sample number
        sample_md = {
            sample_id: {
                "collection_timestamp": datetime(2015, 6, num, 7, 0, 0),
                "physical_specimen_location": "location1",
                "taxon_id": 9606,
                "scientific_name": "homo sapiens",
                "Description": "Test Sample {}".format(num),
            }
            for num, sample_id in enumerate(sample_ids, 1)
        }
        metadata = pd.DataFrame.from_dict(sample_md, orient="index", dtype=str)
        SampleTemplate.create(metadata, study)

        # The preps only differ in the barcode and the design description
        barcodes = ["CGTAGAGCTCTC", "CGTAGAGCTCTA", "CGTAGAGCTCTT"]
        prep_md = {
            sample_id: {
                "primer": "GTGCCAGCMGCCGCGGTAA",
                "barcode": barcode,
                "center_name": "KnightLab",
                "platform": "ILLUMINA",
                "instrument_model": "Illumina MiSeq",
                "library_construction_protocol": "Protocol ABC",
                "experiment_design_description": "Random value {}".format(num),
            }
            for num, (sample_id, barcode) in enumerate(
                zip(sample_ids, barcodes), 1
            )
        }
        metadata = pd.DataFrame.from_dict(prep_md, orient="index", dtype=str)
        pt = PrepTemplate.create(metadata, study, "16S", "Metagenomics")

        # Write the sequences and generate the demultiplexed file from them
        fna_fp = join(self.temp_dir, "seqs.fna")
        demux_fp = join(self.temp_dir, "demux.seqs")
        with open(fna_fp, "w") as f:
            f.write(FASTA_EXAMPLE_2.format(study.id))
        with File(demux_fp, "w") as f:
            to_hdf5(fna_fp, f)

        ppd = Artifact.create([(demux_fp, 6)], "Demultiplexed", prep_template=pt)

        return ppd
Пример #15
0
    def process_sample_template(self, study, user, callback):
        """Process a sample template from the POST method

        The file named by the "sample_template" argument is looked up in the
        study's uploads folder. If it looks like a QIIME mapping file, a
        sample template and a prep template are created from it; otherwise
        only a sample template is created. The file is removed once the
        templates have been created.

        Parameters
        ----------
        study : Study
            The current study object
        user : User
            The current user object
        callback : function
            The callback function to call with the results once the processing
            is done. It receives the tuple
            (msg, msg_level, None, None, None)

        Raises
        ------
        HTTPError
            If the sample template path points outside the study's uploads
            folder or if the sample template file does not exists
        """
        # Only needed for the path containment check below
        from os.path import abspath, sep

        # If we are on this function, the arguments "sample_template" and
        # "data_type" must be defined. If not, let tornado raise its error
        sample_template = self.get_argument('sample_template')
        data_type = self.get_argument('data_type')

        # Get the uploads folder
        _, base_fp = get_mountpoint("uploads")[0]
        # Get the path of the sample template in the uploads folder
        study_fp = join(base_fp, str(study.id))
        fp_rsp = join(study_fp, sample_template)

        # "sample_template" is provided by the client and the file is removed
        # after processing: refuse absolute paths or ".." components that
        # resolve to a location outside the study's uploads folder
        if not abspath(fp_rsp).startswith(abspath(study_fp) + sep):
            raise HTTPError(400, "Invalid sample template path: %s"
                            % sample_template)

        if not exists(fp_rsp):
            # The file does not exist, fail nicely
            raise HTTPError(404, "This file doesn't exist: %s" % fp_rsp)

        # Define here the message and message level in case of success
        msg = "The sample template '%s' has been added" % sample_template
        msg_level = "success"
        is_mapping_file = looks_like_qiime_mapping_file(fp_rsp)

        try:
            if is_mapping_file and not data_type:
                raise ValueError("Please, choose a data type if uploading a "
                                 "QIIME mapping file")

            with warnings.catch_warnings(record=True) as warns:
                if is_mapping_file:
                    create_templates_from_qiime_mapping_file(
                        fp_rsp, study, int(data_type))
                else:
                    SampleTemplate.create(load_template_to_dataframe(fp_rsp),
                                          study)
                # The file is only removed if the templates were created
                remove(fp_rsp)

                # join all the warning messages into one. Note that this
                # info will be ignored if an exception is raised
                if warns:
                    msg = '; '.join(
                        [convert_text_html(str(w.message)) for w in warns])
                    msg_level = 'warning'

        except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                QiitaDBDuplicateError, IOError, ValueError, KeyError,
                CParserError, QiitaDBDuplicateHeaderError, QiitaDBError,
                QiitaWareError) as e:
            # Some error occurred while processing the sample template
            # Show the error to the user so they can fix the template
            error_msg = ('parsing the QIIME mapping file'
                         if is_mapping_file else 'parsing the sample template')
            msg = html_error_message % (error_msg, basename(fp_rsp), str(e))
            msg = convert_text_html(msg)
            msg_level = "danger"

        callback((msg, msg_level, None, None, None))