Пример #1
0
    def write_demux_files(self, prep_template, generate_hdf5=True):
        """Writes a demux test file to avoid duplication of code"""
        fna_fp = join(self.temp_dir, 'seqs.fna')
        demux_fp = join(self.temp_dir, 'demux.seqs')
        if generate_hdf5:
            with open(fna_fp, 'w') as f:
                f.write(FASTA_EXAMPLE)
            with File(demux_fp, "w") as f:
                to_hdf5(fna_fp, f)
        else:
            with open(demux_fp, 'w') as f:
                f.write('')

        if prep_template.artifact is None:
            ppd = Artifact.create(
                [(demux_fp, 6)], "Demultiplexed", prep_template=prep_template,
                can_be_submitted_to_ebi=True, can_be_submitted_to_vamps=True)
        else:
            params = Parameters.from_default_params(
                DefaultParameters(1),
                {'input_data': prep_template.artifact.id})
            ppd = Artifact.create(
                [(demux_fp, 6)], "Demultiplexed",
                parents=[prep_template.artifact], processing_parameters=params,
                can_be_submitted_to_ebi=True, can_be_submitted_to_vamps=True)
        return ppd
Пример #2
0
    def write_demux_files(self, prep_template, generate_hdf5=True):
        """Writes a demux test file to avoid duplication of code"""
        fna_fp = join(self.temp_dir, 'seqs.fna')
        demux_fp = join(self.temp_dir, 'demux.seqs')
        if generate_hdf5:
            with open(fna_fp, 'w') as f:
                f.write(FASTA_EXAMPLE)
            with File(demux_fp, "w") as f:
                to_hdf5(fna_fp, f)
        else:
            with open(demux_fp, 'w') as f:
                f.write('')

        if prep_template.artifact is None:
            ppd = Artifact.create([(demux_fp, 6)],
                                  "Demultiplexed",
                                  prep_template=prep_template)
        else:
            params = Parameters.from_default_params(
                DefaultParameters(1),
                {'input_data': prep_template.artifact.id})
            ppd = Artifact.create([(demux_fp, 6)],
                                  "Demultiplexed",
                                  parents=[prep_template.artifact],
                                  processing_parameters=params)
        return ppd
Пример #3
0
    def setUp(self):
        uploads_path = get_mountpoint('uploads')[0][1]
        # Create prep test file to point at
        self.update_fp = join(uploads_path, '1', 'update.txt')
        with open(self.update_fp, 'w') as f:
            f.write("""sample_name\tnew_col\n1.SKD6.640190\tnew_value\n""")

        self._files_to_remove = [self.update_fp]
        self._files_to_remove = []

        # creating temporal files and artifact
        # NOTE: we don't need to remove the artifact created cause it's
        # used to test the delete functionality
        fd, fp = mkstemp(suffix='_seqs.fna')
        close(fd)
        with open(fp, 'w') as f:
            f.write(">1.sid_r4_0 M02034:17:000000000-A5U18:1:1101:15370:1394 "
                    "1:N:0:1 orig_bc=CATGAGCT new_bc=CATGAGCT bc_diffs=0\n"
                    "GTGTGCCAGCAGCCGCGGTAATACGTAGGG\n")
        # 4 Demultiplexed
        filepaths_processed = [(fp, 4)]
        # 1 for default parameters and input data
        exp_params = Parameters.from_default_params(DefaultParameters(1),
                                                    {'input_data': 1})
        self.artifact = Artifact.create(filepaths_processed, "Demultiplexed",
                                        parents=[Artifact(1)],
                                        processing_parameters=exp_params)
Пример #4
0
    def setUp(self):
        uploads_path = get_mountpoint('uploads')[0][1]
        # Create prep test file to point at
        self.update_fp = join(uploads_path, '1', 'update.txt')
        with open(self.update_fp, 'w') as f:
            f.write("""sample_name\tnew_col\n1.SKD6.640190\tnew_value\n""")

        self._files_to_remove = [self.update_fp]
        self._files_to_remove = []

        # creating temporal files and artifact
        # NOTE: we don't need to remove the artifact created cause it's
        # used to test the delete functionality
        fd, fp = mkstemp(suffix='_seqs.fna')
        close(fd)
        with open(fp, 'w') as f:
            f.write(">1.sid_r4_0 M02034:17:000000000-A5U18:1:1101:15370:1394 "
                    "1:N:0:1 orig_bc=CATGAGCT new_bc=CATGAGCT bc_diffs=0\n"
                    "GTGTGCCAGCAGCCGCGGTAATACGTAGGG\n")
        # 4 Demultiplexed
        filepaths_processed = [(fp, 4)]
        # 1 for default parameters and input data
        exp_params = Parameters.from_default_params(DefaultParameters(1),
                                                    {'input_data': 1})
        self.artifact = Artifact.create(filepaths_processed, "Demultiplexed",
                                        parents=[Artifact(1)],
                                        processing_parameters=exp_params)
Пример #5
0
    def post(self, study_id, prep_id):
        study = self.safe_get_study(study_id)
        if study is None:
            return

        prep_id = to_int(prep_id)
        try:
            p = PrepTemplate(prep_id)
        except QiitaDBUnknownIDError:
            self.fail('Preparation not found', 404)
            return

        if p.study_id != study.id:
            self.fail('Preparation ID not associated with the study', 409)
            return

        artifact_deets = json_decode(self.request.body)
        _, upload = get_mountpoint('uploads')[0]
        base = os.path.join(upload, study_id)
        filepaths = [(os.path.join(base, fp), fp_type)
                     for fp, fp_type in artifact_deets['filepaths']]

        try:
            art = Artifact.create(filepaths,
                                  artifact_deets['artifact_type'],
                                  artifact_deets['artifact_name'],
                                  p)
        except QiitaError as e:
            self.fail(str(e), 406)
            return

        self.write({'id': art.id})
        self.set_status(201)
        self.finish()
Пример #6
0
    def test_delete_analysis(self):
        # adding extra filepaths to make sure the delete works as expected, we
        # basically want 8 -> 9 -> 10 -> 12 -> 14
        #                       -> 11 -> 13
        fd, fp10 = mkstemp(suffix='_table.biom')
        close(fd)
        fd, fp11 = mkstemp(suffix='_table.biom')
        close(fd)
        fd, fp12 = mkstemp(suffix='_table.biom')
        close(fd)
        fd, fp13 = mkstemp(suffix='_table.biom')
        close(fd)
        fd, fp14 = mkstemp(suffix='_table.biom')
        close(fd)
        with biom_open(fp10, 'w') as f:
            et.to_hdf5(f, "test")
        with biom_open(fp11, 'w') as f:
            et.to_hdf5(f, "test")
        with biom_open(fp12, 'w') as f:
            et.to_hdf5(f, "test")
        with biom_open(fp13, 'w') as f:
            et.to_hdf5(f, "test")
        with biom_open(fp14, 'w') as f:
            et.to_hdf5(f, "test")
        self._clean_up_files.extend([fp10, fp11, fp12, fp13, fp14])

        # copying some processing parameters
        a9 = Artifact(9)
        pp = a9.processing_parameters

        # 7: BIOM
        a10 = Artifact.create([(fp10, 7)],
                              "BIOM",
                              parents=[a9],
                              processing_parameters=pp)
        a11 = Artifact.create([(fp11, 7)],
                              "BIOM",
                              parents=[a9],
                              processing_parameters=pp)
        a12 = Artifact.create([(fp12, 7)],
                              "BIOM",
                              parents=[a10],
                              processing_parameters=pp)
        Artifact.create([(fp13, 7)],
                        "BIOM",
                        parents=[a11],
                        processing_parameters=pp)
        Artifact.create([(fp14, 7)],
                        "BIOM",
                        parents=[a12],
                        processing_parameters=pp)

        job = self._create_job('delete_analysis', {'analysis_id': 1})
        private_task(job.id)
        self.assertEqual(job.status, 'success')
        with self.assertRaises(QiitaDBUnknownIDError):
            Analysis(1)
Пример #7
0
def create_raw_data(artifact_type, prep_template, filepaths, name=None):
    """Creates a new raw data

    Needs to be dispachable because it moves large files

    Parameters
    ----------
    artifact_type: str
        The artifact type
    prep_template : qiita_db.metadata_template.prep_template.PrepTemplate
        The template to attach the artifact
    filepaths : list of (str, str)
        The list with filepaths and their filepath types
    name : str, optional
        The name of the new artifact

    Returns
    -------
    dict of {str: str}
        A dict of the form {'status': str, 'message': str}
    """
    from qiita_db.artifact import Artifact

    status = 'success'
    msg = ''
    try:
        Artifact.create(filepaths, artifact_type, name=name,
                        prep_template=prep_template)
    except Exception as e:
        # We should hit this exception rarely (that's why it is an
        # exception)  since at this point we have done multiple checks.
        # However, it can occur in weird cases, so better let the GUI know
        # that this failed
        return {'status': 'danger',
                'message': "Error creating artifact: %s" % str(e)}

    return {'status': status, 'message': msg}
Пример #8
0
    def test_submit_to_EBI(self):
        # setting up test
        fna_fp = join(self.temp_dir, 'seqs.fna')
        demux_fp = join(self.temp_dir, 'demux.seqs')
        with open(fna_fp, 'w') as f:
            f.write(FASTA_EXAMPLE)
        with File(demux_fp, "w") as f:
            to_hdf5(fna_fp, f)

        pt = PrepTemplate(1)
        params = Parameters.from_default_params(DefaultParameters(1),
                                                {'input_data': pt.artifact.id})
        artifact = Artifact.create([(demux_fp, 6)],
                                   "Demultiplexed",
                                   parents=[pt.artifact],
                                   processing_parameters=params)

        # submit job
        job = self._create_job('submit_to_EBI', {
            'artifact': artifact.id,
            'submission_type': 'VALIDATE'
        })
        job._set_status('in_construction')
        job.submit()

        # wait for the job to fail, and check that the status is submitting
        checked_submitting = True
        while job.status != 'error':
            if checked_submitting:
                self.assertEqual('submitting',
                                 artifact.study.ebi_submission_status)
                checked_submitting = False
        # once it fails wait for a few to check status again
        sleep(5)
        exp = 'Some artifact submissions failed: %d' % artifact.id
        obs = artifact.study.ebi_submission_status
        self.assertEqual(obs, exp)
        # make sure that the error is correct, we have 2 options
        if environ.get('ASPERA_SCP_PASS', '') != '':
            self.assertIn('1.SKM2.640199', job.log.msg)
        else:
            self.assertIn('ASCP Error:', job.log.msg)
        # wait for everything to finish to avoid DB deadlocks
        sleep(5)
Пример #9
0
    def test_submit_to_EBI(self):
        # setting up test
        fna_fp = join(self.temp_dir, 'seqs.fna')
        demux_fp = join(self.temp_dir, 'demux.seqs')
        with open(fna_fp, 'w') as f:
            f.write(FASTA_EXAMPLE)
        with File(demux_fp, "w") as f:
            to_hdf5(fna_fp, f)

        pt = PrepTemplate(1)
        params = Parameters.from_default_params(
            DefaultParameters(1), {'input_data': pt.artifact.id})
        artifact = Artifact.create(
            [(demux_fp, 6)], "Demultiplexed", parents=[pt.artifact],
            processing_parameters=params)

        # submit job
        job = self._create_job('submit_to_EBI', {
            'artifact': artifact.id, 'submission_type': 'VALIDATE'})
        job._set_status('in_construction')
        job.submit()

        # wait for the job to fail, and check that the status is submitting
        checked_submitting = True
        while job.status != 'error':
            if checked_submitting:
                self.assertEqual('submitting',
                                 artifact.study.ebi_submission_status)
                checked_submitting = False
        # once it fails wait for a few to check status again
        sleep(5)
        exp = 'Some artifact submissions failed: %d' % artifact.id
        obs = artifact.study.ebi_submission_status
        self.assertEqual(obs, exp)
        # make sure that the error is correct, we have 2 options
        if environ.get('ASPERA_SCP_PASS', '') != '':
            self.assertIn('1.SKM2.640199', job.log.msg)
        else:
            self.assertIn('ASCP Error:', job.log.msg)
        # wait for everything to finish to avoid DB deadlocks
        sleep(5)
Пример #10
0
    def generate_new_study_with_preprocessed_data(self):
        """Creates a new study up to the processed data for testing"""
        info = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "number_samples_collected": 3,
            "number_samples_promised": 3,
            "study_alias": "Test EBI",
            "study_description": "Study for testing EBI",
            "study_abstract": "Study for testing EBI",
            "emp_person_id": StudyPerson(2),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1),
        }
        study = Study.create(User("*****@*****.**"), "Test EBI study", [1], info)
        metadata_dict = {
            "Sample1": {
                "collection_timestamp": datetime(2015, 6, 1, 7, 0, 0),
                "physical_specimen_location": "location1",
                "taxon_id": 9606,
                "scientific_name": "h**o sapiens",
                "Description": "Test Sample 1",
            },
            "Sample2": {
                "collection_timestamp": datetime(2015, 6, 2, 7, 0, 0),
                "physical_specimen_location": "location1",
                "taxon_id": 9606,
                "scientific_name": "h**o sapiens",
                "Description": "Test Sample 2",
            },
            "Sample3": {
                "collection_timestamp": datetime(2015, 6, 3, 7, 0, 0),
                "physical_specimen_location": "location1",
                "taxon_id": 9606,
                "scientific_name": "h**o sapiens",
                "Description": "Test Sample 3",
            },
        }
        metadata = pd.DataFrame.from_dict(metadata_dict, orient="index", dtype=str)
        SampleTemplate.create(metadata, study)
        metadata_dict = {
            "Sample1": {
                "primer": "GTGCCAGCMGCCGCGGTAA",
                "barcode": "CGTAGAGCTCTC",
                "center_name": "KnightLab",
                "platform": "ILLUMINA",
                "instrument_model": "Illumina MiSeq",
                "library_construction_protocol": "Protocol ABC",
                "experiment_design_description": "Random value 1",
            },
            "Sample2": {
                "primer": "GTGCCAGCMGCCGCGGTAA",
                "barcode": "CGTAGAGCTCTA",
                "center_name": "KnightLab",
                "platform": "ILLUMINA",
                "instrument_model": "Illumina MiSeq",
                "library_construction_protocol": "Protocol ABC",
                "experiment_design_description": "Random value 2",
            },
            "Sample3": {
                "primer": "GTGCCAGCMGCCGCGGTAA",
                "barcode": "CGTAGAGCTCTT",
                "center_name": "KnightLab",
                "platform": "ILLUMINA",
                "instrument_model": "Illumina MiSeq",
                "library_construction_protocol": "Protocol ABC",
                "experiment_design_description": "Random value 3",
            },
        }
        metadata = pd.DataFrame.from_dict(metadata_dict, orient="index", dtype=str)
        pt = PrepTemplate.create(metadata, study, "16S", "Metagenomics")
        fna_fp = join(self.temp_dir, "seqs.fna")
        demux_fp = join(self.temp_dir, "demux.seqs")
        with open(fna_fp, "w") as f:
            f.write(FASTA_EXAMPLE_2.format(study.id))
        with File(demux_fp, "w") as f:
            to_hdf5(fna_fp, f)

        ppd = Artifact.create([(demux_fp, 6)], "Demultiplexed", prep_template=pt)

        return ppd
Пример #11
0
    def generate_new_study_with_preprocessed_data(self):
        """Creates a new study up to the processed data for testing"""
        info = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "number_samples_collected": 3,
            "number_samples_promised": 3,
            "study_alias": "Test EBI",
            "study_description": "Study for testing EBI",
            "study_abstract": "Study for testing EBI",
            "emp_person_id": StudyPerson(2),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1)
        }
        study = Study.create(User('*****@*****.**'), "Test EBI study", info)
        metadata_dict = {
            'Sample1': {
                'collection_timestamp': datetime(2015, 6, 1, 7, 0, 0),
                'physical_specimen_location': 'location1',
                'taxon_id': 9606,
                'scientific_name': 'h**o sapiens',
                'Description': 'Test Sample 1'
            },
            'Sample2': {
                'collection_timestamp': datetime(2015, 6, 2, 7, 0, 0),
                'physical_specimen_location': 'location1',
                'taxon_id': 9606,
                'scientific_name': 'h**o sapiens',
                'Description': 'Test Sample 2'
            },
            'Sample3': {
                'collection_timestamp': datetime(2015, 6, 3, 7, 0, 0),
                'physical_specimen_location': 'location1',
                'taxon_id': 9606,
                'scientific_name': 'h**o sapiens',
                'Description': 'Test Sample 3'
            }
        }
        metadata = pd.DataFrame.from_dict(metadata_dict,
                                          orient='index',
                                          dtype=str)
        SampleTemplate.create(metadata, study)
        metadata_dict = {
            'Sample1': {
                'primer': 'GTGCCAGCMGCCGCGGTAA',
                'barcode': 'CGTAGAGCTCTC',
                'center_name': 'KnightLab',
                'platform': 'ILLUMINA',
                'instrument_model': 'Illumina MiSeq',
                'library_construction_protocol': 'Protocol ABC',
                'experiment_design_description': "Random value 1"
            },
            'Sample2': {
                'primer': 'GTGCCAGCMGCCGCGGTAA',
                'barcode': 'CGTAGAGCTCTA',
                'center_name': 'KnightLab',
                'platform': 'ILLUMINA',
                'instrument_model': 'Illumina MiSeq',
                'library_construction_protocol': 'Protocol ABC',
                'experiment_design_description': "Random value 2"
            },
            'Sample3': {
                'primer': 'GTGCCAGCMGCCGCGGTAA',
                'barcode': 'CGTAGAGCTCTT',
                'center_name': 'KnightLab',
                'platform': 'ILLUMINA',
                'instrument_model': 'Illumina MiSeq',
                'library_construction_protocol': 'Protocol ABC',
                'experiment_design_description': "Random value 3"
            },
        }
        metadata = pd.DataFrame.from_dict(metadata_dict,
                                          orient='index',
                                          dtype=str)
        pt = PrepTemplate.create(metadata, study, "16S", 'Metagenomics')
        fna_fp = join(self.temp_dir, 'seqs.fna')
        demux_fp = join(self.temp_dir, 'demux.seqs')
        with open(fna_fp, 'w') as f:
            f.write(FASTA_EXAMPLE_2.format(study.id))
        with File(demux_fp, 'w') as f:
            to_hdf5(fna_fp, f)

        ppd = Artifact.create([(demux_fp, 6)],
                              "Demultiplexed",
                              prep_template=pt)

        return ppd
Пример #12
0
    def test_download_study(self):
        tmp_dir = mkdtemp()
        self._clean_up_files.append(tmp_dir)

        biom_fp = join(tmp_dir, 'otu_table.biom')
        smr_dir = join(tmp_dir, 'sortmerna_picked_otus')
        log_dir = join(smr_dir, 'seqs_otus.log')
        tgz = join(tmp_dir, 'sortmerna_picked_otus.tgz')

        with biom_open(biom_fp, 'w') as f:
            et.to_hdf5(f, "test")
        makedirs(smr_dir)
        with open(log_dir, 'w') as f:
            f.write('\n')
        with open(tgz, 'w') as f:
            f.write('\n')

        files_biom = [(biom_fp, 'biom'), (smr_dir, 'directory'), (tgz, 'tgz')]

        params = Parameters.from_default_params(
            next(Command(3).default_parameter_sets), {'input_data': 1})
        a = Artifact.create(files_biom, "BIOM", parents=[Artifact(2)],
                            processing_parameters=params)
        for x in a.filepaths:
            self._clean_up_files.append(x['fp'])

        response = self.get('/download_study_bioms/1')
        self.assertEqual(response.code, 200)
        exp = (
            '1579715020 1256812 /protected/processed_data/1_study_1001_closed_'
            'reference_otu_table.biom processed_data/1_study_1001_closed_'
            'reference_otu_table.biom\n'
            '- [0-9]* /protected/templates/1_prep_1_qiime_[0-9]*-'
            '[0-9]*.txt mapping_files/4_mapping_file.txt\n'
            '1579715020 1256812 /protected/processed_data/'
            '1_study_1001_closed_reference_otu_table.biom processed_data/'
            '1_study_1001_closed_reference_otu_table.biom\n'
            '- [0-9]* /protected/templates/1_prep_1_qiime_[0-9]*-'
            '[0-9]*.txt mapping_files/5_mapping_file.txt\n'
            '1579715020 1256812 /protected/processed_data/'
            '1_study_1001_closed_reference_otu_table_Silva.biom processed_data'
            '/1_study_1001_closed_reference_otu_table_Silva.biom\n'
            '- [0-9]* /protected/templates/1_prep_1_qiime_[0-9]*-'
            '[0-9]*.txt mapping_files/6_mapping_file.txt\n'
            '1756512010 1093210 /protected/BIOM/7/biom_table.biom '
            'BIOM/7/biom_table.biom\n'
            '- [0-9]* /protected/templates/1_prep_2_qiime_[0-9]*-'
            '[0-9]*.txt mapping_files/7_mapping_file.txt\n'
            '[0-9]* [0-9]* /protected/BIOM/{0}/otu_table.biom '
            'BIOM/{0}/otu_table.biom\n'
            '- 1 /protected/BIOM/{0}/sortmerna_picked_otus/seqs_otus.log '
            'BIOM/{0}/sortmerna_picked_otus/seqs_otus.log\n'
            '- [0-9]* /protected/templates/1_prep_1_qiime_[0-9]*-[0-9]*.'
            'txt mapping_files/{0}_mapping_file.txt\n'.format(a.id))
        self.assertRegex(response.body.decode('ascii'), exp)

        response = self.get('/download_study_bioms/200')
        self.assertEqual(response.code, 405)

        # changing user so we can test the failures
        BaseHandler.get_current_user = Mock(
            return_value=User("*****@*****.**"))
        response = self.get('/download_study_bioms/1')
        self.assertEqual(response.code, 405)

        a.visibility = 'public'
        response = self.get('/download_study_bioms/1')
        # returning visibility
        a.visibility = 'private'
        self.assertEqual(response.code, 200)
        # we should have the same files than the previous test, except artifact
        # and mapping file 7: position 6 and 7; thus removing 6 twice
        exp = exp.split('\n')
        exp.pop(6)
        exp.pop(6)
        exp = '\n'.join(exp)
        self.assertRegex(response.body.decode('ascii'), exp)
Пример #13
0
def artifact_post_req(user_id, filepaths, artifact_type, name,
                      prep_template_id, artifact_id=None):
    """Creates the initial artifact for the prep template

    Parameters
    ----------
    user_id : str
        User adding the atrifact
    filepaths : dict of str
        Comma-separated list of files to attach to the artifact,
        keyed by file type
    artifact_type : str
        The type of the artifact
    name : str
        Name to give the artifact
    prep_template_id : int or str castable to int
        Prep template to attach the artifact to
    artifact_id : int or str castable to int, optional
        The id of the imported artifact

    Returns
    -------
    dict of objects
        A dictionary containing the new artifact ID
        {'status': status,
         'message': message,
         'artifact': id}
    """
    prep = PrepTemplate(int(prep_template_id))
    study_id = prep.study_id

    # First check if the user has access to the study
    access_error = check_access(study_id, user_id)
    if access_error:
        return access_error

    if artifact_id:
        # if the artifact id has been provided, import the artifact
        try:
            artifact = Artifact.copy(Artifact(artifact_id), prep)
        except Exception as e:
            # We should hit this exception rarely (that's why it is an
            # exception)  since at this point we have done multiple checks.
            # However, it can occur in weird cases, so better let the GUI know
            # that this failed
            return {'status': 'error',
                    'message': "Error creating artifact: %s" % str(e)}

    else:
        uploads_path = get_mountpoint('uploads')[0][1]
        path_builder = partial(join, uploads_path, str(study_id))
        cleaned_filepaths = []
        for ftype, file_list in viewitems(filepaths):
            # JavaScript sends us this list as a comma-separated list
            for fp in file_list.split(','):
                # JavaScript will send this value as an empty string if the
                # list of files was empty. In such case, the split will
                # generate a single element containing the empty string. Check
                # for that case here and, if fp is not the empty string,
                # proceed to check if the file exists
                if fp:
                    # Check if filepath being passed exists for study
                    full_fp = path_builder(fp)
                    exists = check_fp(study_id, full_fp)
                    if exists['status'] != 'success':
                        return {'status': 'error',
                                'message': 'File does not exist: %s' % fp}
                    cleaned_filepaths.append((full_fp, ftype))

        # This should never happen, but it doesn't hurt to actually have
        # a explicit check, in case there is something odd with the JS
        if not cleaned_filepaths:
            return {'status': 'error',
                    'message': "Can't create artifact, no files provided."}

        try:
            artifact = Artifact.create(cleaned_filepaths, artifact_type,
                                       name=name, prep_template=prep)
        except Exception as e:
            # We should hit this exception rarely (that's why it is an
            # exception)  since at this point we have done multiple checks.
            # However, it can occur in weird cases, so better let the GUI know
            # that this failed
            return {'status': 'error',
                    'message': "Error creating artifact: %s" % str(e)}

    return {'status': 'success',
            'message': '',
            'artifact': artifact.id}
Пример #14
0
    def test_download_study(self):
        tmp_dir = mkdtemp()
        self._clean_up_files.append(tmp_dir)

        biom_fp = join(tmp_dir, 'otu_table.biom')
        smr_dir = join(tmp_dir, 'sortmerna_picked_otus')
        log_dir = join(smr_dir, 'seqs_otus.log')
        tgz = join(tmp_dir, 'sortmerna_picked_otus.tgz')

        with biom_open(biom_fp, 'w') as f:
            et.to_hdf5(f, "test")
        makedirs(smr_dir)
        with open(log_dir, 'w') as f:
            f.write('\n')
        with open(tgz, 'w') as f:
            f.write('\n')

        files_biom = [(biom_fp, 'biom'), (smr_dir, 'directory'), (tgz, 'tgz')]

        params = Parameters.from_default_params(
            next(Command(3).default_parameter_sets), {'input_data': 1})
        a = Artifact.create(files_biom,
                            "BIOM",
                            parents=[Artifact(2)],
                            processing_parameters=params)
        for x in a.filepaths:
            self._clean_up_files.append(x['fp'])

        response = self.get('/download_study_bioms/1')
        self.assertEqual(response.code, 200)
        exp = ('- 1256812 /protected/processed_data/'
               '1_study_1001_closed_reference_otu_table.biom processed_data/'
               '1_study_1001_closed_reference_otu_table.biom\n'
               '- [0-9]* /protected/templates/1_prep_1_[0-9]*-[0-9]*.txt '
               'mapping_files/4_mapping_file.txt\n'
               '- 1256812 /protected/processed_data/'
               '1_study_1001_closed_reference_otu_table.biom processed_data/'
               '1_study_1001_closed_reference_otu_table.biom\n'
               '- [0-9]* /protected/templates/1_prep_1_[0-9]*-[0-9]*.txt '
               'mapping_files/5_mapping_file.txt\n'
               '- 1256812 /protected/processed_data/1_study_1001_'
               'closed_reference_otu_table_Silva.biom processed_data/'
               '1_study_1001_closed_reference_otu_table_Silva.biom\n'
               '- [0-9]* /protected/templates/1_prep_1_[0-9]*-[0-9]*.txt '
               'mapping_files/6_mapping_file.txt\n'
               '- 1093210 /protected/BIOM/7/biom_table.biom '
               'BIOM/7/biom_table.biom\n'
               '- [0-9]* /protected/templates/1_prep_2_[0-9]*-[0-9]*.txt '
               'mapping_files/7_mapping_file.txt\n'
               '- [0-9]* /protected/BIOM/{0}/otu_table.biom '
               'BIOM/{0}/otu_table.biom\n'
               '- 1 /protected/BIOM/10/sortmerna_picked_otus/seqs_otus.log '
               'BIOM/{0}/sortmerna_picked_otus/seqs_otus.log\n'
               '- [0-9]* /protected/templates/1_prep_1_[0-9]*-[0-9]*.txt '
               'mapping_files/{0}_mapping_file.txt\n'.format(a.id))
        self.assertRegex(response.body.decode('ascii'), exp)

        response = self.get('/download_study_bioms/200')
        self.assertEqual(response.code, 405)

        # changing user so we can test the failures
        BaseHandler.get_current_user = Mock(
            return_value=User("*****@*****.**"))
        response = self.get('/download_study_bioms/1')
        self.assertEqual(response.code, 405)

        a.visibility = 'public'
        response = self.get('/download_study_bioms/1')
        # returning visibility
        a.visibility = 'private'
        self.assertEqual(response.code, 200)
        # we should have the same files than the previous test, except artifact
        # and mapping file 7: position 6 and 7; thus removing 6 twice
        exp = exp.split('\n')
        exp.pop(6)
        exp.pop(6)
        exp = '\n'.join(exp)
        self.assertRegex(response.body.decode('ascii'), exp)
Пример #15
0
    def generate_new_study_with_preprocessed_data(self):
        """Creates a new study up to the processed data for testing"""
        # ignoring warnings generated when adding templates
        simplefilter("ignore")
        info = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "number_samples_collected": 3,
            "number_samples_promised": 3,
            "study_alias": "Test EBI",
            "study_description": "Study for testing EBI",
            "study_abstract": "Study for testing EBI",
            "emp_person_id": StudyPerson(2),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1)
        }
        study = Study.create(User('*****@*****.**'), "Test EBI study", [1], info)
        metadata_dict = {
            'Sample1': {'collection_timestamp': datetime(2015, 6, 1, 7, 0, 0),
                        'physical_specimen_location': 'location1',
                        'taxon_id': 9606,
                        'scientific_name': 'h**o sapiens',
                        'Description': 'Test Sample 1'},
            'Sample2': {'collection_timestamp': datetime(2015, 6, 2, 7, 0, 0),
                        'physical_specimen_location': 'location1',
                        'taxon_id': 9606,
                        'scientific_name': 'h**o sapiens',
                        'Description': 'Test Sample 2'},
            'Sample3': {'collection_timestamp': datetime(2015, 6, 3, 7, 0, 0),
                        'physical_specimen_location': 'location1',
                        'taxon_id': 9606,
                        'scientific_name': 'h**o sapiens',
                        'Description': 'Test Sample 3'}
        }
        metadata = pd.DataFrame.from_dict(metadata_dict, orient='index',
                                          dtype=str)
        SampleTemplate.create(metadata, study)
        metadata_dict = {
            'Sample1': {'primer': 'GTGCCAGCMGCCGCGGTAA',
                        'barcode': 'CGTAGAGCTCTC',
                        'center_name': 'KnightLab',
                        'platform': 'ILLUMINA',
                        'instrument_model': 'Illumina MiSeq',
                        'library_construction_protocol': 'Protocol ABC',
                        'experiment_design_description': "Random value 1"},
            'Sample2': {'primer': 'GTGCCAGCMGCCGCGGTAA',
                        'barcode': 'CGTAGAGCTCTA',
                        'center_name': 'KnightLab',
                        'platform': 'ILLUMINA',
                        'instrument_model': 'Illumina MiSeq',
                        'library_construction_protocol': 'Protocol ABC',
                        'experiment_design_description': "Random value 2"},
            'Sample3': {'primer': 'GTGCCAGCMGCCGCGGTAA',
                        'barcode': 'CGTAGAGCTCTT',
                        'center_name': 'KnightLab',
                        'platform': 'ILLUMINA',
                        'instrument_model': 'Illumina MiSeq',
                        'library_construction_protocol': 'Protocol ABC',
                        'experiment_design_description': "Random value 3"},
        }
        metadata = pd.DataFrame.from_dict(metadata_dict, orient='index',
                                          dtype=str)
        pt = PrepTemplate.create(metadata, study, "16S", 'Metagenomics')
        fna_fp = join(self.temp_dir, 'seqs.fna')
        demux_fp = join(self.temp_dir, 'demux.seqs')
        with open(fna_fp, 'w') as f:
            f.write(FASTA_EXAMPLE_2.format(study.id))
        with File(demux_fp, 'w') as f:
            to_hdf5(fna_fp, f)

        ppd = Artifact.create(
            [(demux_fp, 6)], "Demultiplexed", prep_template=pt)

        return ppd
Пример #16
0
    def test_download_study(self):
        tmp_dir = mkdtemp()
        self._clean_up_files.append(tmp_dir)

        biom_fp = join(tmp_dir, 'otu_table.biom')
        smr_dir = join(tmp_dir, 'sortmerna_picked_otus')
        log_dir = join(smr_dir, 'seqs_otus.log')
        tgz = join(tmp_dir, 'sortmerna_picked_otus.tgz')

        with biom_open(biom_fp, 'w') as f:
            et.to_hdf5(f, "test")
        makedirs(smr_dir)
        with open(log_dir, 'w') as f:
            f.write('\n')
        with open(tgz, 'w') as f:
            f.write('\n')

        files_biom = [(biom_fp, 'biom'), (smr_dir, 'directory'), (tgz, 'tgz')]

        params = Parameters.from_default_params(
            Command(3).default_parameter_sets.next(), {'input_data': 1})
        a = Artifact.create(files_biom,
                            "BIOM",
                            parents=[Artifact(2)],
                            processing_parameters=params)
        for _, fp, _ in a.filepaths:
            self._clean_up_files.append(fp)

        response = self.get('/download_study_bioms/1')
        self.assertEqual(response.code, 200)
        exp = (
            '- 1256812 /protected/processed_data/1_study_1001_closed_'
            'reference_otu_table.biom processed_data/1_study_1001_closed_'
            'reference_otu_table.biom\n'
            '- 36615 /protected/templates/1_prep_1_qiime_[0-9]*-'
            '[0-9]*.txt mapping_files/4_mapping_file.txt\n'
            '- 1256812 /protected/processed_data/'
            '1_study_1001_closed_reference_otu_table.biom processed_data/'
            '1_study_1001_closed_reference_otu_table.biom\n'
            '- 36615 /protected/templates/1_prep_1_qiime_[0-9]*-'
            '[0-9]*.txt mapping_files/5_mapping_file.txt\n'
            '- 1256812 /protected/processed_data/'
            '1_study_1001_closed_reference_otu_table_Silva.biom processed_data'
            '/1_study_1001_closed_reference_otu_table_Silva.biom\n'
            '- 36615 /protected/templates/1_prep_1_qiime_[0-9]*-'
            '[0-9]*.txt mapping_files/6_mapping_file.txt\n'
            '- 36615 /protected/templates/1_prep_2_qiime_[0-9]*-'
            '[0-9]*.txt mapping_files/7_mapping_file.txt\n'
            '- 39752 /protected/BIOM/{0}/otu_table.biom '
            'BIOM/{0}/otu_table.biom\n'
            '- 1 /protected/BIOM/{0}/sortmerna_picked_otus/seqs_otus.log '
            'BIOM/{0}/sortmerna_picked_otus/seqs_otus.log\n'
            '- 36615 /protected/templates/1_prep_1_qiime_[0-9]*-[0-9]*.'
            'txt mapping_files/{0}_mapping_file.txt\n'.format(a.id))
        self.assertRegexpMatches(response.body, exp)

        response = self.get('/download_study_bioms/200')
        self.assertEqual(response.code, 405)

        # changing user so we can test the failures
        BaseHandler.get_current_user = Mock(
            return_value=User("*****@*****.**"))
        response = self.get('/download_study_bioms/1')
        self.assertEqual(response.code, 405)

        a.visibility = 'public'
        response = self.get('/download_study_bioms/1')
        self.assertEqual(response.code, 200)
        exp = ('- 39752 /protected/BIOM/{0}/otu_table.biom '
               'BIOM/{0}/otu_table.biom\n'
               '- 1 /protected/BIOM/{0}/sortmerna_picked_otus/seqs_otus.log '
               'BIOM/{0}/sortmerna_picked_otus/seqs_otus.log\n'
               '- 36615 /protected/templates/1_prep_1_qiime_[0-9]*-[0-9]*.'
               'txt mapping_files/{0}_mapping_file.txt\n'.format(a.id))
        self.assertRegexpMatches(response.body, exp)
Пример #17
0
def create_raw_data(filetype, prep_template, filepaths):
    """Creates a new raw data

    Needs to be dispachable because it moves large files
    """
    Artifact.create(filepaths, filetype, prep_template=prep_template)
Пример #18
0
    def test_download_study(self):
        tmp_dir = mkdtemp()
        self._clean_up_files.append(tmp_dir)

        biom_fp = join(tmp_dir, 'otu_table.biom')
        smr_dir = join(tmp_dir, 'sortmerna_picked_otus')
        log_dir = join(smr_dir, 'seqs_otus.log')
        tgz = join(tmp_dir, 'sortmerna_picked_otus.tgz')

        with biom_open(biom_fp, 'w') as f:
            et.to_hdf5(f, "test")
        makedirs(smr_dir)
        with open(log_dir, 'w') as f:
            f.write('\n')
        with open(tgz, 'w') as f:
            f.write('\n')

        files_biom = [(biom_fp, 'biom'), (smr_dir, 'directory'), (tgz, 'tgz')]

        params = Parameters.from_default_params(
            Command(3).default_parameter_sets.next(), {'input_data': 1})
        a = Artifact.create(files_biom, "BIOM", parents=[Artifact(2)],
                            processing_parameters=params)
        for _, fp, _ in a.filepaths:
            self._clean_up_files.append(fp)

        response = self.get('/download_study_bioms/1')
        self.assertEqual(response.code, 200)
        exp = (
            '- 1256812 /protected/processed_data/1_study_1001_closed_'
            'reference_otu_table.biom processed_data/1_study_1001_closed_'
            'reference_otu_table.biom\n'
            '- 36615 /protected/templates/1_prep_1_qiime_[0-9]*-'
            '[0-9]*.txt mapping_files/4_mapping_file.txt\n'
            '- 1256812 /protected/processed_data/'
            '1_study_1001_closed_reference_otu_table.biom processed_data/'
            '1_study_1001_closed_reference_otu_table.biom\n'
            '- 36615 /protected/templates/1_prep_1_qiime_[0-9]*-'
            '[0-9]*.txt mapping_files/5_mapping_file.txt\n'
            '- 1256812 /protected/processed_data/'
            '1_study_1001_closed_reference_otu_table_Silva.biom processed_data'
            '/1_study_1001_closed_reference_otu_table_Silva.biom\n'
            '- 36615 /protected/templates/1_prep_1_qiime_[0-9]*-'
            '[0-9]*.txt mapping_files/6_mapping_file.txt\n'
            '- 36615 /protected/templates/1_prep_2_qiime_[0-9]*-'
            '[0-9]*.txt mapping_files/7_mapping_file.txt\n'
            '- [0-9]* /protected/BIOM/{0}/otu_table.biom '
            'BIOM/{0}/otu_table.biom\n'
            '- 1 /protected/BIOM/{0}/sortmerna_picked_otus/seqs_otus.log '
            'BIOM/{0}/sortmerna_picked_otus/seqs_otus.log\n'
            '- 36615 /protected/templates/1_prep_1_qiime_[0-9]*-[0-9]*.'
            'txt mapping_files/{0}_mapping_file.txt\n'.format(a.id))
        self.assertRegexpMatches(response.body, exp)

        response = self.get('/download_study_bioms/200')
        self.assertEqual(response.code, 405)

        # changing user so we can test the failures
        BaseHandler.get_current_user = Mock(
            return_value=User("*****@*****.**"))
        response = self.get('/download_study_bioms/1')
        self.assertEqual(response.code, 405)

        a.visibility = 'public'
        response = self.get('/download_study_bioms/1')
        self.assertEqual(response.code, 200)
        exp = (
            '- [0-9]* /protected/BIOM/{0}/otu_table.biom '
            'BIOM/{0}/otu_table.biom\n'
            '- 1 /protected/BIOM/{0}/sortmerna_picked_otus/seqs_otus.log '
            'BIOM/{0}/sortmerna_picked_otus/seqs_otus.log\n'
            '- 36615 /protected/templates/1_prep_1_qiime_[0-9]*-[0-9]*.'
            'txt mapping_files/{0}_mapping_file.txt\n'.format(a.id))
        self.assertRegexpMatches(response.body, exp)
Пример #19
0
def create_raw_data(filetype, prep_template, filepaths):
    """Creates a new raw data

    Needs to be dispachable because it moves large files
    """
    Artifact.create(filepaths, filetype, prep_template=prep_template)