示例#1
0
def _insert_preprocessed_data(study, params, prep_template, slq_out,
                              **kwargs):
    """Stores the split libraries output as a new preprocessed data

    Parameters
    ----------
    study : Study
        The study that was preprocessed
    params : BaseParameters
        The parameters used for preprocessing
    prep_template : PrepTemplate
        The prep template used for the preprocessing
    slq_out : str
        Path to the split_libraries_fastq.py output directory
    kwargs: ignored
        Necessary to include to support execution via moi.

    Raises
    ------
    ValueError
        If seqs.fna, seqs.demux or split_library_log.txt is missing from the
        output directory
    """
    from os.path import exists, join
    from functools import partial
    from qiita_db.data import PreprocessedData

    in_slq_out = partial(join, slq_out)

    # Files that must be present, paired with their filepath type:
    #   seqs.fna -> demultiplexed fasta file
    #   seqs.demux -> demultiplexed HDF5 file
    #   split_library_log.txt -> log file
    filepaths = [(in_slq_out('seqs.fna'), "preprocessed_fasta"),
                 (in_slq_out('seqs.demux'), "preprocessed_demux"),
                 (in_slq_out('split_library_log.txt'), "log")]

    # Bail out on the first required file that is missing
    for required_fp, _ in filepaths:
        if not exists(required_fp):
            raise ValueError("The output directory %s does not contain all "
                             "the expected files." % slq_out)

    # seqs.fastq (demultiplexed fastq file) is optional, so it is only
    # attached when it is present
    fastq_fp = in_slq_out('seqs.fastq')
    if exists(fastq_fp):
        filepaths.append((fastq_fp, "preprocessed_fastq"))

    PreprocessedData.create(study, params._table, params.id, filepaths,
                            prep_template)

    # Flag the preprocessing of the prep template as successful
    prep_template.preprocessing_status = 'success'
示例#2
0
def _insert_preprocessed_data(study, params, prep_template, slq_out, **kwargs):
    """Adds the split libraries results to the database

    Parameters
    ----------
    study : Study
        The study that was preprocessed
    params : BaseParameters
        The parameters used for preprocessing
    prep_template : PrepTemplate
        The prep template used for the preprocessing
    slq_out : str
        Path to the split_libraries_fastq.py output directory
    kwargs: ignored
        Necessary to include to support execution via moi.

    Raises
    ------
    ValueError
        If the fasta, demux or log file is missing from the output directory
    """
    from os.path import exists, join
    from functools import partial
    from qiita_db.data import PreprocessedData

    out_path = partial(join, slq_out)

    # Required outputs
    fasta_fp = out_path('seqs.fna')               # demultiplexed fasta file
    demux_fp = out_path('seqs.demux')             # demultiplexed HDF5 file
    log_fp = out_path('split_library_log.txt')    # split libraries log
    # Optional output
    fastq_fp = out_path('seqs.fastq')             # demultiplexed fastq file

    required_fps = (fasta_fp, demux_fp, log_fp)
    if not all(exists(fp) for fp in required_fps):
        raise ValueError(
            "The output directory %s does not contain all the "
            "expected files." % slq_out)

    filepaths = [
        (fasta_fp, "preprocessed_fasta"),
        (demux_fp, "preprocessed_demux"),
        (log_fp, "log"),
    ]
    # The fastq file is only registered when it was generated
    if exists(fastq_fp):
        filepaths.append((fastq_fp, "preprocessed_fastq"))

    PreprocessedData.create(
        study, params._table, params.id, filepaths, prep_template)

    # Record on the prep template that the preprocessing succeeded
    prep_template.preprocessing_status = 'success'
示例#3
0
 def test_create_error_data_type(self):
     """create raises for data_type="Metabolomics", with or without prep"""
     def create_metabolomics(**extra):
         # Both attempts share everything but the optional prep template
         return PreprocessedData.create(
             self.study, "preprocessed_sequence_illumina_params",
             self.params_id, self.filepaths, data_type="Metabolomics",
             **extra)

     with self.assertRaises(IncompetentQiitaDeveloperError):
         create_metabolomics()
     with self.assertRaises(IncompetentQiitaDeveloperError):
         create_metabolomics(prep_template=self.prep_template)
示例#4
0
 def test_create_error_data_type(self):
     """create raises for data_type="Metabolomics", with or without prep"""
     def attempt_create(**optional):
         # The only difference between attempts is the optional arguments
         return PreprocessedData.create(
             self.study, "preprocessed_sequence_illumina_params",
             self.params_id, self.filepaths, data_type="Metabolomics",
             **optional)

     # Data type only
     with self.assertRaises(IncompetentQiitaDeveloperError):
         attempt_create()
     # Data type together with a prep template
     with self.assertRaises(IncompetentQiitaDeveloperError):
         attempt_create(prep_template=self.prep_template)
示例#5
0
 def test_create_error_dynamic_table(self):
     """Raises an error if the preprocessed_params_table does not exist"""
     unknown_tables = ("foo", "preprocessed_foo", "foo_params",
                       "preprocessed_foo_params")
     # Every one of these table names must be rejected
     for table in unknown_tables:
         with self.assertRaises(IncompetentQiitaDeveloperError):
             PreprocessedData.create(self.study, table, self.params_id,
                                     self.filepaths, data_type="18S")
示例#6
0
 def test_processing_status_setter_valueerror(self):
     """Raises an error if the processing status is not recognized"""
     create_args = (self.study, self.params_table, self.params_id,
                    self.filepaths)
     new_ppd = PreprocessedData.create(*create_args, data_type="18S")
     # Assigning an unknown state must be rejected by the setter
     with self.assertRaises(ValueError):
         setattr(new_ppd, 'processing_status', 'not a valid state')
示例#7
0
文件: test_data.py 项目: zonca/qiita
 def test_processing_status_setter_valueerror(self):
     """Raises an error if the processing status is not recognized"""
     invalid_state = 'not a valid state'
     fresh_ppd = PreprocessedData.create(
         self.study, self.params_table, self.params_id, self.filepaths,
         data_type="18S")
     # The setter is expected to refuse the unknown state
     with self.assertRaises(ValueError):
         fresh_ppd.processing_status = invalid_state
示例#8
0
文件: test_data.py 项目: zonca/qiita
    def test_create_data_type_only(self):
        """Creates a preprocessed data when only the data type is given"""
        # Check that the returned object has the correct id
        new_ppd = PreprocessedData.create(
            self.study, self.params_table, self.params_id, self.filepaths,
            data_type="18S")
        self.assertEqual(new_ppd.id, 3)

        fetchall = self.conn_handler.execute_fetchall

        # Check that the preprocessed data have been correctly added to the DB
        # Columns: preprocessed_data_id, preprocessed_params_table,
        # preprocessed_params_id, submitted_to_insdc_status,
        # ebi_submission_accession, ebi_study_accession, data_type_id,
        # link_filepaths_status, vamps_status, processing_status
        ppd_rows = fetchall(
            "SELECT * FROM qiita.preprocessed_data WHERE "
            "preprocessed_data_id=3")
        self.assertEqual(
            ppd_rows,
            [[3, "preprocessed_sequence_illumina_params", 1, 'not submitted',
              None, None, 2, 'idle', 'not submitted', 'not_processed']])

        # Check that the preprocessed data has been linked with its study
        study_rows = fetchall(
            "SELECT * FROM qiita.study_preprocessed_data WHERE "
            "preprocessed_data_id=3")
        self.assertEqual(study_rows, [[1, 3]])

        # Check that the files have been copied to right location, and
        # schedule the copies for removal
        db_names = []
        for src_fp in (self.fna_fp, self.qual_fp):
            db_name = "3_%s" % basename(src_fp)
            copied_fp = join(self.db_test_ppd_dir, db_name)
            self.assertTrue(exists(copied_fp))
            self._clean_up_files.append(copied_fp)
            db_names.append(db_name)
        fna_name, qual_name = db_names

        # Check that the filepaths have been correctly added to the DB. The
        # row count is used as the id of the newest filepath row.
        last_id = self.conn_handler.execute_fetchone(
            "SELECT count(1) from qiita.filepath")[0]
        prev_id = last_id - 1
        fp_rows = fetchall(
            "SELECT * FROM qiita.filepath WHERE filepath_id=%d or "
            "filepath_id=%d" % (prev_id, last_id))
        # Rows start with: filepath_id, path, filepath_type_id
        self.assertEqual(
            fp_rows,
            [[prev_id, fna_name, 4, '852952723', 1, 3],
             [last_id, qual_name, 5, '852952723', 1, 3]])

        # Check that the preprocessed data have been correctly
        # linked with the filepaths
        link_rows = fetchall(
            "SELECT * FROM qiita.preprocessed_filepath WHERE "
            "preprocessed_data_id=3")
        # preprocessed_data_id, filepath_id
        self.assertEqual(link_rows, [[3, prev_id], [3, last_id]])
示例#9
0
    def test_create_data_type_only(self):
        """Creates a preprocessed data when only the data type is given"""
        # Check that the returned object has the correct id
        created = PreprocessedData.create(
            self.study, self.params_table, self.params_id, self.filepaths,
            data_type="18S")
        self.assertEqual(created.id, 3)

        query = self.conn_handler.execute_fetchall

        # Check that the preprocessed data have been correctly added to the DB
        # Columns: preprocessed_data_id, preprocessed_params_table,
        # preprocessed_params_id, submitted_to_insdc_status,
        # ebi_submission_accession, ebi_study_accession, data_type_id,
        # link_filepaths_status, vamps_status, processing_status
        expected_ppd = [3, "preprocessed_sequence_illumina_params", 1,
                        'not submitted', None, None, 2, 'idle',
                        'not submitted', 'not_processed']
        self.assertEqual(
            query("SELECT * FROM qiita.preprocessed_data WHERE "
                  "preprocessed_data_id=3"),
            [expected_ppd])

        # Check that the preprocessed data has been linked with its study
        self.assertEqual(
            query("SELECT * FROM qiita.study_preprocessed_data WHERE "
                  "preprocessed_data_id=3"),
            [[1, 3]])

        # Check that the files have been copied to right location; each
        # copy is registered for clean up once it is known to exist
        fna_name = "3_%s" % basename(self.fna_fp)
        fna_copy = join(self.db_test_ppd_dir, fna_name)
        self.assertTrue(exists(fna_copy))
        self._clean_up_files.append(fna_copy)

        qual_name = "3_%s" % basename(self.qual_fp)
        qual_copy = join(self.db_test_ppd_dir, qual_name)
        self.assertTrue(exists(qual_copy))
        self._clean_up_files.append(qual_copy)

        # Check that the filepaths have been correctly added to the DB. The
        # row count is used as the id of the newest filepath row.
        newest_id = self.conn_handler.execute_fetchone(
            "SELECT count(1) from qiita.filepath")[0]
        older_id = newest_id - 1
        fp_rows = query(
            "SELECT * FROM qiita.filepath WHERE filepath_id=%d or "
            "filepath_id=%d" % (older_id, newest_id))
        # Rows start with: filepath_id, path, filepath_type_id
        expected_fps = [[older_id, fna_name, 4, '852952723', 1, 3],
                        [newest_id, qual_name, 5, '852952723', 1, 3]]
        self.assertEqual(fp_rows, expected_fps)

        # Check that the preprocessed data have been correctly
        # linked with the filepaths (preprocessed_data_id, filepath_id)
        self.assertEqual(
            query("SELECT * FROM qiita.preprocessed_filepath WHERE "
                  "preprocessed_data_id=3"),
            [[3, older_id], [3, newest_id]])
示例#10
0
    def test_ebi_study_accession(self):
        """The EBI study accession can be overwritten and read back"""
        updated_accession = 'EBI12345-DD'
        ppd = PreprocessedData.create(
            self.study, self.params_table, self.params_id, self.filepaths,
            prep_template=self.prep_template,
            ebi_submission_accession=self.ebi_submission_accession,
            ebi_study_accession=self.ebi_study_accession)

        # Set the new value and make sure the getter reflects it
        ppd.ebi_study_accession = updated_accession
        self.assertEqual(ppd.ebi_study_accession, updated_accession)
示例#11
0
    def test_set_ebi_submission_accession(self):
        """The EBI submission accession can be overwritten and read back"""
        updated_accession = 'EBI12345-CC'
        ppd = PreprocessedData.create(
            self.study, self.params_table, self.params_id, self.filepaths,
            raw_data=self.raw_data,
            ebi_submission_accession=self.ebi_submission_accession,
            ebi_study_accession=self.ebi_study_accession)

        # Set the new value and make sure the getter reflects it
        ppd.ebi_submission_accession = updated_accession
        self.assertEqual(ppd.ebi_submission_accession, updated_accession)
示例#12
0
    def test_ebi_study_accession(self):
        """Setting ebi_study_accession changes the value that is read back"""
        positional = (self.study, self.params_table, self.params_id,
                      self.filepaths)
        created = PreprocessedData.create(
            *positional,
            prep_template=self.prep_template,
            ebi_submission_accession=self.ebi_submission_accession,
            ebi_study_accession=self.ebi_study_accession)

        # Overwrite the accession given at creation time
        created.ebi_study_accession = 'EBI12345-DD'
        self.assertEqual(created.ebi_study_accession, 'EBI12345-DD')
示例#13
0
文件: test_data.py 项目: zonca/qiita
    def test_processing_status(self):
        """processing_status works correctly"""
        expected_status = 'not_processed'

        # Preprocessed data instantiated from an existing id
        existing = PreprocessedData(1)
        self.assertEqual(existing.processing_status, expected_status)

        # Newly created preprocessed data
        created = PreprocessedData.create(
            self.study, self.params_table, self.params_id, self.filepaths,
            data_type="18S")
        self.assertEqual(created.processing_status, expected_status)
示例#14
0
    def test_processing_status(self):
        """processing_status works correctly"""
        # Preprocessed data loaded by id
        loaded_ppd = PreprocessedData(1)
        self.assertEqual(loaded_ppd.processing_status, 'not_processed')

        # Preprocessed data that has just been created
        create_args = (self.study, self.params_table, self.params_id,
                       self.filepaths)
        new_ppd = PreprocessedData.create(*create_args, data_type="18S")
        self.assertEqual(new_ppd.processing_status, 'not_processed')
    def test_insert_processed_data_target_gene(self):
        """The pick directory files are copied and a DB row is created"""
        # Empty temporary files standing in for the preprocessed sequences
        tmp_fps = []
        for suffix in ('_seqs.fna', '_seqs.qual'):
            handle, tmp_fp = mkstemp(suffix=suffix)
            close(handle)
            tmp_fps.append(tmp_fp)
        fp_types = ('preprocessed_fasta', 'preprocessed_fastq')
        filepaths = [(seq_fp, convert_to_id(fp_type, 'filepath_type'))
                     for seq_fp, fp_type in zip(tmp_fps, fp_types)]

        preprocessed_data = PreprocessedData.create(
            Study(1), "preprocessed_sequence_illumina_params", 1, filepaths,
            data_type="18S")

        params = ProcessedSortmernaParams(1)
        pick_dir = mkdtemp()
        in_pick_dir = partial(join, pick_dir)
        in_db_dir = partial(join, get_mountpoint('processed_data')[0][1])

        # Create a placeholder for the otu table
        with open(in_pick_dir('otu_table.biom'), 'w') as biom_f:
            biom_f.write('\n')

        # Create a placeholder for the directory
        mkdir(in_pick_dir('sortmerna_picked_otus'))

        # Create the log file
        handle, log_fp = mkstemp(dir=pick_dir, prefix='log_', suffix='.txt')
        close(handle)
        with open(log_fp, 'w') as log_f:
            log_f.write('\n')

        _insert_processed_data_target_gene(preprocessed_data, params, pick_dir)

        new_id = get_count('qiita.processed_data')

        # Check that the files have been copied; the copies are expected
        # to be named "<new_id>_<original name>"
        copied_names = ('otu_table.biom', 'sortmerna_picked_otus',
                        basename(log_fp))
        for name in copied_names:
            self.assertTrue(exists(in_db_dir("%s_%s" % (new_id, name))))

        # Check that a new processed data has been created
        row_exists = self.conn_handler.execute_fetchone(
            "SELECT EXISTS(SELECT * FROM qiita.processed_data WHERE "
            "processed_data_id=%s)", (new_id, ))[0]
        self.assertTrue(row_exists)
示例#16
0
    def test_set_ebi_submission_accession(self):
        """Setting ebi_submission_accession changes the value read back"""
        positional = (self.study, self.params_table, self.params_id,
                      self.filepaths)
        created = PreprocessedData.create(
            *positional,
            raw_data=self.raw_data,
            ebi_submission_accession=self.ebi_submission_accession,
            ebi_study_accession=self.ebi_study_accession)

        # Overwrite the accession given at creation time
        created.ebi_submission_accession = 'EBI12345-CC'
        self.assertEqual(created.ebi_submission_accession, 'EBI12345-CC')
示例#17
0
 def test_create_error(self):
     """Raises an error if the preprocessed_params_table does not exist"""
     unknown_tables = ("foo", "preprocessed_foo", "foo_params",
                       "preprocessed_foo_params")
     # Every one of these table names must be rejected
     for table in unknown_tables:
         with self.assertRaises(IncompetentQiitaDeveloperError):
             PreprocessedData.create(self.study, table, self.params_id,
                                     self.filepaths)
示例#18
0
 def test_processing_status_setter(self):
     """Able to update the processing status"""
     new_ppd = PreprocessedData.create(
         self.study, self.params_table, self.params_id, self.filepaths,
         data_type="18S")
     # Status right after creation
     self.assertEqual(new_ppd.processing_status, 'not_processed')

     # Each state is set and immediately read back, in this order
     states = ('processing', 'processed', 'failed: some error message')
     for state in states:
         new_ppd.processing_status = state
         self.assertEqual(new_ppd.processing_status, state)
示例#19
0
文件: test_data.py 项目: zonca/qiita
 def test_processing_status_setter(self):
     """Able to update the processing status"""
     create_args = (self.study, self.params_table, self.params_id,
                    self.filepaths)
     ppd = PreprocessedData.create(*create_args, data_type="18S")
     # Nothing has been set yet
     self.assertEqual(ppd.processing_status, 'not_processed')

     # Walk through the states, checking the getter after every update
     for new_state in ('processing', 'processed',
                       'failed: some error message'):
         ppd.processing_status = new_state
         self.assertEqual(ppd.processing_status, new_state)
示例#20
0
    def test_create(self):
        """Correctly creates all the rows in the DB for preprocessed data"""
        # Check that the returned object has the correct id
        new_ppd = PreprocessedData.create(
            self.study, self.params_table, self.params_id, self.filepaths,
            raw_data=self.raw_data)
        self.assertEqual(new_ppd.id, 3)

        fetchall = self.conn_handler.execute_fetchall

        # Check that the preprocessed data have been correctly added to the DB
        ppd_rows = fetchall(
            "SELECT * FROM qiita.preprocessed_data WHERE "
            "preprocessed_data_id=3")
        self.assertEqual(
            ppd_rows,
            [[3, "preprocessed_sequence_illumina_params", 1, False]])

        # Check that the preprocessed data has been linked with its study
        study_rows = fetchall(
            "SELECT * FROM qiita.study_preprocessed_data WHERE "
            "preprocessed_data_id=3")
        self.assertEqual(study_rows, [[1, 3]])

        # Check that the files have been copied to right location, and
        # schedule the copies for removal
        copied_fps = []
        for src_fp in (self.fna_fp, self.qual_fp):
            copied_fp = join(self.db_test_ppd_dir,
                             "3_%s" % basename(src_fp))
            self.assertTrue(exists(copied_fp))
            self._clean_up_files.append(copied_fp)
            copied_fps.append(copied_fp)
        fna_copy, qual_copy = copied_fps

        # Check that the filepaths have been correctly added to the DB
        fp_rows = fetchall(
            "SELECT * FROM qiita.filepath WHERE filepath_id=10 or "
            "filepath_id=11")
        # Rows start with: filepath_id, path, filepath_type_id
        self.assertEqual(
            fp_rows,
            [[10, fna_copy, 4, '852952723', 1],
             [11, qual_copy, 5, '852952723', 1]])

        # Check that the preprocessed data have been correctly
        # linked with the filepaths
        link_rows = fetchall(
            "SELECT * FROM qiita.preprocessed_filepath WHERE "
            "preprocessed_data_id=3")
        # preprocessed_data_id, filepath_id
        self.assertEqual(link_rows, [[3, 10], [3, 11]])
示例#21
0
    def test_create(self):
        """Correctly creates all the rows in the DB for preprocessed data"""
        # Check that the returned object has the correct id
        new_ppd = PreprocessedData.create(
            self.study, self.params_table, self.params_id, self.filepaths,
            prep_template=self.prep_template,
            ebi_submission_accession=self.ebi_submission_accession,
            ebi_study_accession=self.ebi_study_accession)
        self.assertEqual(new_ppd.id, 3)

        fetchall = self.conn_handler.execute_fetchall

        # Check that the preprocessed data have been correctly added to the DB
        # Columns: preprocessed_data_id, preprocessed_params_table,
        # preprocessed_params_id, submitted_to_insdc_status,
        # ebi_submission_accession, ebi_study_accession, data_type_id,
        # link_filepaths_status, vamps_status, processing_status
        ppd_rows = fetchall(
            "SELECT * FROM qiita.preprocessed_data WHERE "
            "preprocessed_data_id=3")
        self.assertEqual(
            ppd_rows,
            [[3, "preprocessed_sequence_illumina_params", 1, 'not submitted',
              "EBI123456-A", "EBI123456-B", 2, 'idle', 'not submitted',
              'not_processed']])

        # Check that the preprocessed data has been linked with its study
        study_rows = fetchall(
            "SELECT * FROM qiita.study_preprocessed_data WHERE "
            "preprocessed_data_id=3")
        self.assertEqual(study_rows, [[1, 3]])

        # Check that the files have been copied to right location, and
        # schedule the copies for removal
        db_names = []
        for src_fp in (self.fna_fp, self.qual_fp):
            db_name = "3_%s" % basename(src_fp)
            copied_fp = join(self.db_test_ppd_dir, db_name)
            self.assertTrue(exists(copied_fp))
            self._clean_up_files.append(copied_fp)
            db_names.append(db_name)
        fna_name, qual_name = db_names

        # Check that the filepaths have been correctly added to the DB; the
        # path column is compared against the file name only
        fp_rows = fetchall(
            "SELECT * FROM qiita.filepath WHERE filepath_id=17 or "
            "filepath_id=18")
        # Rows start with: filepath_id, path, filepath_type_id
        self.assertEqual(
            fp_rows,
            [[17, fna_name, 4, '852952723', 1, 3],
             [18, qual_name, 5, '852952723', 1, 3]])
示例#22
0
    def test_create_data_type_only(self):
        """Creates a preprocessed data when only the data type is given"""
        # Check that the returned object has the correct id
        new_ppd = PreprocessedData.create(
            self.study, self.params_table, self.params_id, self.filepaths,
            data_type="18S")
        self.assertEqual(new_ppd.id, 3)

        fetchall = self.conn_handler.execute_fetchall

        # Check that the preprocessed data have been correctly added to the DB
        ppd_rows = fetchall(
            "SELECT * FROM qiita.preprocessed_data WHERE "
            "preprocessed_data_id=3")
        self.assertEqual(
            ppd_rows,
            [[3, "preprocessed_sequence_illumina_params", 1, False, None,
              None, 2]])

        # Check that the preprocessed data has been linked with its study
        study_rows = fetchall(
            "SELECT * FROM qiita.study_preprocessed_data WHERE "
            "preprocessed_data_id=3")
        self.assertEqual(study_rows, [[1, 3]])

        # Check that the files have been copied to right location, and
        # schedule the copies for removal
        copied_fps = []
        for src_fp in (self.fna_fp, self.qual_fp):
            copied_fp = join(self.db_test_ppd_dir,
                             "3_%s" % basename(src_fp))
            self.assertTrue(exists(copied_fp))
            self._clean_up_files.append(copied_fp)
            copied_fps.append(copied_fp)
        fna_copy, qual_copy = copied_fps

        # Check that the filepaths have been correctly added to the DB
        fp_rows = fetchall(
            "SELECT * FROM qiita.filepath WHERE filepath_id=15 or "
            "filepath_id=16")
        # Rows start with: filepath_id, path, filepath_type_id
        self.assertEqual(
            fp_rows,
            [[15, fna_copy, 4, '852952723', 1],
             [16, qual_copy, 5, '852952723', 1]])

        # Check that the preprocessed data have been correctly
        # linked with the filepaths
        link_rows = fetchall(
            "SELECT * FROM qiita.preprocessed_filepath WHERE "
            "preprocessed_data_id=3")
        # preprocessed_data_id, filepath_id
        self.assertEqual(link_rows, [[3, 15], [3, 16]])
示例#23
0
    def test_create(self):
        """Correctly creates all the rows in the DB for preprocessed data"""
        # Check that the returned object has the correct id
        new_ppd = PreprocessedData.create(
            self.study, self.params_table, self.params_id, self.filepaths,
            raw_data=self.raw_data,
            ebi_submission_accession=self.ebi_submission_accession,
            ebi_study_accession=self.ebi_study_accession)
        self.assertEqual(new_ppd.id, 3)

        fetchall = self.conn_handler.execute_fetchall

        # Check that the preprocessed data have been correctly added to the DB
        ppd_rows = fetchall(
            "SELECT * FROM qiita.preprocessed_data WHERE "
            "preprocessed_data_id=3")
        self.assertEqual(
            ppd_rows,
            [[3, "preprocessed_sequence_illumina_params", 1, False,
              "EBI123456-A", "EBI123456-B", 2]])

        # Check that the preprocessed data has been linked with its study
        study_rows = fetchall(
            "SELECT * FROM qiita.study_preprocessed_data WHERE "
            "preprocessed_data_id=3")
        self.assertEqual(study_rows, [[1, 3]])

        # Check that the files have been copied to right location, and
        # schedule the copies for removal
        copied_fps = []
        for src_fp in (self.fna_fp, self.qual_fp):
            copied_fp = join(self.db_test_ppd_dir,
                             "3_%s" % basename(src_fp))
            self.assertTrue(exists(copied_fp))
            self._clean_up_files.append(copied_fp)
            copied_fps.append(copied_fp)
        fna_copy, qual_copy = copied_fps

        # Check that the filepaths have been correctly added to the DB
        fp_rows = fetchall(
            "SELECT * FROM qiita.filepath WHERE filepath_id=15 or "
            "filepath_id=16")
        # Rows start with: filepath_id, path, filepath_type_id
        self.assertEqual(
            fp_rows,
            [[15, fna_copy, 4, '852952723', 1],
             [16, qual_copy, 5, '852952723', 1]])
示例#24
0
文件: test_data.py 项目: jenwei/qiita
    def test_delete_advanced(self):
        """delete raises QiitaDBStatusError when deletion is not allowed"""
        # testing that we can not remove cause preprocessed data has been
        # submitted to EBI or VAMPS
        vamps_ppd = PreprocessedData.create(
            self.study, self.params_table, self.params_id, self.filepaths,
            prep_template=self.prep_template)

        # fails due to VAMPS submission
        vamps_ppd.update_vamps_status('success')
        with self.assertRaises(QiitaDBStatusError):
            PreprocessedData.delete(vamps_ppd.id)
        # Set the VAMPS status to failed once the check is done
        vamps_ppd.update_vamps_status('failed')

        # Preprocessed data 1 must not be deletable either
        # NOTE(review): presumably this is the EBI case mentioned above -
        # confirm against the test database
        existing_ppd = PreprocessedData(1)
        with self.assertRaises(QiitaDBStatusError):
            PreprocessedData.delete(existing_ppd.id)
示例#25
0
文件: test_data.py 项目: zonca/qiita
    def test_status(self):
        """status follows the processed data and defaults to sandbox"""
        existing_ppd = PreprocessedData(1)
        self.assertEqual(existing_ppd.status, 'private')

        # Since the status is inferred from the processed data, change the
        # status of the processed data so we can check how it changes in the
        # preprocessed data
        processed = ProcessedData(1)
        processed.status = 'public'
        self.assertEqual(existing_ppd.status, 'public')

        # Check that new preprocessed data has sandbox as status since no
        # processed data exists for them
        create_args = (self.study, self.params_table, self.params_id,
                       self.filepaths)
        new_ppd = PreprocessedData.create(*create_args, data_type="16S")
        self.assertEqual(new_ppd.status, 'sandbox')
示例#26
0
    def write_demux_files(self, prep_template, generate_hdf5=True):
        """Writes a demux test file to avoid duplication of code

        Parameters
        ----------
        prep_template : PrepTemplate
            The prep template handed to PreprocessedData.create
        generate_hdf5 : bool, optional
            If True, the demux file is generated from a fasta file holding
            FASTA_EXAMPLE. If False, an empty demux file is written instead.

        Returns
        -------
        PreprocessedData
            The preprocessed data created with the demux file attached
        """
        fna_fp = join(self.temp_dir, 'seqs.fna')
        demux_fp = join(self.temp_dir, 'demux.seqs')

        if not generate_hdf5:
            # Only an empty placeholder is needed
            with open(demux_fp, 'w') as demux_f:
                demux_f.write('')
        else:
            # Write the example sequences and convert them to the demux file
            with open(fna_fp, 'w') as fna_f:
                fna_f.write(FASTA_EXAMPLE)
            with File(demux_fp, "w") as demux_f:
                to_hdf5(fna_fp, demux_f)

        # NOTE(review): 6 is presumably the filepath type id of the demux
        # file - confirm against the filepath_type table
        return PreprocessedData.create(
            Study(1), "preprocessed_sequence_illumina_params", 1,
            [(demux_fp, 6)], prep_template)
示例#27
0
    def test_insert_processed_data_target_gene(self):
        """The pick directory files are copied and a DB row is created"""
        # Empty temporary files standing in for the preprocessed sequences
        fna_handle, fna_fp = mkstemp(suffix='_seqs.fna')
        close(fna_handle)
        qual_handle, qual_fp = mkstemp(suffix='_seqs.qual')
        close(qual_handle)

        fasta_type = convert_to_id('preprocessed_fasta', 'filepath_type')
        fastq_type = convert_to_id('preprocessed_fastq', 'filepath_type')
        filepaths = [(fna_fp, fasta_type), (qual_fp, fastq_type)]

        preprocessed_data = PreprocessedData.create(
            Study(1), "preprocessed_sequence_illumina_params", 1, filepaths,
            data_type="18S")

        params = ProcessedSortmernaParams(1)
        pick_dir = mkdtemp()
        in_pick_dir = partial(join, pick_dir)
        in_db_dir = partial(join, get_mountpoint('processed_data')[0][1])

        # Create a placeholder for the otu table
        with open(in_pick_dir('otu_table.biom'), 'w') as biom_f:
            biom_f.write('\n')

        # Create a placeholder for the directory
        mkdir(in_pick_dir('sortmerna_picked_otus'))

        # Create the log file
        log_handle, log_fp = mkstemp(dir=pick_dir, prefix='log_',
                                     suffix='.txt')
        close(log_handle)
        with open(log_fp, 'w') as log_f:
            log_f.write('\n')

        _insert_processed_data_target_gene(preprocessed_data, params, pick_dir)

        new_id = get_count('qiita.processed_data')

        # Check that the files have been copied; the copies are expected
        # to be named "<new_id>_<original name>"
        for name in ('otu_table.biom', 'sortmerna_picked_otus',
                     basename(log_fp)):
            db_fp = in_db_dir("%s_%s" % (new_id, name))
            self.assertTrue(exists(db_fp))

        # Check that a new processed data has been created
        row_exists = self.conn_handler.execute_fetchone(
            "SELECT EXISTS(SELECT * FROM qiita.processed_data WHERE "
            "processed_data_id=%s)", (new_id, ))[0]
        self.assertTrue(row_exists)
示例#28
0
文件: test_data.py 项目: jenwei/qiita
    def test_delete_basic(self):
        """Correctly deletes a preprocessed data"""
        # Regular delete of a newly created preprocessed data
        ppd = PreprocessedData.create(
            self.study, self.params_table, self.params_id, self.filepaths,
            prep_template=self.prep_template)
        PreprocessedData.delete(ppd.id)

        # Once deleted, the id is unknown: first check that the object can't
        # be instantiated and then, for completeness, that deleting it again
        # raises the same error
        for operation in (PreprocessedData, PreprocessedData.delete):
            with self.assertRaises(QiitaDBUnknownIDError):
                operation(ppd.id)

        # Preprocessed data 1 can't be removed because its status is not
        # sandbox
        with self.assertRaises(QiitaDBStatusError):
            PreprocessedData.delete(1)
示例#29
0
文件: test_data.py 项目: jenwei/qiita
    def test_create_data_type_only(self):
        """Creates a preprocessed data from a data type, w/o prep template"""
        # The new object is expected to take the next available id
        expected_id = get_count('qiita.preprocessed_data') + 1
        expected_info = (self.params_table, self.params_id)

        ppd = PreprocessedData.create(
            self.study, self.params_table, self.params_id, self.filepaths,
            data_type="18S")
        self.assertEqual(ppd.id, expected_id)

        # Nothing is linked to the new preprocessed data yet
        self.assertEqual(ppd.processed_data, [])
        self.assertEqual(ppd.prep_template, [])

        # Study and data type, the latter both by name and by id
        self.assertEqual(ppd.study, self.study.id)
        self.assertEqual(ppd.data_type(), "18S")
        self.assertEqual(ppd.data_type(ret_id=True),
                         convert_to_id("18S", "data_type"))

        # Initial values of the different statuses
        self.assertEqual(ppd.submitted_to_vamps_status(), "not submitted")
        self.assertEqual(ppd.processing_status, "not_processed")
        self.assertEqual(ppd.status, "sandbox")

        # The preprocessing parameters given at creation are kept
        self.assertEqual(ppd.preprocessing_info, expected_info)
示例#30
0
    def generate_new_study_with_preprocessed_data(self):
        """Creates a new study up to the preprocessed data for testing

        A study with three samples is created, a sample template and a 16S
        prep template are attached to it, and a demux file generated from
        FASTA_EXAMPLE_2 is registered as its preprocessed data.

        Returns
        -------
        The value returned by PreprocessedData.create
        """
        def sample_row(day, description):
            # Sample template values; only the day and description change
            # NOTE(review): the scientific_name literal looks redacted -
            # confirm the intended value
            return {'collection_timestamp': datetime(2015, 6, day, 7, 0, 0),
                    'physical_specimen_location': 'location1',
                    'taxon_id': 9606,
                    'scientific_name': 'h**o sapiens',
                    'Description': description}

        def prep_row(barcode, design):
            # Prep template values; only barcode and description change
            return {'primer': 'GTGCCAGCMGCCGCGGTAA',
                    'barcode': barcode,
                    'center_name': 'KnightLab',
                    'platform': 'ILLUMINA',
                    'instrument_model': 'Illumina MiSeq',
                    'library_construction_protocol': 'Protocol ABC',
                    'experiment_design_description': design}

        # ignoring warnings generated when adding templates
        simplefilter("ignore")

        study_info = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "number_samples_collected": 3,
            "number_samples_promised": 3,
            "study_alias": "Test EBI",
            "study_description": "Study for testing EBI",
            "study_abstract": "Study for testing EBI",
            "emp_person_id": StudyPerson(2),
            "principal_investigator_id": StudyPerson(3),
            "lab_person_id": StudyPerson(1)
        }
        # NOTE(review): the e-mail literal looks redacted - confirm that it
        # matches an existing user
        owner = User('*****@*****.**')
        study = Study.create(owner, "Test EBI study", [1], study_info)

        # Sample template
        sample_md = {
            'Sample1': sample_row(1, 'Test Sample 1'),
            'Sample2': sample_row(2, 'Test Sample 2'),
            'Sample3': sample_row(3, 'Test Sample 3')}
        SampleTemplate.create(
            pd.DataFrame.from_dict(sample_md, orient='index'), study)

        # Prep template
        prep_md = {
            'Sample1': prep_row('CGTAGAGCTCTC', "Random value 1"),
            'Sample2': prep_row('CGTAGAGCTCTA', "Random value 2"),
            'Sample3': prep_row('CGTAGAGCTCTT', "Random value 3")}
        pt = PrepTemplate.create(
            pd.DataFrame.from_dict(prep_md, orient='index'), study, "16S",
            'Metagenomics')

        # Demux file built from a fasta file that embeds the study id
        fna_fp = join(self.temp_dir, 'seqs.fna')
        demux_fp = join(self.temp_dir, 'demux.seqs')
        with open(fna_fp, 'w') as fasta_fh:
            fasta_fh.write(FASTA_EXAMPLE_2.format(study.id))
        with File(demux_fp, 'w') as demux_fh:
            to_hdf5(fna_fp, demux_fh)

        # NOTE(review): 6 is passed as the filepath type id of the demux
        # file - presumably 'preprocessed_demux'; confirm against the DB
        return PreprocessedData.create(
            study, "preprocessed_sequence_illumina_params", 1,
            [(demux_fp, 6)], pt)