Example #1
    def test_shogun_utree(self):
        # inserting new prep template
        prep_info_dict = {
            'SKB8.640193': {'run_prefix': 'S22205_S104'},
            'SKD8.640184': {'run_prefix': 'S22282_S102'}}
        data = {'prep_info': dumps(prep_info_dict),
                # magic #1 = testing study
                'study': 1,
                'data_type': 'Metagenomic'}
        pid = self.qclient.post('/apitest/prep_template/', data=data)['prep']

        # inserting artifacts
        fp1_1, fp1_2, fp2_1, fp2_2 = self._helper_shogun_bowtie()
        data = {
            'filepaths': dumps([
                (fp1_1, 'raw_forward_seqs'),
                (fp1_2, 'raw_reverse_seqs'),
                (fp2_1, 'raw_forward_seqs'),
                (fp2_2, 'raw_reverse_seqs')]),
            'type': "per_sample_FASTQ",
            'name': "Test Shogun artifact",
            'prep': pid}
        aid = self.qclient.post('/apitest/artifact/', data=data)['artifact']

        self.params['input'] = aid
        self.params['Aligner tool'] = 'utree'
        data = {'user': '******',
                'command': dumps(['qp-shogun', '012020', 'Shogun v1.0.7']),
                'status': 'running',
                'parameters': dumps(self.params)}
        jid = self.qclient.post('/apitest/processing_job/', data=data)['job']

        out_dir = mkdtemp()
        self._clean_up_files.append(out_dir)

        success, ainfo, msg = shogun(self.qclient, jid, self.params, out_dir)

        self.assertEqual("", msg)
        self.assertTrue(success)

        # we are expecting 4 artifacts in total
        pout_dir = partial(join, out_dir)
        self.assertCountEqual(ainfo, [
            ArtifactInfo('Shogun Alignment Profile', 'BIOM',
                         [(pout_dir('otu_table.alignment.profile.biom'),
                           'biom'),
                          (pout_dir('alignment.utree.tsv.xz'), 'log')]),
            ArtifactInfo('Taxonomic Predictions - phylum', 'BIOM',
                         [(pout_dir('otu_table.redist.phylum.biom'),
                           'biom')]),
            ArtifactInfo('Taxonomic Predictions - genus', 'BIOM',
                         [(pout_dir('otu_table.redist.genus.biom'),
                           'biom')]),
            ArtifactInfo('Taxonomic Predictions - species', 'BIOM',
                         [(pout_dir('otu_table.redist.species.biom'),
                           'biom')])])
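
All of the snippets on this page build ArtifactInfo objects the same way: a name (or None for validators), a Qiita artifact type, and a list of (filepath, filepath_type) tuples. The minimal sketch below rebuilds the first expected artifact from the test above by hand; the qiita_client import path and the output directory are assumptions, not taken from the example.

from os.path import join
from qiita_client import ArtifactInfo   # assumed import path

out_dir = '/tmp/job-output'              # hypothetical output directory
expected = ArtifactInfo(
    'Shogun Alignment Profile',          # artifact name (None for validators)
    'BIOM',                              # Qiita artifact type
    [(join(out_dir, 'otu_table.alignment.profile.biom'), 'biom'),
     (join(out_dir, 'alignment.utree.tsv.xz'), 'log')])

# ArtifactInfo compares by value, which is why the tests can assertEqual /
# assertCountEqual freshly built instances against the ones a command returns.
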
Example #2
def _per_sample_ainfo(out_dir, samples, fwd_and_rev=False):
    files = []
    missing_files = []

    if fwd_and_rev:
        suffixes = [
            '%s_paired_1.fastq', '%s_paired_2.fastq', '%s_unmatched_1.fastq',
            '%s_unmatched_2.fastq'
        ]
    else:
        suffixes = ['%s.fastq']

    for rp, _, _, _ in samples:
        smd = partial(join, out_dir, rp)
        for suff in suffixes:
            fname = smd(suff % rp)
            if exists(fname):
                files.append(fname)
            else:
                missing_files.append(fname)

    if not files:
        # KneadData did not create any files, which means that no sequence
        # was kept after quality control and filtering for host data
        raise ValueError("No sequences left after running KneadData")

    # Create empty placeholders for the missing files
    for f in missing_files:
        open(f, 'w').close()
        files.append(f)

    # Gzip all the files
    files = [(_gzip_file(f), 'preprocessed_fastq') for f in files]

    return [ArtifactInfo('KneadData files', 'per_sample_FASTQ', files)]
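
A hedged usage sketch for the helper above: only the first element of each sample tuple (the run prefix) is used, and the FASTQ files are expected under a per-run-prefix sub-directory of out_dir. The sample tuples and directory below are illustrative.

samples = [('S22205_S104', None, None, None),   # only the run prefix is read
           ('S22282_S102', None, None, None)]
# Looks for e.g. /tmp/kd-out/S22205_S104/S22205_S104_paired_1.fastq, gzips
# whatever exists, creates empty placeholders for the rest, and wraps
# everything in a single per_sample_FASTQ ArtifactInfo.
ainfo = _per_sample_ainfo('/tmp/kd-out', samples, fwd_and_rev=True)
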
Example #3
    def test_validate_multiple_single_lane(self):
        test_dir = mkdtemp()
        self._clean_up_files.append(test_dir)

        copyfile(self.fastq, f'{test_dir}/prefix1.fastq')
        copyfile(self.fastq, f'{test_dir}/prefix1_b.fastq')

        prep_info = {
            "1.SKB2.640194": {
                "not_a_run_prefix": "prefix1"
            },
            "1.SKM4.640180": {
                "not_a_run_prefix": "prefix1"
            },
            "1.SKB3.640195": {
                "not_a_run_prefix": "prefix2"
            }
        }
        files = {
            'raw_forward_seqs': [f'{test_dir}/prefix1.fastq'],
            'raw_barcodes': [f'{test_dir}/prefix1_b.fastq']
        }
        atype = "FASTQ"
        job_id, _ = self._create_template_and_job(prep_info, files, atype)

        obs_success, obs_ainfo, obs_error = _validate_multiple(
            self.qclient, job_id, prep_info, files, atype)

        self.assertEqual(obs_error, "")
        self.assertTrue(obs_success)
        filepaths = [(f'{test_dir}/prefix1_b.fastq.gz', 'raw_barcodes'),
                     (f'{test_dir}/prefix1.fastq.gz', 'raw_forward_seqs')]
        exp = [ArtifactInfo(None, atype, filepaths)]
        self.assertEqual(obs_ainfo, exp)
Example #4
def func(qclient, job_id, job_params, working_dir):
    fp = join(working_dir, 'test.fastq')
    with open(fp, 'w') as f:
        f.write('')
    res = ArtifactInfo('out1', 'Demultiplexed',
                       [[fp, 'preprocessed_fastq']])
    return True, "", [res]
Example #5
    def test_validate_run_prefix(self):
        prep_info = {
            'SKB8.640193': {
                'col': 'val1',
                'run_prefix': 'Sample1'
            },
            'SKD8.640184': {
                'col': 'val2',
                'run_prefix': 'Sample2'
            }
        }
        data = {'prep_info': dumps(prep_info), 'study': 1, 'data_type': '16S'}
        res = self.qclient.post('/apitest/prep_template/', data=data)

        sample_ids = ['Sample1', 'Sample2']
        biom_fp, job_id, parameters = self._create_job_and_biom(
            sample_ids, template=res['prep'])
        obs_success, obs_ainfo, obs_error = validate(self.qclient, job_id,
                                                     parameters, self.out_dir)
        exp_fp = partial(join, self.out_dir)
        exp_biom_fp = exp_fp(basename(biom_fp))
        exp_index_fp = exp_fp('index.html')
        exp_viz_fp = exp_fp('support_files')
        exp_qza_fp = exp_fp('feature-table.qza')
        self._clean_up_files.append(exp_biom_fp)
        self.assertTrue(obs_success)
        self.assertEqual(obs_ainfo, [
            ArtifactInfo(None, 'BIOM', [(exp_biom_fp, 'biom'),
                                        (exp_index_fp, 'html_summary'),
                                        (exp_viz_fp, 'html_summary_dir'),
                                        (exp_qza_fp, 'qza')])
        ])
        self.assertEqual(obs_error, "")
        obs_t = load_table(exp_biom_fp)
        self.assertCountEqual(obs_t.ids(), ["1.SKB8.640193", "1.SKD8.640184"])
Example #6
    def test_validate_FeatureData(self):
        # Create the feature data
        fd, fd_fp = mkstemp(suffix='.txt', dir=self.out_dir)
        close(fd)
        with open(fd_fp, 'w') as f:
            f.write("Feature ID\tTaxonomy\tConfidence\n")
            f.write("TACGGAGGA\tk__Bacteria;p__Bacteroidetes;c__Bacteroidia\t"
                    "0.9998743\n")
            f.write("TACGTAGGG\tk__Bacteria;p__Firmicutes;c__Clostridia\t"
                    "0.9999999\n")

        # Test success
        obs_success, obs_ainfo, obs_error = _validate_feature_data(
            {'plain_text': [fd_fp]}, None, self.out_dir)
        self.assertEqual(obs_error, "")
        self.assertTrue(obs_success)
        exp_ainfo = [
            ArtifactInfo(None, "FeatureData", [(fd_fp, 'plain_text')])
        ]
        self.assertEqual(obs_ainfo, exp_ainfo)

        # Test failure wrong format
        fd, fd_fp = mkstemp(suffix='.txt', dir=self.out_dir)
        close(fd)
        with open(fd_fp, 'w') as f:
            f.write("Feature ID\tIt's gonna fail!\tConfidence\n")
            f.write("TACGGAGGA\tk__Bacteria;p__Bacteroidetes;c__Bacteroidia\t"
                    "0.9998743\n")
            f.write("TACGTAGGG\tk__Bacteria;p__Firmicutes;c__Clostridia\t"
                    "0.9999999\n")
        obs_success, obs_ainfo, obs_error = _validate_feature_data(
            {'plain_text': [fd_fp]}, None, self.out_dir)
        self.assertIn("The file header seems wrong", obs_error)
        self.assertFalse(obs_success)
        self.assertIsNone(obs_ainfo)
Example #7
    def test_validate_multiple_single_lane(self):
        prep_info = {
            "1.SKB2.640194": {
                "not_a_run_prefix": "prefix1"
            },
            "1.SKM4.640180": {
                "not_a_run_prefix": "prefix1"
            },
            "1.SKB3.640195": {
                "not_a_run_prefix": "prefix2"
            }
        }
        files = {
            'raw_forward_seqs': ['/path/to/prefix1.fastq'],
            'raw_barcodes': ['/path/to/prefix1_b.fastq']
        }
        atype = "FASTQ"
        job_id = self._create_template_and_job(prep_info, files, atype)

        obs_success, obs_ainfo, obs_error = _validate_multiple(
            self.qclient, job_id, prep_info, files, atype)

        self.assertTrue(obs_success)
        filepaths = [('/path/to/prefix1_b.fastq', 'raw_barcodes'),
                     ('/path/to/prefix1.fastq', 'raw_forward_seqs')]
        exp = [ArtifactInfo(None, atype, filepaths)]
        self.assertEqual(obs_ainfo, exp)
        self.assertEqual(obs_error, "")
Example #8
    def test_validate_distance_matrix(self):
        # Create a distance matrix
        sample_ids = [
            '1.SKM4.640180', '1.SKB8.640193', '1.SKD8.640184', '1.SKM9.640192',
            '1.SKB7.640196'
        ]
        dm_fp = self._create_distance_matrix(sample_ids)

        # Test success
        obs_success, obs_ainfo, obs_error = _validate_distance_matrix(
            {'plain_text': [dm_fp]}, self.metadata, self.out_dir)
        self.assertTrue(obs_success)
        exp_ainfo = [
            ArtifactInfo(None, "distance_matrix", [(dm_fp, 'plain_text')])
        ]
        self.assertEqual(obs_ainfo, exp_ainfo)
        self.assertEqual(obs_error, "")

        # Test failure
        sample_ids = [
            '1.SKM4.640180', '1.SKB8.640193', '1.SKD8.640184', '1.SKM9.640192',
            'NotASample'
        ]
        dm_fp = self._create_distance_matrix(sample_ids)
        obs_success, obs_ainfo, obs_error = _validate_distance_matrix(
            {'plain_text': [dm_fp]}, self.metadata, self.out_dir)
        self.assertFalse(obs_success)
        self.assertIsNone(obs_ainfo)
        self.assertEqual(
            obs_error, "The distance matrix contain samples not "
            "present in the metadata")
Example #9
 def test_validate_per_sample_FASTQ(self):
     prep_info = {
         "1.SKB2.640194": {
             "not_a_run_prefix": "prefix1"
         },
         "1.SKM4.640180": {
             "not_a_run_prefix": "prefix1"
         },
         "1.SKB3.640195": {
             "not_a_run_prefix": "prefix2"
         }
     }
     files = {
         'raw_forward_seqs': [
             '/path/to/SKB2.640194_file.fastq',
             '/path/to/SKM4.640180_file.fastq',
             '/path/to/SKB3.640195_file.fastq'
         ]
     }
     job_id = self._create_template_and_job(prep_info, files,
                                            "per_sample_FASTQ")
     obs_success, obs_ainfo, obs_error = _validate_per_sample_FASTQ(
         self.qclient, job_id, prep_info, files)
     self.assertTrue(obs_success)
     filepaths = [('/path/to/SKB2.640194_file.fastq', 'raw_forward_seqs'),
                  ('/path/to/SKM4.640180_file.fastq', 'raw_forward_seqs'),
                  ('/path/to/SKB3.640195_file.fastq', 'raw_forward_seqs')]
     exp = [ArtifactInfo(None, "per_sample_FASTQ", filepaths)]
     self.assertEqual(obs_ainfo, exp)
     self.assertEqual(obs_error, "")
Example #10
    def test_validate_multiple(self):
        prep_info = {
            '1.SKB2.640194': {
                'run_prefix': 'prefix1'
            },
            '1.SKM4.640180': {
                'run_prefix': 'prefix1'
            },
            '1.SKB3.640195': {
                'run_prefix': 'prefix2'
            }
        }
        files = {
            'raw_forward_seqs':
            ['/path/to/prefix1.fastq', '/path/to/prefix2.fastq'],
            'raw_barcodes':
            ['/path/to/prefix1_b.fastq', '/path/to/prefix2_b.fastq']
        }
        atype = "FASTQ"
        job_id = self._create_template_and_job(prep_info, files, atype)

        obs_success, obs_ainfo, obs_error = _validate_multiple(
            self.qclient, job_id, prep_info, files, atype)

        self.assertTrue(obs_success)
        filepaths = [('/path/to/prefix1_b.fastq', 'raw_barcodes'),
                     ('/path/to/prefix2_b.fastq', 'raw_barcodes'),
                     ('/path/to/prefix1.fastq', 'raw_forward_seqs'),
                     ('/path/to/prefix2.fastq', 'raw_forward_seqs')]
        exp = [ArtifactInfo(None, "FASTQ", filepaths)]
        self.assertEqual(obs_ainfo, exp)
        self.assertEqual(obs_error, "")
Example #11
def _per_sample_ainfo(out_dir,
                      samples,
                      suffixes,
                      prg_name,
                      files_type_name,
                      fwd_and_rev=False):
    files = []
    missing_files = []
    smd = partial(join, out_dir)
    for rp, _, _, _ in samples:
        for suff in suffixes:
            fname = smd(suff % rp)
            if exists(fname):
                if fname.endswith('R1.fastq.gz'):
                    ftype = 'raw_forward_seqs'
                elif fname.endswith('R2.fastq.gz'):
                    ftype = 'raw_reverse_seqs'
                else:
                    # this should never happen and is not reproducible in
                    # practice, so it is not covered by tests
                    raise ValueError('File %s has an unexpected name' % fname)
                files.append((fname, ftype))
            else:
                missing_files.append(fname)

    if not files:
        # Command did not create any files, which means that no sequence
        # was kept after quality control and filtering for host data
        raise ValueError("No sequences left after %s" % prg_name)

    return [ArtifactInfo(files_type_name, 'per_sample_FASTQ', files)]
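
A hedged call sketch for this variant: it takes the suffix patterns explicitly and classifies each existing file as forward or reverse reads by its R1.fastq.gz / R2.fastq.gz ending. The suffixes, sample names, and labels below are illustrative assumptions.

samples = [('sampleA', None, None, None), ('sampleB', None, None, None)]
suffixes = ['%s.R1.fastq.gz', '%s.R2.fastq.gz']   # note the R1/R2 endings
ainfo = _per_sample_ainfo('/tmp/out', samples, suffixes,
                          'fastp', 'Adapter trimmed files')
# -> [ArtifactInfo('Adapter trimmed files', 'per_sample_FASTQ',
#      [(..., 'raw_forward_seqs'), (..., 'raw_reverse_seqs'), ...])]
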
Example #12
    def test_validate_prefix(self):
        prep_info = {
            'SKB8.640193': {
                'col': 'val1'
            },
            'SKD8.640184': {
                'col': 'val2'
            }
        }
        data = {'prep_info': dumps(prep_info), 'study': 1, 'data_type': '16S'}
        res = self.qclient.post('/apitest/prep_template/', data=data)

        sample_ids = ['SKB8.640193', 'SKD8.640184']
        biom_fp, job_id, parameters = self._create_job_and_biom(
            sample_ids, template=res['prep'])

        obs_success, obs_ainfo, obs_error = validate(self.qclient, job_id,
                                                     parameters, self.out_dir)
        exp_biom_fp = join(self.out_dir, basename(biom_fp))
        self._clean_up_files.append(exp_biom_fp)
        self.assertTrue(obs_success)
        self.assertEqual(obs_ainfo,
                         [ArtifactInfo(None, 'BIOM', [(exp_biom_fp, 'biom')])])
        self.assertEqual(obs_error, "")
        obs_t = load_table(exp_biom_fp)
        self.assertCountEqual(obs_t.ids(), ['1.SKB8.640193', '1.SKD8.640184'])
Example #13
def _validate_alpha_vector(files, metadata, out_dir):
    # Magic number [0] -> there is only one plain text file, which is the
    # alpha vector
    alpha_vector = files['plain_text'][0]
    alpha_qza = None
    if 'qza' in files:
        alpha_qza = files['qza'][0]

    # Parse the sample ids from the alpha_vector file
    alpha_ids = []
    with open(alpha_vector) as f:
        # Ignore the header line
        f.readline()
        for line in f:
            vals = line.strip().split('\t')
            if len(vals) != 2:
                return (False, None, "The alpha vector format is incorrect")
            alpha_ids.append(vals[0])

    metadata_ids = set(metadata)
    alpha_ids = set(alpha_ids)

    if not metadata_ids.issuperset(alpha_ids):
        return (False, None, "The alpha vector contains samples not present "
                "in the metadata")

    filepaths = [(alpha_vector, 'plain_text')]
    if alpha_qza is not None:
        filepaths.append((alpha_qza, 'qza'))

    return True, [ArtifactInfo(None, 'alpha_vector', filepaths)], ""
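
The parser above only needs a header line (which it skips) followed by exactly two tab-separated fields per row, and every sample id must appear in the metadata. A minimal round trip, assuming the function defined above is in scope; the file path, sample ids, and values are illustrative.

metadata = {'1.SKM4.640180': {}, '1.SKB8.640193': {}}   # only keys are used
with open('/tmp/alpha.txt', 'w') as f:
    f.write("\tobserved_otus\n")        # header line, ignored
    f.write("1.SKM4.640180\t100\n")     # exactly two tab-separated fields
    f.write("1.SKB8.640193\t95\n")
ok, ainfo, err = _validate_alpha_vector(
    {'plain_text': ['/tmp/alpha.txt']}, metadata, '/tmp/out')
# ok is True and ainfo holds a single 'alpha_vector' ArtifactInfo
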
Example #14
    def test_validate_ordination_results(self):
        # Create the ordination results
        sample_ids = [
            '1.SKM4.640180', '1.SKB8.640193', '1.SKD8.640184', '1.SKM9.640192',
            '1.SKB7.640196'
        ]
        ord_res_fp = self._create_ordination_results(sample_ids)

        # Test success
        obs_success, obs_ainfo, obs_error = _validate_ordination_results(
            {'plain_text': [ord_res_fp]}, self.metadata, self.out_dir)
        self.assertTrue(obs_success)
        exp_ainfo = [
            ArtifactInfo(None, "ordination_results",
                         [(ord_res_fp, 'plain_text')])
        ]
        self.assertEqual(obs_ainfo, exp_ainfo)
        self.assertEqual(obs_error, "")

        # Test failure
        sample_ids = [
            '1.SKM4.640180', '1.SKB8.640193', '1.SKD8.640184', '1.SKM9.640192',
            'NotASample'
        ]
        ord_res_fp = self._create_ordination_results(sample_ids)
        obs_success, obs_ainfo, obs_error = _validate_ordination_results(
            {'plain_text': [ord_res_fp]}, self.metadata, self.out_dir)
        self.assertFalse(obs_success)
        self.assertIsNone(obs_ainfo)
        self.assertEqual(
            obs_error, "The ordination results contain samples "
            "not present in the metadata")
Example #15
    def test_validate_no_changes(self):
        sample_ids = [
            '1.SKB2.640194', '1.SKM4.640180', '1.SKB3.640195', '1.SKB6.640176',
            '1.SKD6.640190', '1.SKM6.640187', '1.SKD9.640182', '1.SKM8.640201',
            '1.SKM2.640199', '1.SKD2.640178', '1.SKB7.640196', '1.SKD4.640185',
            '1.SKB8.640193', '1.SKM3.640197', '1.SKD5.640186', '1.SKB1.640202',
            '1.SKM1.640183', '1.SKD1.640179', '1.SKD3.640198', '1.SKB5.640181',
            '1.SKB4.640189', '1.SKB9.640200', '1.SKM9.640192', '1.SKD8.640184',
            '1.SKM5.640177', '1.SKM7.640188', '1.SKD7.640191'
        ]
        biom_fp, job_id, parameters = self._create_job_and_biom(sample_ids,
                                                                template=1)

        obs_success, obs_ainfo, obs_error = validate(self.qclient, job_id,
                                                     parameters, self.out_dir)
        exp_fp = partial(join, self.out_dir)
        exp_index_fp = exp_fp('index.html')
        exp_viz_fp = exp_fp('support_files')
        exp_qza_fp = exp_fp('feature-table.qza')
        self.assertTrue(obs_success)
        self.assertEqual(obs_ainfo, [
            ArtifactInfo(None, 'BIOM', [(biom_fp, 'biom'),
                                        (exp_index_fp, 'html_summary'),
                                        (exp_viz_fp, 'html_summary_dir'),
                                        (exp_qza_fp, 'qza')])
        ])
        self.assertEqual(obs_error, "")
Example #16
 def test_validate_demux_file(self):
     demux_fp, _, out_dir = self._generate_files({'s1': 's1', 's2': 's2'})
     prep_info = {
         "1.SKB2.640194": {
             "run_prefix": "s1"
         },
         "1.SKM4.640180": {
             "run_prefix": "s2"
         },
         "1.SKB3.640195": {
             "run_prefix": "s3"
         },
         "1.SKB6.640176": {
             "run_prefix": "s4"
         }
     }
     files = {'preprocessed_demux': demux_fp}
     job_id, _ = self._create_template_and_job(prep_info, files,
                                               "Demultiplexed")
     obs_success, obs_ainfo, obs_error = _validate_demux_file(
         self.qclient, job_id, prep_info, out_dir, demux_fp)
     self.assertTrue(obs_success)
     name = splitext(basename(demux_fp))[0]
     exp_fastq_fp = join(out_dir, "%s.fastq.gz" % name)
     exp_fasta_fp = join(out_dir, "%s.fasta.gz" % name)
     exp_demux_fp = join(out_dir, basename(demux_fp))
     filepaths = [(exp_fastq_fp, 'preprocessed_fastq'),
                  (exp_fasta_fp, 'preprocessed_fasta'),
                  (exp_demux_fp, 'preprocessed_demux')]
     exp = [ArtifactInfo(None, "Demultiplexed", filepaths)]
     self.assertEqual(obs_ainfo, exp)
     self.assertEqual(obs_error, "")
     with File(exp_demux_fp) as f:
         self.assertCountEqual(f.keys(), ["1.SKB2.640194", "1.SKM4.640180"])
Example #17
 def test_validate_per_sample_FASTQ_run_prefix(self):
     f1 = join(self.source_dir, 'SKB2.640194_file.fastq')
     f2 = join(self.source_dir, 'SKM4.640180_file.fastq')
     f3 = join(self.source_dir, 'SKB3.640195_file.fastq')
     raw_files = [f1, f2, f3]
     for x in raw_files:
         copyfile(self.fastq, x)
         self._clean_up_files.append(x)
     prep_info = {
         "1.SKB2.640194": {
             "run_prefix": "prefix1"
         },
         "1.SKM4.640180": {
             "run_prefix": "prefix2"
         },
         "1.SKB3.640195": {
             "run_prefix": "prefix3"
         }
     }
     files = {'raw_forward_seqs': raw_files}
     job_id, _ = self._create_template_and_job(prep_info, files,
                                               "per_sample_FASTQ")
     obs_success, obs_ainfo, obs_error = _validate_per_sample_FASTQ(
         self.qclient, job_id, prep_info, files)
     self.assertEqual(obs_error, "")
     self.assertTrue(obs_success)
     filepaths = [('%s.gz' % x, 'raw_forward_seqs') for x in raw_files]
     exp = [ArtifactInfo(None, "per_sample_FASTQ", filepaths)]
     self.assertEqual(obs_ainfo, exp)
Example #18
def spades(qclient, job_id, parameters, out_dir):
    """Run spades with the given parameters

    Parameters
    ----------
    qclient : tgp.qiita_client.QiitaClient
        The Qiita server client
    job_id : str
        The job id
    parameters : dict
        The parameter values to run spades
    out_dir : str
        The path to the job's output directory

    Returns
    -------
    bool, list, str
        The results of the job
    """
    msg = "Step 3 of 4: Checking resulting files"
    qclient.update_job_step(job_id, msg)

    artifact_id = parameters['input']
    artifact_info = qclient.get("/qiita_db/artifacts/%s/" % artifact_id)
    # Get the artifact metadata
    prep_info = qclient.get('/qiita_db/prep_template/%s/' %
                            artifact_info['prep_information'][0])
    df = pd.read_csv(prep_info['prep-file'],
                     sep='\t',
                     dtype='str',
                     na_values=[],
                     keep_default_na=True)
    snames = df.sample_name.values

    missing = []
    outfiles = []
    for sname in snames:
        scaffold = join(out_dir, sname, 'scaffolds.fasta')
        if exists(scaffold):
            new_scaffold = join(out_dir, sname, f'{sname}.fasta')
            run(['mv', scaffold, new_scaffold], stdout=PIPE)
            outfiles.append((new_scaffold, 'preprocessed_fasta'))
        else:
            missing.append(sname)

    if missing:
        error_msg = ('There was no scaffolds.fasta for samples: %s. Contact: '
                     '[email protected] and add this job id: %s' %
                     (', '.join(missing), job_id))
        return False, None, error_msg

    # Step 4 generating artifacts
    msg = "Step 4 of 4: Generating new artifact"
    qclient.update_job_step(job_id, msg)
    ainfo = [
        ArtifactInfo('Preprocessed FASTA', 'FASTA_preprocessed', outfiles)
    ]

    return True, ainfo, ""
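
A sketch of the output layout spades expects, useful when writing tests for it: one sub-directory per sample name from the prep file, each holding a scaffolds.fasta produced by SPAdes. The sample names and paths are illustrative assumptions.

from os import makedirs
from os.path import join

out_dir = '/tmp/spades-out'                  # hypothetical job output dir
for sname in ('sample1', 'sample2'):         # would come from the prep file
    makedirs(join(out_dir, sname), exist_ok=True)
    with open(join(out_dir, sname, 'scaffolds.fasta'), 'w') as f:
        f.write('>scaffold_1\nACGT\n')
# spades() renames each scaffolds.fasta to <sname>.fasta and returns a single
# 'Preprocessed FASTA' artifact of type FASTA_preprocessed; samples without a
# scaffolds.fasta end up in the error message instead.
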
Example #19
    def test_validate_representative_set(self):
        sample_ids = [
            '1.SKB2.640194', '1.SKM4.640180', '1.SKB3.640195', '1.SKB6.640176',
            '1.SKD6.640190', '1.SKM6.640187', '1.SKD9.640182', '1.SKM8.640201',
            '1.SKM2.640199'
        ]
        biom_fp, job_id, parameters = self._create_job_and_biom(sample_ids,
                                                                template=1)

        fd, fasta_fp = mkstemp(suffix=".fna")
        close(fd)
        with open(fasta_fp, 'w') as f:
            f.write(">O1 something\nACTG\n>O2\nATGC\n")
        self._clean_up_files.append(fasta_fp)

        parameters = {
            'template': parameters['template'],
            'files': dumps({
                'biom': [biom_fp],
                'preprocessed_fasta': [fasta_fp]
            }),
            'artifact_type': 'BIOM'
        }

        obs_success, obs_ainfo, obs_error = validate(self.qclient, job_id,
                                                     parameters, self.out_dir)
        self.assertTrue(obs_success)
        files = [(biom_fp, 'biom'), (fasta_fp, 'preprocessed_fasta')]
        self.assertEqual(obs_ainfo, [ArtifactInfo(None, 'BIOM', files)])
        self.assertEqual(obs_error, "")

        # Extra ids
        with open(fasta_fp, 'w') as f:
            f.write(">O1 something\nACTG\n>O2\nATGC\n>O3\nATGC\n")
        obs_success, obs_ainfo, obs_error = validate(self.qclient, job_id,
                                                     parameters, self.out_dir)
        self.assertFalse(obs_success)
        self.assertIsNone(obs_ainfo)
        self.assertEqual(
            obs_error,
            "The representative set sequence file includes observations not "
            "found in the BIOM table: O3")

        # Missing ids
        with open(fasta_fp, 'w') as f:
            f.write(">O1 something\nACTG\n")
        obs_success, obs_ainfo, obs_error = validate(self.qclient, job_id,
                                                     parameters, self.out_dir)
        self.assertFalse(obs_success)
        self.assertIsNone(obs_ainfo)
        self.assertEqual(
            obs_error,
            "The representative set sequence file is missing observation ids "
            "found in the BIOM tabe: O2")
Example #20
 def test_validate_no_changes_superset(self):
     sample_ids = [
         '1.SKB2.640194', '1.SKM4.640180', '1.SKB3.640195', '1.SKB6.640176',
         '1.SKD6.640190', '1.SKM6.640187', '1.SKD9.640182', '1.SKM8.640201',
         '1.SKM2.640199'
     ]
     biom_fp, job_id, parameters = self._create_job_and_biom(sample_ids,
                                                             template=1)
     obs_success, obs_ainfo, obs_error = validate(self.qclient, job_id,
                                                  parameters, self.out_dir)
     self.assertTrue(obs_success)
     self.assertEqual(obs_ainfo,
                      [ArtifactInfo(None, 'BIOM', [(biom_fp, 'biom')])])
     self.assertEqual(obs_error, "")
Example #21
def _validate_ordination_results(files, metadata, out_dir):
    # Magic number [0] -> there is only one plain text file, which is the
    # ordination results
    ord_res_fp = files['plain_text'][0]
    ord_res = OrdinationResults.read(ord_res_fp)

    # Get the ids of the ordination results and the metadata
    ord_res_ids = set(ord_res.samples.index)
    metadata_ids = set(metadata)

    if not metadata_ids.issuperset(ord_res_ids):
        return (False, None, "The ordination results contain samples not "
                "present in the metadata")

    filepaths = [(ord_res_fp, 'plain_text')]

    return True, [ArtifactInfo(None, 'ordination_results', filepaths)], ""
Example #22
def _validate_distance_matrix(files, metadata, out_dir):
    """Validates a distance matrix artifact"""
    # Magic number [0] -> there is only one plain text file which is
    # the distance matrix
    dm_fp = files['plain_text'][0]
    dm = DistanceMatrix.read(dm_fp)

    # Get the ids of the distance matrix and the metadata
    dm_ids = set(dm.ids)
    metadata_ids = set(metadata)

    if not metadata_ids.issuperset(dm_ids):
        return (False, None, "The distance matrix contain samples not "
                "present in the metadata")

    filepaths = [(dm_fp, 'plain_text')]

    return True, [ArtifactInfo(None, 'distance_matrix', filepaths)], ""
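
A minimal sketch of building an input that passes this validator, using scikit-bio's DistanceMatrix; the ids, file paths, and metadata dict are illustrative, and it assumes the function defined above is in scope.

from skbio import DistanceMatrix

ids = ['1.SKM4.640180', '1.SKB8.640193', '1.SKD8.640184']
dm = DistanceMatrix([[0, 1, 2], [1, 0, 3], [2, 3, 0]], ids)
dm.write('/tmp/dm.txt')

metadata = {i: {} for i in ids}              # only the keys are used
ok, ainfo, err = _validate_distance_matrix(
    {'plain_text': ['/tmp/dm.txt']}, metadata, '/tmp/out')
# ok is True; an id in the matrix that is missing from the metadata flips it
# to False with the error message asserted in the test above.
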
Example #23
    def test_validate_q2_visualization(self):
        # Valid qzv
        obs_success, obs_ainfo, obs_error = _validate_q2_visualization(
            {'qzv': [self.valid_qzv]}, self.out_dir)
        self.assertEqual(obs_error, "")
        self.assertTrue(obs_success)
        exp_files = [(self.valid_qzv, 'qzv'),
                     (join(self.out_dir, 'index.html'), 'html_summary'),
                     (join(self.out_dir, 'support_files'), 'html_summary_dir')]
        exp_ainfo = [ArtifactInfo(None, 'q2_visualization', exp_files)]
        self.assertEqual(obs_ainfo, exp_ainfo)

        # Invalid qzv
        obs_success, obs_ainfo, obs_error = _validate_q2_visualization(
            {'qzv': [self.invalid_qzv]}, self.out_dir)
        self.assertIn("Error loading Qiime 2 visualization:", obs_error)
        self.assertFalse(obs_success)
        self.assertIsNone(obs_ainfo)
Example #24
def _validate_feature_data(files, metadata, out_dir):
    # Magic number [0] -> there is only one plain text file, which is the
    # feature data (taxonomy) file
    fdt = files['plain_text'][0]
    fdt_qza = None
    if 'qza' in files:
        fdt_qza = files['qza'][0]

    # basic header check to verify that it looks like a taxonomy file
    with open(fdt) as f:
        line = f.readline()
        if 'Tax' not in line or 'ID' not in line:
            return (False, None, 'The file header seems wrong "%s"' % line)

    filepaths = [(fdt, 'plain_text')]
    if fdt_qza is not None:
        filepaths.append((fdt_qza, 'qza'))

    return True, [ArtifactInfo(None, 'FeatureData', filepaths)], ""
Example #25
    def test_validate_no_changes(self):
        sample_ids = [
            '1.SKB2.640194', '1.SKM4.640180', '1.SKB3.640195', '1.SKB6.640176',
            '1.SKD6.640190', '1.SKM6.640187', '1.SKD9.640182', '1.SKM8.640201',
            '1.SKM2.640199', '1.SKD2.640178', '1.SKB7.640196', '1.SKD4.640185',
            '1.SKB8.640193', '1.SKM3.640197', '1.SKD5.640186', '1.SKB1.640202',
            '1.SKM1.640183', '1.SKD1.640179', '1.SKD3.640198', '1.SKB5.640181',
            '1.SKB4.640189', '1.SKB9.640200', '1.SKM9.640192', '1.SKD8.640184',
            '1.SKM5.640177', '1.SKM7.640188', '1.SKD7.640191'
        ]
        biom_fp, job_id, parameters = self._create_job_and_biom(sample_ids,
                                                                template=1)

        obs_success, obs_ainfo, obs_error = validate(self.qclient, job_id,
                                                     parameters, self.out_dir)
        self.assertTrue(obs_success)
        self.assertEqual(obs_ainfo,
                         [ArtifactInfo(None, 'BIOM', [(biom_fp, 'biom')])])
        self.assertEqual(obs_error, "")
Example #26
 def test_validate_analysis(self):
     sample_ids = [
         '1.SKM4.640180', '1.SKB8.640193', '1.SKD8.640184', '1.SKM9.640192',
         '1.SKB7.640196'
     ]
     biom_fp, job_id, parameters = self._create_job_and_biom(sample_ids,
                                                             analysis=1)
     obs_success, obs_ainfo, obs_error = validate(self.qclient, job_id,
                                                  parameters, self.out_dir)
     exp_fp = partial(join, self.out_dir)
     exp_index_fp = exp_fp('index.html')
     exp_viz_fp = exp_fp('support_files')
     self.assertTrue(obs_success)
     self.assertEqual(obs_ainfo, [
         ArtifactInfo(None, 'BIOM', [(biom_fp, 'biom'),
                                     (exp_index_fp, 'html_summary'),
                                     (exp_viz_fp, 'html_summary_dir')])
     ])
     self.assertEqual(obs_error, "")
Example #27
    def test_validate_alpha_vector(self):
        # Create the alpha vector
        sample_ids = [
            '1.SKM4.640180', '1.SKB8.640193', '1.SKD8.640184', '1.SKM9.640192'
        ]
        alpha_vector_fp = self._create_alpha_vector(sample_ids)

        # Test success
        obs_success, obs_ainfo, obs_error = _validate_alpha_vector(
            {'plain_text': [alpha_vector_fp]}, self.metadata, self.out_dir)
        self.assertEqual(obs_error, "")
        self.assertTrue(obs_success)
        exp_ainfo = [
            ArtifactInfo(None, "alpha_vector",
                         [(alpha_vector_fp, 'plain_text')])
        ]
        self.assertEqual(obs_ainfo, exp_ainfo)

        # Test failure wrong ids
        sample_ids = [
            '1.SKM4.640180', '1.SKB8.640193', '1.SKD8.640184', 'NotASample'
        ]
        alpha_vector_fp = self._create_alpha_vector(sample_ids)
        obs_success, obs_ainfo, obs_error = _validate_alpha_vector(
            {'plain_text': [alpha_vector_fp]}, self.metadata, self.out_dir)
        self.assertEqual(
            obs_error, "The alpha vector contains samples not "
            "present in the metadata")
        self.assertFalse(obs_success)
        self.assertIsNone(obs_ainfo)

        # Test failure wrong format
        fd, alpha_vector_fp = mkstemp(suffix='.txt', dir=self.out_dir)
        close(fd)
        with open(alpha_vector_fp, 'w') as f:
            f.write("\tobserved_otus\nsample 1\n")
        obs_success, obs_ainfo, obs_error = _validate_alpha_vector(
            {'plain_text': [alpha_vector_fp]}, self.metadata, self.out_dir)
        self.assertEqual(obs_error, "The alpha vector format is incorrect")
        self.assertFalse(obs_success)
        self.assertIsNone(obs_ainfo)
Example #28
    def test_validate_no_changes_superset(self):
        sample_ids = [
            '1.SKB2.640194', '1.SKM4.640180', '1.SKB3.640195', '1.SKB6.640176',
            '1.SKD6.640190', '1.SKM6.640187', '1.SKD9.640182', '1.SKM8.640201',
            '1.SKM2.640199'
        ]
        biom_fp, job_id, parameters = self._create_job_and_biom(sample_ids,
                                                                template=1)
        obs_success, obs_ainfo, obs_error = validate(self.qclient, job_id,
                                                     parameters, self.out_dir)
        exp_fp = partial(join, self.out_dir)
        exp_index_fp = exp_fp('index.html')
        exp_viz_fp = exp_fp('support_files')

        self.assertTrue(obs_success)
        self.assertEqual(obs_ainfo, [
            ArtifactInfo(None, 'BIOM', [(biom_fp, 'biom'),
                                        (exp_index_fp, 'html_summary'),
                                        (exp_viz_fp, 'html_summary_dir')])
        ])
        self.assertEqual(obs_error, "")
Example #29
def _validate_q2_visualization(files, out_dir):
    # Magic number 0 -> there is only 1 qzv file
    qzv_fp = files['qzv'][0]
    # If the loader fails, this is not a correct Qiime 2 visualization. There
    # is no common exception raised, so we catch all of them
    try:
        q2vis = Visualization.load(qzv_fp)
    except Exception as e:
        return False, None, "Error loading Qiime 2 visualization: %s" % e

    # The visualization in Qiime 2 can contain multiple files and directories.
    # Adding all of them to Qiita can generate a very polluted GUI. In order
    # to improve that, we create a directory where all the files are added
    # (including the original index file) and we create a new index file with
    # an iframe that points to the old index file. This way, we are only adding
    # one HTML file and one directory to the Qiita filepaths.
    html_dir = join(out_dir, 'support_files')
    html_fp = join(out_dir, 'index.html')

    # Extract all the visualization files in the support_files.
    q2vis.export_data(html_dir)

    # Find the index paths. Only HTML-based visualizations are currently
    # supported, since everything that we are doing is web based. Not sure
    # if there is any other type of visualization in Qiime 2 at this point,
    # but checking here will show a useful error in case it occurs.
    index_paths = q2vis.get_index_paths()
    if 'html' not in index_paths:
        return (False, None,
                "Only Qiime 2 visualization with an html index are supported")

    index_name = basename(index_paths['html'])
    with open(html_fp, 'w') as f:
        f.write(Q2_INDEX % index_name)

    # We add the original qzv file so users can download it and play with it
    filepaths = [(qzv_fp, 'qzv'), (html_fp, 'html_summary'),
                 (html_dir, 'html_summary_dir')]

    return True, [ArtifactInfo(None, 'q2_visualization', filepaths)], ""
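
Q2_INDEX is a module-level template that is not shown in this listing; the sketch below is only a guess at its general shape, consistent with how it is used above: a single %s slot for the name of the original index file inside support_files/.

# Hypothetical shape of the Q2_INDEX template (an assumption, not the
# plugin's actual constant): an iframe pointing at the exported index file.
Q2_INDEX = """<!DOCTYPE html>
<html>
  <body style="margin:0">
    <iframe src="./support_files/%s"
            style="border:0; width:100%%; height:100vh"></iframe>
  </body>
</html>"""
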
Example #30
    def test_validate_SFF(self):
        prep_info = {
            "1.SKB2.640194": {
                "run_prefix": "GAX40"
            },
            "1.SKM4.640180": {
                "run_prefix": "GAX40"
            },
            "1.SKB3.640195": {
                "run_prefix": "GAX50"
            }
        }
        files = {
            'raw_sff': [
                '/path/to/GAX401.sff', '/path/to/GAX402.sff',
                '/path/to/GAX501.sff'
            ]
        }
        job_id, _ = self._create_template_and_job(prep_info, files, "SFF")
        obs_success, obs_ainfo, obs_error = _validate_multiple(
            self.qclient, job_id, prep_info, files, 'SFF')
        self.assertTrue(obs_success)
        filepaths = [('/path/to/GAX401.sff', 'raw_sff'),
                     ('/path/to/GAX402.sff', 'raw_sff'),
                     ('/path/to/GAX501.sff', 'raw_sff')]
        exp = [ArtifactInfo(None, "SFF", filepaths)]
        self.assertEqual(obs_ainfo, exp)
        self.assertEqual(obs_error, "")

        # let's test a failure
        files = {'raw_sff': ['/path/to/GAX401.sff', '/path/to/GAX402.sff']}
        job_id, _ = self._create_template_and_job(prep_info, files, "SFF")
        obs_success, obs_ainfo, obs_error = _validate_multiple(
            self.qclient, job_id, prep_info, files, 'SFF')
        error = ("Error creating artifact. Offending files:\nraw_sff: The "
                 "following run prefixes in the prep information file do not "
                 "match any file: GAX50")
        self.assertFalse(obs_success)
        self.assertIsNone(obs_ainfo)
        self.assertCountEqual(obs_error, error)