示例#1
0
    def test_insdc_status(self):
        ppd = PreprocessedData(1)

        # verifying current value
        self.assertEqual(ppd.submitted_to_insdc_status(), 'submitting')

        # changing value and then verifying new value
        ppd.update_insdc_status('failed')
        self.assertEqual(ppd.submitted_to_insdc_status(), 'failed')

        # checking failure
        with self.assertRaises(ValueError):
            ppd.update_insdc_status('not a valid status')
示例#2
0
    def test_insdc_status(self):
        ppd = PreprocessedData(1)

        # verifying current value
        self.assertEqual(ppd.submitted_to_insdc_status(), 'submitting')

        # changing value and then verifying new value
        ppd.update_insdc_status('failed')
        self.assertEqual(ppd.submitted_to_insdc_status(), 'failed')

        # checking failure
        with self.assertRaises(ValueError):
            ppd.update_insdc_status('not a valid status')
示例#3
0
    def test_update_insdc_status(self):
        """Able to update insdc status"""
        pd = PreprocessedData(1)
        self.assertEqual(pd.submitted_to_insdc_status(), 'submitting')
        pd.update_insdc_status('failed')
        self.assertEqual(pd.submitted_to_insdc_status(), 'failed')

        pd.update_insdc_status('success', 'foo', 'bar')
        self.assertEqual(pd.submitted_to_insdc_status(), 'success')
        self.assertEqual(pd.ebi_study_accession, 'foo')
        self.assertEqual(pd.ebi_submission_accession, 'bar')

        with self.assertRaises(ValueError):
            pd.update_insdc_status('not valid state')

        with self.assertRaises(ValueError):
            pd.update_insdc_status('success', 'only one accession')
示例#4
0
    def test_update_insdc_status(self):
        """Able to update insdc status"""
        pd = PreprocessedData(1)
        self.assertEqual(pd.submitted_to_insdc_status(), 'submitting')
        pd.update_insdc_status('failed')
        self.assertEqual(pd.submitted_to_insdc_status(), 'failed')

        pd.update_insdc_status('success', 'foo', 'bar')
        self.assertEqual(pd.submitted_to_insdc_status(), 'success')
        self.assertEqual(pd.ebi_study_accession, 'foo')
        self.assertEqual(pd.ebi_submission_accession, 'bar')

        with self.assertRaises(ValueError):
            pd.update_insdc_status('not valid state')

        with self.assertRaises(ValueError):
            pd.update_insdc_status('success', 'only one accession')
示例#5
0
def submit_EBI(preprocessed_data_id, action, send, fastq_dir_fp=None):
    """Submit a preprocessed data to EBI

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocesssed data id
    action : %s
        The action to perform with this data
    send : bool
        True to actually send the files
    fastq_dir_fp : str, optional
        The fastq filepath

    Notes
    -----
    If fastq_dir_fp is passed, it must not contain any empty files, or
    gzipped empty files
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    preprocessed_data_id_str = str(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    investigation_type = None
    new_investigation_type = None

    status = preprocessed_data.submitted_to_insdc_status()
    if status in ("submitting", "success"):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    if send:
        # If we intend actually to send the files, then change the status in
        # the database
        preprocessed_data.update_insdc_status("submitting")

    # we need to figure out whether the investigation type is a known one
    # or if we have to submit a "new_investigation_type" to EBI
    current_type = prep_template.investigation_type
    ena_ontology = Ontology(convert_to_id("ENA", "ontology"))
    if current_type in ena_ontology.terms:
        investigation_type = current_type
    elif current_type in ena_ontology.user_defined_terms:
        investigation_type = "Other"
        new_investigation_type = current_type
    else:
        # This should never happen
        raise ValueError(
            "Unrecognized investigation type: '%s'. This term "
            "is neither one of the official terms nor one of the "
            "user-defined terms in the ENA ontology"
        )

    if fastq_dir_fp is not None:
        # If the user specifies a FASTQ directory, use it

        # Set demux_samples to None so that MetadataTemplate.to_file will put
        # all samples in the template files
        demux_samples = None
    else:
        # If the user does not specify a FASTQ directory, create one and
        # re-serialize the per-sample FASTQs from the demux file
        fastq_dir_fp = mkdtemp(prefix=qiita_config.working_dir)
        demux = [path for _, path, ftype in preprocessed_data.get_filepaths() if ftype == "preprocessed_demux"][0]

        # Keep track of which files were actually in the demux file so that we
        # can write those rows to the prep and samples templates
        demux_samples = set()

        with open_file(demux) as demux_fh:
            for samp, iterator in to_per_sample_ascii(demux_fh, list(sample_template)):
                demux_samples.add(samp)
                sample_fp = join(fastq_dir_fp, "%s.fastq.gz" % samp)
                wrote_sequences = False
                with gzopen(sample_fp, "w") as fh:
                    for record in iterator:
                        fh.write(record)
                        wrote_sequences = True

                if not wrote_sequences:
                    remove(sample_fp)

    output_dir = fastq_dir_fp + "_submission"

    samp_fp = join(fastq_dir_fp, "sample_metadata.txt")
    prep_fp = join(fastq_dir_fp, "prep_metadata.txt")

    sample_template.to_file(samp_fp, demux_samples)
    prep_template.to_file(prep_fp, demux_samples)

    # Get specific output directory and set filepaths
    get_output_fp = partial(join, output_dir)
    study_fp = get_output_fp("study.xml")
    sample_fp = get_output_fp("sample.xml")
    experiment_fp = get_output_fp("experiment.xml")
    run_fp = get_output_fp("run.xml")
    submission_fp = get_output_fp("submission.xml")

    if not isdir(output_dir):
        makedirs(output_dir)
    else:
        raise IOError("The output folder already exists: %s" % output_dir)

    with open(samp_fp, "U") as st, open(prep_fp, "U") as pt:
        submission = EBISubmission.from_templates_and_per_sample_fastqs(
            preprocessed_data_id_str,
            study.title,
            study.info["study_abstract"],
            investigation_type,
            st,
            pt,
            fastq_dir_fp,
            new_investigation_type=new_investigation_type,
            pmids=study.pmids,
        )

    submission.write_all_xml_files(study_fp, sample_fp, experiment_fp, run_fp, submission_fp, action)

    if send:
        submission.send_sequences()
        study_accession, submission_accession = submission.send_xml()

        if study_accession is None or submission_accession is None:
            preprocessed_data.update_insdc_status("failed")

            raise ComputeError("EBI Submission failed!")
        else:
            preprocessed_data.update_insdc_status("success", study_accession, submission_accession)
    else:
        study_accession, submission_accession = None, None

    return study_accession, submission_accession
示例#6
0
def submit_EBI(preprocessed_data_id, action, send, fastq_dir_fp=None):
    """Submit a preprocessed data to EBI

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocesssed data id
    action : %s
        The action to perform with this data
    send : bool
        True to actually send the files
    fastq_dir_fp : str, optional
        The fastq filepath
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    preprocessed_data_id_str = str(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    investigation_type = None
    new_investigation_type = None

    status = preprocessed_data.submitted_to_insdc_status()
    if status in ('submitting', 'success'):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    if send:
        # If we intend actually to send the files, then change the status in
        # the database
        preprocessed_data.update_insdc_status('submitting')

    # we need to figure out whether the investigation type is a known one
    # or if we have to submit a "new_investigation_type" to EBI
    current_type = prep_template.investigation_type
    ena_ontology = Ontology(convert_to_id('ENA', 'ontology'))
    if current_type in ena_ontology.terms:
        investigation_type = current_type
    elif current_type in ena_ontology.user_defined_terms:
        investigation_type = 'Other'
        new_investigation_type = current_type
    else:
        # This should never happen
        raise ValueError("Unrecognized investigation type: '%s'. This term "
                         "is neither one of the official terms nor one of the "
                         "user-defined terms in the ENA ontology")

    if fastq_dir_fp is not None:
        # If the user specifies a FASTQ directory, use it

        # Set demux_samples to None so that MetadataTemplate.to_file will put
        # all samples in the template files
        demux_samples = None
    else:
        # If the user does not specify a FASTQ directory, create one and
        # re-serialize the per-sample FASTQs from the demux file
        fastq_dir_fp = mkdtemp(prefix=qiita_config.working_dir)
        demux = [
            path for _, path, ftype in preprocessed_data.get_filepaths()
            if ftype == 'preprocessed_demux'
        ][0]

        # Keep track of which files were actually in the demux file so that we
        # can write those rows to the prep and samples templates
        demux_samples = set()

        with open_file(demux) as demux_fh:
            for samp, iterator in to_per_sample_ascii(demux_fh,
                                                      list(sample_template)):
                demux_samples.add(samp)
                sample_fp = join(fastq_dir_fp, "%s.fastq.gz" % samp)
                with gzopen(sample_fp, 'w') as fh:
                    for record in iterator:
                        fh.write(record)

    output_dir = fastq_dir_fp + '_submission'

    samp_fp = join(fastq_dir_fp, 'sample_metadata.txt')
    prep_fp = join(fastq_dir_fp, 'prep_metadata.txt')

    sample_template.to_file(samp_fp, demux_samples)
    prep_template.to_file(prep_fp, demux_samples)

    # Get specific output directory and set filepaths
    get_output_fp = partial(join, output_dir)
    study_fp = get_output_fp('study.xml')
    sample_fp = get_output_fp('sample.xml')
    experiment_fp = get_output_fp('experiment.xml')
    run_fp = get_output_fp('run.xml')
    submission_fp = get_output_fp('submission.xml')

    if not isdir(output_dir):
        makedirs(output_dir)
    else:
        raise IOError('The output folder already exists: %s' % output_dir)

    with open(samp_fp, 'U') as st, open(prep_fp, 'U') as pt:
        submission = EBISubmission.from_templates_and_per_sample_fastqs(
            preprocessed_data_id_str,
            study.title,
            study.info['study_abstract'],
            investigation_type,
            st,
            pt,
            fastq_dir_fp,
            new_investigation_type=new_investigation_type,
            pmids=study.pmids)

    submission.write_all_xml_files(study_fp, sample_fp, experiment_fp, run_fp,
                                   submission_fp, action)

    if send:
        submission.send_sequences()
        study_accession, submission_accession = submission.send_xml()

        if study_accession is None or submission_accession is None:
            preprocessed_data.update_insdc_status('failed')

            raise ComputeError("EBI Submission failed!")
        else:
            preprocessed_data.update_insdc_status('success', study_accession,
                                                  submission_accession)
    else:
        study_accession, submission_accession = None, None

    return study_accession, submission_accession