Пример #1
0
    def _process_investigation_type(self, inv_type, user_def_type, new_type):
        """Return the investigation_type and add it to the ontology if needed

        Parameters
        ----------
        inv_type : str
            The investigation type
        user_def_type : str
            The user-defined investigation type
        new_type : str
            The new user-defined investigation_type

        Returns
        -------
        str
            The investigation type chosen by the user
        """
        if inv_type == 'None Selected':
            inv_type = None
        elif inv_type == 'Other' and user_def_type == 'New Type':
            # This is a nre user defined investigation type so store it
            inv_type = new_type
            ontology = Ontology(convert_to_id('ENA', 'ontology'))
            ontology.add_user_defined_term(inv_type)
        elif inv_type == 'Other' and user_def_type != 'New Type':
            inv_type = user_def_type
        return inv_type
Пример #2
0
    def __init__(self, preprocessed_data_id, study_title, study_abstract,
                 investigation_type, empty_value='no_data',
                 new_investigation_type=None, pmids=None, **kwargs):
        """Store the study information and derive the library attributes

        Parameters
        ----------
        preprocessed_data_id
            Identifier of the preprocessed data being submitted
        study_title
            The title of the study
        study_abstract
            The abstract of the study
        investigation_type : str
            The investigation type; must be known to the ENA ontology
        empty_value : optional
            Stored as `self.empty_value`. Default: 'no_data'
        new_investigation_type : str, optional
            The user-defined type; required if `investigation_type` is
            'Other'
        pmids : list, optional
            Stored as `self.pmids`; an empty list is used if not provided
        kwargs
            Any other study metadata, kept (stringified) in
            `self.additional_metadata`

        Raises
        ------
        ValueError
            If `investigation_type` is 'Other' and `new_investigation_type`
            is not provided
            If `investigation_type` is not part of the ENA ontology
        """
        self.preprocessed_data_id = preprocessed_data_id
        self.study_title = study_title
        self.study_abstract = study_abstract
        self.investigation_type = investigation_type
        self.empty_value = empty_value
        self.new_investigation_type = new_investigation_type
        self.sequence_files = []

        self.study_xml_fp = None
        self.sample_xml_fp = None
        self.experiment_xml_fp = None
        self.run_xml_fp = None
        self.submission_xml_fp = None
        # A fresh list per instance so the default is never shared
        self.pmids = pmids if pmids is not None else []

        self.ebi_dir = self._get_ebi_dir()

        if self.investigation_type == 'Other' and \
                self.new_investigation_type is None:
            # The two literals are concatenated, so only one of them may
            # carry the separating space
            raise ValueError("If the investigation_type is 'Other' you have "
                             "to specify a value for new_investigation_type.")

        ontology = Ontology(convert_to_id('ENA', 'ontology'))
        if ontology.term_type(self.investigation_type) == 'not_ontology':
            raise ValueError("The investigation type must be part of ENA's "
                             "ontology, '%s' is not valid" %
                             self.investigation_type)

        # dicts that map investigation_type to library attributes
        lib_strategies = {'metagenome': 'POOLCLONE',
                          'mimarks-survey': 'AMPLICON'}
        lib_selections = {'mimarks-survey': 'PCR'}
        lib_sources = {}

        # if the investigation_type is 'Other' we should use the value in
        # the new_investigation_type attribute to retrieve this information
        if self.investigation_type == 'Other':
            key = self.new_investigation_type
        else:
            key = self.investigation_type

        # Types without an explicit mapping fall back to generic values
        self.library_strategy = lib_strategies.get(key, "OTHER")
        self.library_source = lib_sources.get(key, "METAGENOMIC")
        self.library_selection = lib_selections.get(key, "unspecified")

        # This allows addition of other arbitrary study metadata
        self.additional_metadata = self._stringify_kwargs(kwargs)

        # This will hold the submission's samples, keyed by the sample name
        self.samples = {}
Пример #3
0
class TestOntology(TestCase):
    """Tests for the Ontology object, run against the 'ENA' ontology"""

    def setUp(self):
        # 999999999 is the id that the 'ENA' ontology is expected to have
        # (see testConvertToID)
        self.ontology = Ontology(999999999)

    def testConvertToID(self):
        ontology_id = convert_to_id('ENA', 'ontology')
        self.assertEqual(ontology_id, 999999999)

    def testConvertFromID(self):
        shortname = convert_from_id(999999999, 'ontology')
        self.assertEqual(shortname, 'ENA')

    def testShortNameProperty(self):
        shortname = self.ontology.shortname
        self.assertEqual(shortname, 'ENA')

    def testTerms(self):
        # The official terms, in the order they are expected to be returned
        observed = self.ontology.terms
        expected = ['Whole Genome Sequencing',
                    'Metagenomics',
                    'Transcriptome Analysis',
                    'Resequencing',
                    'Epigenetics',
                    'Synthetic Genomics',
                    'Forensic or Paleo-genomics',
                    'Gene Regulation Study',
                    'Cancer Genomics',
                    'Population Genomics',
                    'RNASeq',
                    'Exome Sequencing',
                    'Pooled Clone Sequencing',
                    'Other']
        self.assertEqual(observed, expected)

    def test_user_defined_terms(self):
        # No user-defined term is expected before any test adds one
        observed = self.ontology.user_defined_terms
        self.assertEqual(observed, [])

    def test_term_type(self):
        # An official term
        observed = self.ontology.term_type('RNASeq')
        self.assertEqual('ontology', observed)

        # A term that is not known at all
        observed = self.ontology.term_type('Sasquatch')
        self.assertEqual('not_ontology', observed)

        # A term right after being added as user-defined
        self.ontology.add_user_defined_term('Test Term')
        observed = self.ontology.term_type('Test Term')
        self.assertEqual('user_defined', observed)

    def test_add_user_defined_term(self):
        term = 'Test Term'
        self.assertFalse(term in self.ontology.user_defined_terms)
        count_before = len(self.ontology.user_defined_terms)

        self.ontology.add_user_defined_term(term)

        count_after = len(self.ontology.user_defined_terms)
        self.assertTrue(term in self.ontology.user_defined_terms)
        # Exactly one term has been added
        self.assertEqual(count_after - count_before, 1)

    def testContains(self):
        known_term = 'Metagenomics'
        unknown_term = 'NotATerm'
        self.assertTrue(known_term in self.ontology)
        self.assertFalse(unknown_term in self.ontology)
Пример #4
0
def ontology_patch_handler(req_op, req_path, req_value=None, req_from=None):
    """Apply a patch operation to an ontology

    Only the "add" operation is supported: it adds `req_value` as a
    user-defined term of the ontology named in `req_path`.

    Parameters
    ----------
    req_op : str
        The operation to perform on the ontology
    req_path : str
        The ontology to patch
    req_value : str, optional
        The value that needs to be modified
    req_from : str, optional
        The original path of the element (not used by this handler)

    Returns
    -------
    dict of {str: str}
        A dictionary of the form: {'status': str, 'message': str} in which
        status is the status of the request ('error' or 'success') and message
        is a human readable string with the error message in case that status
        is 'error'.
    """
    # Reject anything but "add" up front
    if req_op != "add":
        unsupported_msg = ('Operation "%s" not supported. '
                           "Current supported operations: add" % req_op)
        return {"status": "error", "message": unsupported_msg}

    # The path must hold exactly one non-empty component: the ontology name
    components = [part for part in req_path.split("/") if part]
    if len(components) != 1:
        return {"status": "error", "message": "Incorrect path parameter"}
    ontology_name = components[0]

    try:
        ontology_id = convert_to_id(ontology_name, "ontology")
    except QiitaDBLookupError:
        missing_msg = 'Ontology "%s" does not exist' % ontology_name
        return {"status": "error", "message": missing_msg}

    Ontology(ontology_id).add_user_defined_term(req_value)

    return {"status": "success", "message": ""}
Пример #5
0
def _get_ENA_ontology():
    """Collect the terms of the ENA ontology

    Returns
    -------
    dict of {str: list of str}
        A dictionary of the form {'ENA': list of str, 'User': list of str}
        with the ENA-defined terms and the user-defined terms, respectively.
        Both lists are sorted, except that "Other" is always the last
        ENA-defined term.
    """
    ena = Ontology(convert_to_id('ENA', 'ontology'))

    official_terms = sorted(ena.terms)
    # "Other" is pulled out of its sorted position and put back at the end
    official_terms.remove('Other')
    official_terms.append('Other')

    user_terms = sorted(ena.user_defined_terms)

    return {'ENA': official_terms, 'User': user_terms}
Пример #6
0
class TestOntology(TestCase):
    """Exercises the Ontology object through the 'ENA' ontology"""

    def setUp(self):
        # The id used here is the one testConvertToID expects for 'ENA'
        ena_id = 999999999
        self.ontology = Ontology(ena_id)

    def testConvertToID(self):
        observed_id = convert_to_id('ENA', 'ontology')
        self.assertEqual(observed_id, 999999999)

    def testConvertFromID(self):
        observed_name = convert_from_id(999999999, 'ontology')
        self.assertEqual(observed_name, 'ENA')

    def testShortNameProperty(self):
        observed_name = self.ontology.shortname
        self.assertEqual(observed_name, 'ENA')

    def testTerms(self):
        observed_terms = self.ontology.terms
        # Expected official terms, order included
        expected_terms = [
            'Whole Genome Sequencing', 'Metagenomics',
            'Transcriptome Analysis', 'Resequencing', 'Epigenetics',
            'Synthetic Genomics', 'Forensic or Paleo-genomics',
            'Gene Regulation Study', 'Cancer Genomics', 'Population Genomics',
            'RNASeq', 'Exome Sequencing', 'Pooled Clone Sequencing', 'Other'
        ]
        self.assertEqual(observed_terms, expected_terms)

    def test_user_defined_terms(self):
        # Nothing user-defined is expected until a test adds a term
        observed_terms = self.ontology.user_defined_terms
        self.assertEqual(observed_terms, [])

    def test_term_type(self):
        # official term
        term_type = self.ontology.term_type('RNASeq')
        self.assertEqual('ontology', term_type)

        # unknown term
        term_type = self.ontology.term_type('Sasquatch')
        self.assertEqual('not_ontology', term_type)

        # freshly added user-defined term
        self.ontology.add_user_defined_term('Test Term')
        term_type = self.ontology.term_type('Test Term')
        self.assertEqual('user_defined', term_type)

    def test_add_user_defined_term(self):
        new_term = 'Test Term'
        self.assertFalse(new_term in self.ontology.user_defined_terms)
        n_before = len(self.ontology.user_defined_terms)

        self.ontology.add_user_defined_term(new_term)

        n_after = len(self.ontology.user_defined_terms)
        self.assertTrue(new_term in self.ontology.user_defined_terms)
        # the list grew by exactly one entry
        self.assertEqual(n_after - n_before, 1)

    def testContains(self):
        present = 'Metagenomics'
        absent = 'NotATerm'
        self.assertTrue(present in self.ontology)
        self.assertFalse(absent in self.ontology)
Пример #7
0
    def __init__(self, artifact_id, action):
        """Gather and validate everything needed to submit an artifact to EBI

        Parameters
        ----------
        artifact_id : int
            The id of the artifact to submit
        action : str
            The submission action; must be one of `self.valid_ebi_actions`

        Raises
        ------
        EBISubmissionError
            If `action` is not a valid EBI submission action
            If the artifact cannot be submitted to EBI
            If the artifact was already submitted and `action` is not 'MODIFY'
            If any of the following is found (all of them are reported in a
            single error): unrecognized investigation type, missing columns
            in the sample or prep template, samples without a valid platform
            or instrument model

        Notes
        -----
        Every error is also recorded through `LogEntry.create` before being
        raised.
        """
        # Validation problems that do not abort immediately are accumulated
        # here and raised together once all checks have run
        error_msgs = []

        if action not in self.valid_ebi_actions:
            error_msg = ("%s is not a valid EBI submission action, valid "
                         "actions are: %s" %
                         (action, ', '.join(self.valid_ebi_actions)))
            LogEntry.create('Runtime', error_msg)
            raise EBISubmissionError(error_msg)

        ena_ontology = Ontology(convert_to_id('ENA', 'ontology'))
        self.action = action
        self.artifact = Artifact(artifact_id)
        if not self.artifact.can_be_submitted_to_ebi:
            error_msg = ("Artifact %d cannot be submitted to EBI" %
                         self.artifact.id)
            LogEntry.create('Runtime', error_msg)
            raise EBISubmissionError(error_msg)

        self.study = self.artifact.study
        self.sample_template = self.study.sample_template
        # If we reach this point, there should be only one prep template
        # attached to the artifact. By design, each artifact has at least one
        # prep template. Artifacts with more than one prep template cannot be
        # submitted to EBI, so the attribute 'can_be_submitted_to_ebi' should
        # be set to false, which is checked in the previous if statement
        self.prep_template = self.artifact.prep_templates[0]

        # Only the 'MODIFY' action is allowed on an already submitted artifact
        if self.artifact.is_submitted_to_ebi and action != 'MODIFY':
            error_msg = ("Cannot resubmit! Artifact %d has already "
                         "been submitted to EBI." % artifact_id)
            LogEntry.create('Runtime', error_msg)
            raise EBISubmissionError(error_msg)

        self.artifact_id = artifact_id
        self.study_title = self.study.title
        self.study_abstract = self.study.info['study_abstract']

        # Official ENA terms are used as they are; user-defined terms are
        # reported as 'Other' plus the user-defined value
        it = self.prep_template.investigation_type
        if it in ena_ontology.terms:
            self.investigation_type = it
            self.new_investigation_type = None
        elif it in ena_ontology.user_defined_terms:
            self.investigation_type = 'Other'
            self.new_investigation_type = it
        else:
            # This should never happen
            error_msgs.append("Unrecognized investigation type: '%s'. This "
                              "term is neither one of the official terms nor "
                              "one of the user-defined terms in the ENA "
                              "ontology." % it)
        # All the submission files live in a per-artifact directory under the
        # first "preprocessed_data" mountpoint
        _, base_fp = get_mountpoint("preprocessed_data")[0]
        self.ebi_dir = '%d_ebi_submission' % artifact_id
        self.full_ebi_dir = join(base_fp, self.ebi_dir)
        self.ascp_reply = join(self.full_ebi_dir, 'ascp_reply.txt')
        self.curl_reply = join(self.full_ebi_dir, 'curl_reply.xml')
        self.xml_dir = join(self.full_ebi_dir, 'xml_dir')
        # The XML filepaths are not known yet at construction time
        self.study_xml_fp = None
        self.sample_xml_fp = None
        self.experiment_xml_fp = None
        self.run_xml_fp = None
        self.submission_xml_fp = None
        self.publications = self.study.publications

        # getting the restrictions
        st_restrictions = [self.sample_template.columns_restrictions['EBI']]
        pt_restrictions = [self.prep_template.columns_restrictions['EBI']]
        if self.artifact.data_type in TARGET_GENE_DATA_TYPES:
            # adding restrictions on primer and barcode as these are
            # conditionally required for target gene
            pt_restrictions.append(
                PREP_TEMPLATE_COLUMNS_TARGET_GENE['demultiplex'])
        st_missing = self.sample_template.check_restrictions(st_restrictions)
        pt_missing = self.prep_template.check_restrictions(pt_restrictions)
        # testing if there are any missing columns
        if st_missing:
            error_msgs.append("Missing column in the sample template: %s" %
                              ', '.join(list(st_missing)))
        if pt_missing:
            error_msgs.append("Missing column in the prep template: %s" %
                              ', '.join(list(pt_missing)))

        # generating all samples from sample template
        self.samples = {}
        self.samples_prep = {}
        self.sample_demux_fps = {}
        get_output_fp = partial(join, self.full_ebi_dir)
        # nvp: samples without a valid platform
        # nvim: samples without a valid instrument model
        nvp = []
        nvim = []
        for k, v in viewitems(self.sample_template):
            # Samples that are not in the prep template are ignored
            if k not in self.prep_template:
                continue
            sample_prep = self.prep_template[k]

            # validating required fields
            if ('platform' not in sample_prep
                    or sample_prep['platform'] is None):
                nvp.append(k)
            else:
                platform = sample_prep['platform'].upper()
                if platform not in self.valid_platforms:
                    nvp.append(k)
                else:
                    # The instrument model is only checked once the platform
                    # is known to be valid, since the valid models are looked
                    # up per platform
                    if ('instrument_model' not in sample_prep
                            or sample_prep['instrument_model'] is None):
                        nvim.append(k)
                    else:
                        im = sample_prep['instrument_model'].upper()
                        if im not in self.valid_platforms[platform]:
                            nvim.append(k)

            # The sample is stored even if it failed validation; the errors
            # collected above abort the constructor further down
            self.samples[k] = v
            self.samples_prep[k] = sample_prep
            self.sample_demux_fps[k] = get_output_fp("%s.fastq.gz" % k)

        if nvp:
            error_msgs.append("These samples do not have a valid platform "
                              "(instrumet model wasn't checked): %s" %
                              (', '.join(nvp)))
        if nvim:
            error_msgs.append("These samples do not have a valid instrument "
                              "model: %s" % (', '.join(nvim)))
        # Report every accumulated problem in a single error
        if error_msgs:
            error_msgs = ("Errors found during EBI submission for study #%d, "
                          "artifact #%d and prep template #%d:\n%s" %
                          (self.study.id, artifact_id, self.prep_template.id,
                           '\n'.join(error_msgs)))
            LogEntry.create('Runtime', error_msgs)
            raise EBISubmissionError(error_msgs)

        self._sample_aliases = {}
        self._experiment_aliases = {}
        self._run_aliases = {}

        # Accessions already stored in the sample and prep templates
        self._ebi_sample_accessions = \
            self.sample_template.ebi_sample_accessions
        self._ebi_experiment_accessions = \
            self.prep_template.ebi_experiment_accessions
Пример #8
0
def submit_EBI(preprocessed_data_id, action, send, fastq_dir_fp=None):
    """Submit a preprocessed data to EBI

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocessed data id
    action : %s
        The action to perform with this data
    send : bool
        True to actually send the files
    fastq_dir_fp : str, optional
        The fastq filepath

    Returns
    -------
    tuple
        The study accession and the submission accession reported by the
        submission, or (None, None) if `send` is False

    Raises
    ------
    ValueError
        If the data is currently being submitted or was already submitted
        If the investigation type of the prep template is neither an official
        nor a user-defined term of the ENA ontology
    IOError
        If the output folder for the XML files already exists
    ComputeError
        If the files were sent but no accessions were returned
    """
    # NOTE(review): the docstring carries a single "%s" placeholder in the
    # `action` entry, presumably filled in elsewhere with the valid actions.
    # Keep exactly one placeholder in it - TODO confirm against the module.
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    preprocessed_data_id_str = str(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    status = preprocessed_data.submitted_to_insdc_status()
    if status in ('submitting', 'success'):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    # we need to figure out whether the investigation type is a known one
    # or if we have to submit a "new_investigation_type" to EBI.
    # This is done before touching the submission status, so an invalid type
    # does not leave the data flagged as 'submitting'
    current_type = prep_template.investigation_type
    ena_ontology = Ontology(convert_to_id('ENA', 'ontology'))
    if current_type in ena_ontology.terms:
        investigation_type = current_type
        new_investigation_type = None
    elif current_type in ena_ontology.user_defined_terms:
        investigation_type = 'Other'
        new_investigation_type = current_type
    else:
        # This should never happen
        raise ValueError("Unrecognized investigation type: '%s'. This term "
                         "is neither one of the official terms nor one of the "
                         "user-defined terms in the ENA ontology"
                         % current_type)

    if send:
        # If we intend actually to send the files, then change the status in
        # the database
        preprocessed_data.update_insdc_status('submitting')

    if fastq_dir_fp is not None:
        # If the user specifies a FASTQ directory, use it

        # Set demux_samples to None so that MetadataTemplate.to_file will put
        # all samples in the template files
        demux_samples = None
    else:
        # If the user does not specify a FASTQ directory, create one and
        # re-serialize the per-sample FASTQs from the demux file
        fastq_dir_fp = mkdtemp(prefix=qiita_config.working_dir)
        demux = [
            path for _, path, ftype in preprocessed_data.get_filepaths()
            if ftype == 'preprocessed_demux'
        ][0]

        # Keep track of which files were actually in the demux file so that we
        # can write those rows to the prep and samples templates
        demux_samples = set()

        with open_file(demux) as demux_fh:
            for samp, iterator in to_per_sample_ascii(demux_fh,
                                                      list(sample_template)):
                demux_samples.add(samp)
                sample_fp = join(fastq_dir_fp, "%s.fastq.gz" % samp)
                with gzopen(sample_fp, 'w') as fh:
                    for record in iterator:
                        fh.write(record)

    output_dir = fastq_dir_fp + '_submission'

    samp_fp = join(fastq_dir_fp, 'sample_metadata.txt')
    prep_fp = join(fastq_dir_fp, 'prep_metadata.txt')

    sample_template.to_file(samp_fp, demux_samples)
    prep_template.to_file(prep_fp, demux_samples)

    # Get specific output directory and set filepaths
    get_output_fp = partial(join, output_dir)
    study_fp = get_output_fp('study.xml')
    sample_fp = get_output_fp('sample.xml')
    experiment_fp = get_output_fp('experiment.xml')
    run_fp = get_output_fp('run.xml')
    submission_fp = get_output_fp('submission.xml')

    # An existing output folder is never reused
    if not isdir(output_dir):
        makedirs(output_dir)
    else:
        raise IOError('The output folder already exists: %s' % output_dir)

    with open(samp_fp, 'U') as st, open(prep_fp, 'U') as pt:
        submission = EBISubmission.from_templates_and_per_sample_fastqs(
            preprocessed_data_id_str,
            study.title,
            study.info['study_abstract'],
            investigation_type,
            st,
            pt,
            fastq_dir_fp,
            new_investigation_type=new_investigation_type,
            pmids=study.pmids)

    submission.write_all_xml_files(study_fp, sample_fp, experiment_fp, run_fp,
                                   submission_fp, action)

    if send:
        submission.send_sequences()
        study_accession, submission_accession = submission.send_xml()

        if study_accession is None or submission_accession is None:
            # A missing accession means that the submission did not go through
            preprocessed_data.update_insdc_status('failed')

            raise ComputeError("EBI Submission failed!")
        else:
            preprocessed_data.update_insdc_status('success', study_accession,
                                                  submission_accession)
    else:
        study_accession, submission_accession = None, None

    return study_accession, submission_accession
Пример #9
0
def make_environment(load_ontologies, download_reference, add_demo_user):
    r"""Creates the new environment specified in the configuration

    The database is created, its layout and settings are loaded, and it is
    then optionally filled with the ontologies, the reference files, a demo
    user and (in a test environment) the test data.

    Parameters
    ----------
    load_ontologies : bool
        Whether or not to retrieve and unpack ontology information
    download_reference : bool
        Whether or not to download greengenes reference files
    add_demo_user : bool
        Whether or not to add a demo user to the database with password
        "password". Ignored in a test environment.

    Raises
    ------
    EnvironmentError
        If `load_ontologies` is true in a test environment
    IOError
        If `download_reference` is true but one of the files cannot be
        retrieved
    QiitaEnvironmentError
        If the environment already exists
    """
    if load_ontologies and qiita_config.test_environment:
        raise EnvironmentError("Cannot load ontologies in a test environment! "
                               "Pass --no-load-ontologies, or set "
                               "TEST_ENVIRONMENT = FALSE in your "
                               "configuration")

    # Connect to the postgres server
    admin_conn = SQLConnectionHandler(admin='admin_without_database')

    # Check that it does not already exist
    if _check_db_exists(qiita_config.database, admin_conn):
        raise QiitaEnvironmentError(
            "Database {0} already present on the system. You can drop it "
            "by running 'qiita_env drop'".format(qiita_config.database))

    # Create the database
    # NOTE(review): autocommit is switched on only around this statement,
    # presumably because CREATE DATABASE cannot run inside a transaction -
    # confirm. The database name comes from the configuration and is
    # interpolated directly into the statement.
    print('Creating database')
    admin_conn.set_autocommit('on')
    admin_conn.execute('CREATE DATABASE %s' % qiita_config.database)
    admin_conn.set_autocommit('off')

    del admin_conn

    # Connect to the postgres server, but this time to the just created db
    conn = SQLConnectionHandler()

    print('Inserting database metadata')
    # Build the SQL layout into the database
    with open(SETTINGS_FP, 'U') as f:
        conn.execute(f.read())

    # Insert the settings values to the database
    conn.execute("INSERT INTO settings (test, base_data_dir, base_work_dir) "
                 "VALUES (%s, %s, %s)",
                 (qiita_config.test_environment, qiita_config.base_data_dir,
                  qiita_config.working_dir))

    create_layout_and_patch(conn, verbose=True)

    if load_ontologies:
        _add_ontology_data(conn)

        # these values can only be added if the environment is being loaded
        # with the ontologies, thus this cannot exist inside initialize.sql
        # because otherwise loading the ontologies would be a requirement
        ontology = Ontology(convert_to_id('ENA', 'ontology'))
        ontology.add_user_defined_term('Amplicon Sequencing')

    if download_reference:
        _download_reference_files(conn)

    # we don't do this if it's a test environment because populate.sql
    # already adds this user...
    if add_demo_user and not qiita_config.test_environment:
        conn.execute("""
            INSERT INTO qiita.qiita_user (email, user_level_id, password,
                                          name, affiliation, address, phone)
            VALUES
            ('*****@*****.**', 4,
             '$2a$12$gnUi8Qg.0tvW243v889BhOBhWLIHyIJjjgaG6dxuRJkUM8nXG9Efe',
             'Demo', 'Qitta Dev', '1345 Colorado Avenue', '303-492-1984')""")

        print('Demo user successfully created')

    # The test data is only loaded in a test environment
    if qiita_config.test_environment:
        _populate_test_db(conn)
        print('Test environment successfully created')
    else:
        print('Production environment successfully created')
Пример #10
0
 def setUp(self):
     # Every test works on a fresh handle of the ontology with this id
     ontology_id = 807481739
     self.ontology = Ontology(ontology_id)
Пример #11
0
    def render(self, study_id, preprocessed_data):
        """Render the information tab of a preprocessed data

        Parameters
        ----------
        study_id
            The id of the study; used to look up the EBI study accession
            when the preprocessed data has been submitted to EBI
        preprocessed_data
            The preprocessed data object to show in the tab

        Returns
        -------
        The value returned by `self.render_string` for the
        "preprocessed_data_info_tab.html" template
        """
        user = self.current_user
        ppd_id = preprocessed_data.id
        vamps_status = preprocessed_data.is_submitted_to_vamps
        filepaths = preprocessed_data.filepaths
        is_local_request = is_localhost(self.request.headers['host'])
        # Only admins get the EBI submission button
        show_ebi_btn = user.level == "admin"
        processing_status, processing_status_msg = \
            get_artifact_processing_status(preprocessed_data)
        processed_data = sorted([pd.id for pd in preprocessed_data.children])

        # Get all the ENA terms for the investigation type
        ontology = Ontology(convert_to_id('ENA', 'ontology'))
        # make "Other" show at the bottom of the drop down menu
        ena_terms = ['<option value="%s">%s</option>' % (v, v)
                     for v in sorted(ontology.terms) if v != 'Other']
        ena_terms.append('<option value="Other">Other</option>')

        # New Type is for users to add a new user-defined investigation type
        user_defined_terms = ontology.user_defined_terms + ['New Type']

        # ppd can only have 1 prep template
        # this block might seem wrong but is here due to a possible
        # pathological case that we used to have in the system: preprocessed
        # data without valid prep_templates. The list is only indexed once
        # its length has been checked, so that case reaches the fallback
        # below instead of failing with an IndexError
        prep_templates = preprocessed_data.prep_templates
        if len(prep_templates) == 1:
            prep_template = prep_templates[0]
            prep_template_id = prep_template.id
            raw_data_id = prep_template.artifact.id
            # Same spelling as the fallback below, so there is a single
            # "nothing selected" value
            inv_type = prep_template.investigation_type or "None Selected"
        else:
            prep_template_id = None
            raw_data_id = None
            inv_type = "None Selected"

        process_params = {
            param.id: (generate_param_str(param), param.name)
            for param in Command(3).default_parameter_sets
        }
        # We just need to provide an ID for the default parameters,
        # so we can initialize the interface
        default_params = min(process_params)

        ebi_link = None
        if preprocessed_data.is_submitted_to_ebi:
            ebi_link = EBI_LINKIFIER.format(
                Study(study_id).ebi_study_accession)

        # Note that the template receives the id of the study attached to
        # the preprocessed data, not the study_id argument
        return self.render_string(
            "study_description_templates/preprocessed_data_info_tab.html",
            ppd_id=ppd_id,
            show_ebi_btn=show_ebi_btn,
            filepaths=filepaths,
            is_local_request=is_local_request,
            prep_template_id=prep_template_id,
            raw_data_id=raw_data_id,
            inv_type=inv_type,
            ena_terms=ena_terms,
            vamps_status=vamps_status,
            user_defined_terms=user_defined_terms,
            process_params=process_params,
            default_params=default_params,
            study_id=preprocessed_data.study.id,
            processing_status=processing_status,
            processing_status_msg=processing_status_msg,
            processed_data=processed_data,
            ebi_link=ebi_link)
Пример #12
0
    def __init__(self,
                 preprocessed_data_id,
                 study_title,
                 study_abstract,
                 investigation_type,
                 empty_value='no_data',
                 new_investigation_type=None,
                 pmids=None,
                 **kwargs):
        """Store the study information and derive the library attributes

        Parameters
        ----------
        preprocessed_data_id
            Identifier of the preprocessed data being submitted
        study_title
            The title of the study
        study_abstract
            The abstract of the study
        investigation_type : str
            The investigation type; must be known to the ENA ontology
        empty_value : optional
            Stored as `self.empty_value`. Default: 'no_data'
        new_investigation_type : str, optional
            The user-defined type; required if `investigation_type` is
            'Other'
        pmids : list, optional
            Stored as `self.pmids`; an empty list is used if not provided
        kwargs
            Any other study metadata, kept (stringified) in
            `self.additional_metadata`

        Raises
        ------
        ValueError
            If `investigation_type` is 'Other' and `new_investigation_type`
            is not provided
            If `investigation_type` is not part of the ENA ontology
        """
        self.preprocessed_data_id = preprocessed_data_id
        self.study_title = study_title
        self.study_abstract = study_abstract
        self.investigation_type = investigation_type
        self.empty_value = empty_value
        self.new_investigation_type = new_investigation_type
        self.sequence_files = []

        self.study_xml_fp = None
        self.sample_xml_fp = None
        self.experiment_xml_fp = None
        self.run_xml_fp = None
        self.submission_xml_fp = None
        # A fresh list per instance so the default is never shared
        self.pmids = pmids if pmids is not None else []

        self.ebi_dir = self._get_ebi_dir()

        if self.investigation_type == 'Other' and \
                self.new_investigation_type is None:
            # The two literals are concatenated, so only one of them may
            # carry the separating space
            raise ValueError("If the investigation_type is 'Other' you have "
                             "to specify a value for new_investigation_type.")

        ontology = Ontology(convert_to_id('ENA', 'ontology'))
        if ontology.term_type(self.investigation_type) == 'not_ontology':
            raise ValueError("The investigation type must be part of ENA's "
                             "ontology, '%s' is not valid" %
                             self.investigation_type)

        # dicts that map investigation_type to library attributes
        lib_strategies = {
            'metagenome': 'POOLCLONE',
            'mimarks-survey': 'AMPLICON'
        }
        lib_selections = {'mimarks-survey': 'PCR'}
        lib_sources = {}

        # if the investigation_type is 'Other' we should use the value in
        # the new_investigation_type attribute to retrieve this information
        if self.investigation_type == 'Other':
            key = self.new_investigation_type
        else:
            key = self.investigation_type

        # Types without an explicit mapping fall back to generic values
        self.library_strategy = lib_strategies.get(key, "OTHER")
        self.library_source = lib_sources.get(key, "METAGENOMIC")
        self.library_selection = lib_selections.get(key, "unspecified")

        # This allows addition of other arbitrary study metadata
        self.additional_metadata = self._stringify_kwargs(kwargs)

        # This will hold the submission's samples, keyed by the sample name
        self.samples = {}
Пример #13
0
 def setUp(self):
     # Every test works on a fresh handle of the ontology with this id
     ontology_id = 999999999
     self.ontology = Ontology(ontology_id)
Пример #14
0
    def post(self, study_id):
        study_id = int(study_id)
        user = User(self.current_user)
        try:
            study = Study(study_id)
        except QiitaDBUnknownIDError:
            # Study not in database so fail nicely
            raise HTTPError(404, "Study %d does not exist" % study_id)
        else:
            check_access(User(self.current_user), study,
                         raise_error=True)

        # vars to add sample template
        msg = ''
        msg_level = ''
        tab_to_display = ''
        sample_template = self.get_argument('sample_template', None)
        # vars to add raw data
        filetype = self.get_argument('filetype', None)
        previous_raw_data = self.get_argument('previous_raw_data', None)
        # vars to add prep template
        add_prep_template = self.get_argument('add_prep_template', None)
        raw_data_id = self.get_argument('raw_data_id', None)
        data_type_id = self.get_argument('data_type_id', None)
        make_public = self.get_argument('make_public', False)
        make_sandbox = self.get_argument('make_sandbox', False)
        approve_study = self.get_argument('approve_study', False)
        request_approval = self.get_argument('request_approval', False)
        investigation_type = self.get_argument('investigation-type', None)
        user_defined_investigation_type = self.get_argument(
            'user-defined-investigation-type', None)
        new_investigation_type = self.get_argument('new-investigation-type',
                                                   None)

        # None Selected is the equivalent to the user not specifying the info
        # thus we should make the investigation_type None
        if investigation_type == "" or investigation_type == "None Selected":
            investigation_type = None

        # to update investigation type
        update_investigation_type = self.get_argument(
            'update_investigation_type', None)
        edit_investigation_type = self.get_argument('edit-investigation-type',
                                                    None)
        edit_user_defined_investigation_type = self.get_argument(
            'edit-user-defined-investigation-type', None)
        edit_new_investigation_type = self.get_argument(
            'edit-new-investigation-type', None)

        # None Selected is the equivalent to the user not specifying the info
        # thus we should make the investigation_type None
        if edit_investigation_type == "" or \
                edit_investigation_type == "None Selected":
            edit_investigation_type = None

        msg_level = 'success'
        if sample_template:
            # processing sample templates

            _, base_fp = get_mountpoint("uploads")[0]
            fp_rsp = join(base_fp, str(study_id), sample_template)
            if not exists(fp_rsp):
                raise HTTPError(400, "This file doesn't exist: %s" % fp_rsp)

            try:
                # deleting previous uploads and inserting new one
                yield Task(self.remove_add_study_template,
                           study.raw_data,
                           study_id, fp_rsp)
            except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                    QiitaDBDuplicateError, IOError, ValueError, KeyError,
                    CParserError, QiitaDBDuplicateHeaderError) as e:
                msg = html_error_message % ('parsing the sample template:',
                                            basename(fp_rsp), str(e))
                self.display_template(study, msg, "danger")
                return

            msg = ("The sample template '%s' has been added" %
                   sample_template)
            tab_to_display = ""

        elif request_approval:
            study.status = 'awaiting_approval'
            msg = "Study sent to admin for approval"
            tab_to_display = ""

        elif make_public:
            msg = ''
            study.status = 'public'
            msg = "Study set to public"
            tab_to_display = ""

        elif make_sandbox:
            msg = ''
            study.status = 'sandbox'
            msg = "Study reverted to sandbox"
            tab_to_display = ""

        elif approve_study:
            # make sure user is admin, then make full private study
            if user.level == 'admin' or not \
                    qiita_config.require_approval:
                study.status = 'private'
                msg = "Study approved"
                tab_to_display = ""

        elif filetype or previous_raw_data:
            # adding blank raw data
            if filetype and previous_raw_data:
                msg = ("You can not specify both a new raw data and a "
                       "previouly used one")
            elif filetype:
                try:
                    RawData.create(filetype, [study])
                except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                        QiitaDBDuplicateError, IOError, ValueError, KeyError,
                        CParserError) as e:
                    msg = html_error_message % ("creating a new raw data "
                                                "object for study:",
                                                str(study.id), str(e))
                    self.display_template(study, msg, "danger")
                    return
                msg = ""
            else:
                raw_data = [RawData(rd) for rd in previous_raw_data]
                study.add_raw_data(raw_data)
                msg = ""
            tab_to_display = ""

        elif add_prep_template and raw_data_id and data_type_id:
            # adding prep templates

            if investigation_type == 'Other' and \
                    user_defined_investigation_type == 'New Type':
                investigation_type = new_investigation_type

                # this is a new user defined investigation type so store it
                ontology = Ontology(convert_to_id('ENA', 'ontology'))
                ontology.add_user_defined_term(investigation_type)
            elif investigation_type == 'Other' and \
                    user_defined_investigation_type != 'New Type':
                investigation_type = user_defined_investigation_type

            raw_data_id = int(raw_data_id)
            _, base_path = get_mountpoint("uploads")[0]
            fp_rpt = join(base_path, str(study_id), add_prep_template)
            if not exists(fp_rpt):
                raise HTTPError(400, "This file doesn't exist: %s" % fp_rpt)

            try:
                # inserting prep templates
                yield Task(self.remove_add_prep_template, fp_rpt, raw_data_id,
                           study, data_type_id, investigation_type)
            except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
                    QiitaDBDuplicateError, IOError, ValueError,
                    CParserError) as e:
                msg = html_error_message % ("parsing the prep template: ",
                                            basename(fp_rpt), str(e))
                self.display_template(study, msg, "danger",
                                      str(raw_data_id))
                return

            msg = "Your prep template was added"
            tab_to_display = str(raw_data_id)

        elif update_investigation_type:
            # updating the prep template investigation type

            pt = PrepTemplate(update_investigation_type)
            investigation_type = edit_investigation_type

            # figure out whether to add it as a user defined term or not
            if edit_investigation_type == 'Other' and \
                    edit_user_defined_investigation_type == 'New Type':
                investigation_type = edit_new_investigation_type

                # this is a new user defined investigation type so store it
                ontology = Ontology(convert_to_id('ENA', 'ontology'))
                ontology.add_user_defined_term(investigation_type)

            elif investigation_type == 'Other' and \
                    user_defined_investigation_type != 'New Type':
                investigation_type = edit_user_defined_investigation_type

            try:
                pt.investigation_type = investigation_type
            except QiitaDBColumnError as e:
                msg = html_error_message % (", invalid investigation type: ",
                                            investigation_type, str(e))
                self.display_template(study, msg, "danger",
                                      str(pt.raw_data))
                return

            msg = "The prep template has been updated!"
            tab_to_display = str(pt.raw_data)

        else:
            msg = ("Error, did you select a valid uploaded file or are "
                   "passing the correct parameters?")
            msg_level = 'danger'
            tab_to_display = ""

        self.display_template(study, msg, msg_level, tab_to_display)
Пример #15
0
 def setUp(self):
     """Create the ``Ontology`` instance kept in ``self.ontology``"""
     # presumably an ontology id -- TODO confirm against Ontology's signature
     ontology_id = 999999999
     self.ontology = Ontology(ontology_id)
Пример #16
0
    def render(self, study_id, preprocessed_data):
        """Render the info tab for a preprocessed data object

        Parameters
        ----------
        study_id : object
            Not read by this method; the template receives
            ``preprocessed_data.study`` as its ``study_id`` instead
        preprocessed_data : object
            The preprocessed data to describe. Its id, EBI/VAMPS submission
            information, filepaths, processing status, processed data, study
            and prep template are read from it

        Returns
        -------
        object
            Whatever ``self.render_string`` returns for the
            ``preprocessed_data_info_tab.html`` template
        """
        current_user = self.current_user

        # Template arguments taken directly from the preprocessed data and
        # from the current request
        template_args = {
            'ppd_id': preprocessed_data.id,
            'ebi_status': preprocessed_data.submitted_to_insdc_status(),
            'ebi_study_accession': preprocessed_data.ebi_study_accession,
            'ebi_submission_accession':
                preprocessed_data.ebi_submission_accession,
            'vamps_status': preprocessed_data.submitted_to_vamps_status(),
            'filepaths': preprocessed_data.get_filepaths(),
            'is_local_request': self._is_local(),
            # true only when the current user's level is "admin"
            'show_ebi_btn': current_user.level == "admin",
            'processing_status': preprocessed_data.processing_status,
            'processed_data': preprocessed_data.processed_data,
        }

        # ENA terms for the investigation type drop down menu; "Other" is
        # left out of the sorted terms and appended last so that it shows at
        # the bottom of the menu
        ena_ontology = Ontology(convert_to_id('ENA', 'ontology'))
        ena_terms = ['<option value="%s">%s</option>' % (term, term)
                     for term in sorted(ena_ontology.terms)
                     if term != 'Other']
        ena_terms.append('<option value="Other">Other</option>')

        # 'New Type' lets users add a new user-defined investigation type
        user_defined_terms = ena_ontology.user_defined_terms + ['New Type']

        # Values used when the preprocessed data has no existing prep template
        prep_template_id = None
        raw_data_id = None
        inv_type = "None Selected"
        if PrepTemplate.exists(preprocessed_data.prep_template):
            prep_template_id = preprocessed_data.prep_template
            linked_template = PrepTemplate(prep_template_id)
            raw_data_id = linked_template.raw_data
            # a falsy investigation type is shown as "None Selected"
            inv_type = linked_template.investigation_type or inv_type

        # Map each parameter set id to its (parameter string, name) pair
        process_params = {}
        for params in ProcessedSortmernaParams.iter():
            process_params[params.id] = (generate_param_str(params),
                                         params.name)

        return self.render_string(
            "study_description_templates/preprocessed_data_info_tab.html",
            prep_template_id=prep_template_id,
            raw_data_id=raw_data_id,
            inv_type=inv_type,
            ena_terms=ena_terms,
            user_defined_terms=user_defined_terms,
            process_params=process_params,
            # only an ID for the default parameters is needed so the
            # interface can be initialized
            default_params=1,
            study_id=preprocessed_data.study,
            **template_args)
Пример #17
0
def make_environment(load_ontologies, download_reference, add_demo_user):
    r"""Creates the new environment specified in the configuration

    The database named in the configuration is created, the settings and
    layout/patches are loaded into it and, depending on the arguments and on
    whether this is a test environment, the ontologies, the reference files,
    the demo user and the test data are added.

    Parameters
    ----------
    load_ontologies : bool
        Whether or not to retrieve and unpack ontology information
    download_reference : bool
        Whether or not to download greengenes reference files
    add_demo_user : bool
        Whether or not to add a demo user to the database. Its e-mail address
        is the one hard-coded in the INSERT statements of this function and
        its password is "password". Ignored in a test environment.

    Raises
    ------
    EnvironmentError
        If `load_ontologies` is true and the configuration describes a test
        environment
    IOError
        If `download_reference` is true but one of the files cannot be
        retrieved
    QiitaEnvironmentError
        If the environment already exists
    """
    # Ontologies cannot be combined with a test environment: fail before
    # touching the database server
    if load_ontologies and qiita_config.test_environment:
        raise EnvironmentError("Cannot load ontologies in a test environment! "
                               "Pass --no-load-ontologies, or set "
                               "TEST_ENVIRONMENT = FALSE in your "
                               "configuration")

    # Connect to the postgres server
    admin_conn = SQLConnectionHandler(admin='admin_without_database')

    # Check that it does not already exist
    if _check_db_exists(qiita_config.database, admin_conn):
        raise QiitaEnvironmentError(
            "Database {0} already present on the system. You can drop it "
            "by running 'qiita_env drop'".format(qiita_config.database))

    # Create the database. Autocommit is switched on only around the
    # CREATE DATABASE statement -- presumably because PostgreSQL does not
    # allow CREATE DATABASE inside a transaction block.
    print('Creating database')
    admin_conn.set_autocommit('on')
    admin_conn.execute('CREATE DATABASE %s' % qiita_config.database)
    admin_conn.set_autocommit('off')

    # The admin connection is not needed from here on
    del admin_conn

    # Connect to the postgres server, but this time to the just created db
    conn = SQLConnectionHandler()

    print('Inserting database metadata')
    # Build the SQL layout into the database
    # NOTE(review): the 'U' open mode is deprecated in Python 3 and was
    # removed in Python 3.11 -- confirm which interpreters must be supported
    with open(SETTINGS_FP, 'U') as f:
        conn.execute(f.read())

    # Insert the settings values to the database
    conn.execute("INSERT INTO settings (test, base_data_dir, base_work_dir) "
                 "VALUES (%s, %s, %s)",
                 (qiita_config.test_environment, qiita_config.base_data_dir,
                  qiita_config.working_dir))

    create_layout_and_patch(conn, verbose=True)

    if load_ontologies:
        _add_ontology_data(conn)

        # these values can only be added if the environment is being loaded
        # with the ontologies, thus this cannot exist inside initialize.sql
        # because otherwise loading the ontologies would be a requirement
        ontology = Ontology(convert_to_id('ENA', 'ontology'))
        ontology.add_user_defined_term('Amplicon Sequencing')

    if download_reference:
        _download_reference_files(conn)

    # we don't do this if it's a test environment because populate.sql
    # already adds this user...
    if add_demo_user and not qiita_config.test_environment:
        # NOTE(review): the e-mail literals in the two statements below look
        # redacted/garbled in this copy of the file -- confirm them against
        # the upstream source before running this against a real database
        conn.execute("""
            INSERT INTO qiita.qiita_user (email, user_level_id, password,
                                          name, affiliation, address, phone)
            VALUES
            ('*****@*****.**', 4,
             '$2a$12$gnUi8Qg.0tvW243v889BhOBhWLIHyIJjjgaG6dxuRJkUM8nXG9Efe',
             'Demo', 'Qitta Dev', '1345 Colorado Avenue', '303-492-1984')""")
        # Second insert: a row in qiita.analysis for the same e-mail, with
        # dflt set to 't'
        conn.execute("""
            INSERT INTO qiita.analysis (email, name, description, dflt,
                                        analysis_status_id)
            VALUES
            ('*****@*****.**', '[email protected]', 'dflt', 't', 1)""")

        print('Demo user successfully created')

    # Test environments additionally get the test data loaded
    if qiita_config.test_environment:
        _populate_test_db(conn)
        print('Test environment successfully created')
    else:
        print('Production environment successfully created')
Пример #18
0
    def render(self, study, raw_data):
        """Render the editor tab for a raw data object of a study

        Parameters
        ----------
        study : object
            The study being shown; its ``id`` and ``status`` are read
        raw_data : object
            The raw data to edit; its id, filetype, prep templates, linked
            filepaths and file linking status are read

        Returns
        -------
        object
            Whatever ``self.render_string`` returns for the
            ``raw_data_editor_tab.html`` template
        """
        current_user = self.current_user
        study_status = study.status
        user_level = current_user.level
        raw_data_id = raw_data.id

        # Keep only the second element of each entry reported for the
        # uploads folders of this study
        uploads = get_files_from_uploads_folders(str(study.id))
        files = [uploaded for _, uploaded in uploads]

        # Available prep template data types, ordered by the second element
        # of each item
        data_type_items = viewitems(get_data_types())
        data_types = sorted(data_type_items, key=itemgetter(1))

        # ENA terms for the investigation type drop down menu; "Other" is
        # left out of the sorted terms and appended last so that it shows at
        # the bottom of the menu
        ena_ontology = Ontology(convert_to_id('ENA', 'ontology'))
        ena_terms = ['<option value="%s">%s</option>' % (term, term)
                     for term in sorted(ena_ontology.terms)
                     if term != 'Other']
        ena_terms.append('<option value="Other">Other</option>')

        # 'New Type' lets users add a new user-defined investigation type
        user_defined_terms = ena_ontology.user_defined_terms + ['New Type']

        # Collect the existing prep templates of this raw data that belong
        # to this study
        available_prep_templates = []
        for template_id in sorted(raw_data.prep_templates):
            if not PrepTemplate.exists(template_id):
                continue
            candidate = PrepTemplate(template_id)
            if study.id == candidate.study_id:
                available_prep_templates.append(candidate)

        # Filepath types: fixed lists for the known filetypes, otherwise
        # every 'raw_*' filepath type with its prefix removed and the
        # remaining underscores turned into spaces
        raw_data_filetype = raw_data.filetype
        fixed_filepath_types = {
            'SFF': ['sff'],
            'FASTA': ['fasta', 'qual'],
            'FASTQ': ['barcodes', 'forward seqs', 'reverse seqs'],
        }
        if raw_data_filetype in fixed_filepath_types:
            fts = fixed_filepath_types[raw_data_filetype]
        else:
            fts = []
            for fp_type in get_filepath_types():
                if fp_type.startswith('raw_'):
                    fts.append(fp_type.split('_', 1)[1].replace('_', ' '))

        # The raw data can be edited (i.e. adding prep templates and files)
        # only if the study is sandboxed or the current user is an admin
        is_editable = study_status == 'sandbox' or user_level == 'admin'

        # Files linked with the raw data and status of the data linking
        raw_data_files = raw_data.get_filepaths()
        link_status = raw_data.link_filepaths_status

        in_progress_msgs = {
            'linking': "Linking files...",
            'unlinking': "Unlinking files...",
        }
        if link_status in in_progress_msgs:
            # While files are being linked or unlinked the link button is
            # disabled and the unlink button is not shown
            disable_link_btn = True
            show_unlink_btn = False
            link_msg = in_progress_msgs[link_status]
        else:
            disable_link_btn = False
            # The unlink button is shown only if the study is editable and
            # there are files attached to the raw data
            show_unlink_btn = is_editable and raw_data_files
            if link_status.startswith('failed'):
                link_msg = "Error (un)linking files: %s" % link_status
            else:
                link_msg = ""

        return self.render_string(
            "study_description_templates/raw_data_editor_tab.html",
            study_id=study.id,
            study_status=study_status,
            user_level=user_level,
            raw_data_id=raw_data_id,
            files=files,
            data_types=data_types,
            ena_terms=ena_terms,
            user_defined_terms=user_defined_terms,
            available_prep_templates=available_prep_templates,
            filepath_types=fts,
            is_editable=is_editable,
            show_unlink_btn=show_unlink_btn,
            link_msg=link_msg,
            raw_data_files=raw_data_files,
            raw_data_filetype=raw_data_filetype,
            disable_link_btn=disable_link_btn)