Exemplo n.º 1
0
    def test_create_templates_from_qiime_mapping_file_reverse_linker(self):
        """Create templates from a QIIME map with a reverse linker primer

        Verifies the ids of the two returned templates and the metadata
        categories exposed by each of them.
        """
        # Remember the last value of the prep template id sequence so the id
        # of the prep template created below can be predicted
        with TRN:
            TRN.add("SELECT last_value FROM "
                    "qiita.prep_template_prep_template_id_seq")
            last_prep_id = TRN.execute_fetchflatten()[0]

        mapping_fh = StringIO(QIIME_MAP_WITH_REVERSE_LINKER_PRIMER)
        obs_st, obs_pt = create_templates_from_qiime_mapping_file(
            mapping_fh, self.new_study, "16S")

        # Be green: keep track of every generated file
        for created in (obs_st, obs_pt):
            for _, path in created.get_filepaths():
                self._clean_up_files.append(path)

        self.assertEqual(obs_st.id, self.new_study.id)
        self.assertEqual(obs_pt.id, last_prep_id + 1)

        # Categories expected in the first returned template
        sample_categories = {
            "physical_specimen_location", "physical_specimen_remaining",
            "dna_extracted", "sample_type", "host_subject_id", "latitude",
            "longitude", "taxon_id", "scientific_name",
            "collection_timestamp", "description",
        }
        self.assertEqual(set(obs_st.categories()), sample_categories)

        # Categories expected in the second returned template
        prep_categories = {
            "barcode", "primer", "center_name", "run_prefix", "platform",
            "library_construction_protocol", "instrument_model",
            "experiment_design_description", "reverselinkerprimer",
        }
        self.assertEqual(set(obs_pt.categories()), prep_categories)
Exemplo n.º 2
0
    def test_create_templates_from_qiime_mapping_file_reverse_linker(self):
        """Check the templates built from a map with a reverse linker primer

        The ids of both returned templates and the categories they expose
        are compared against the expected values.
        """
        sequence_sql = ("SELECT last_value FROM "
                        "qiita.prep_template_prep_template_id_seq")
        with TRN:
            # last id handed out by the prep template sequence
            TRN.add(sequence_sql)
            previous_id = TRN.execute_fetchflatten()[0]

        obs_st, obs_pt = create_templates_from_qiime_mapping_file(
            StringIO(QIIME_MAP_WITH_REVERSE_LINKER_PRIMER),
            self.new_study,
            "16S")

        # Be green: clean the environment
        for tmpl in (obs_st, obs_pt):
            for _, filepath in tmpl.get_filepaths():
                self._clean_up_files.append(filepath)

        self.assertEqual(obs_st.id, self.new_study.id)
        self.assertEqual(obs_pt.id, previous_id + 1)

        # Check that each template has the correct columns
        expected_st = {"physical_specimen_location",
                       "physical_specimen_remaining", "dna_extracted",
                       "sample_type", "host_subject_id", "latitude",
                       "longitude", "taxon_id", "scientific_name",
                       "collection_timestamp", "description"}
        expected_pt = {"barcode", "primer", "center_name", "run_prefix",
                       "platform", "library_construction_protocol",
                       "instrument_model", "experiment_design_description",
                       "reverselinkerprimer"}
        self.assertEqual(set(obs_st.categories()), expected_st)
        self.assertEqual(set(obs_pt.categories()), expected_pt)
Exemplo n.º 3
0
 def preprocessed_data(self):
     """Fetch the preprocessed data ids linked to this prep template

     Returns
     -------
     The flattened `preprocessed_data_id` values returned by the query
     """
     with TRN:
         sql = """SELECT preprocessed_data_id
                  FROM qiita.prep_template_preprocessed_data
                  WHERE prep_template_id=%s"""
         TRN.add(sql, [self.id])
         linked_ids = TRN.execute_fetchflatten()
     return linked_ids
Exemplo n.º 4
0
    def test_full_query_and_insertion(self):
        """Archive values for several jobs and read them back

        For every job returned by the first query, the test checks that
        nothing is archived for two new features, inserts one value per
        feature through a PATCH request, and finally verifies what
        `Archive.retrieve_feature_values` returns.
        """
        observations_url = '/qiita_db/archive/observations/'
        # let's archive different values from different jobs
        with TRN:
            # 3 - close reference picking
            # 3 - success
            sql = """SELECT processing_job_id
                     FROM qiita.processing_job
                     WHERE command_id = 3 AND processing_job_status_id = 3"""
            TRN.add(sql)
            job_ids = TRN.execute_fetchflatten()

            # this is so we can also test the parent merging scheme
            # 1 - split libraries
            sql = """UPDATE qiita.command_parameter
                     SET check_biom_merge = True
                     WHERE command_id = 1
                        and parameter_name = 'barcode_type'"""
            TRN.add(sql)
            TRN.execute()

            expected_features = {}
            for job_id in job_ids:
                feature_a = 'AA - %s' % job_id
                feature_b = 'BB - %s' % job_id
                inserted = {feature_a: 'CA', feature_b: 'CB'}

                # testing that nothing is there
                response = self.post(
                    observations_url, headers=self.header,
                    data={'job_id': job_id,
                          'features': [feature_a, feature_b]})
                self.assertEqual(response.code, 200)
                self.assertEqual(loads(response.body), {})

                # inserting and testing insertion
                response = self.patch(
                    observations_url, headers=self.header,
                    data={'path': job_id, 'value': dumps(inserted)})
                self.assertEqual(response.code, 200)
                self.assertEqual(loads(response.body), inserted)

                expected_features.update(inserted)

            # testing retrieve all features
            retrieved = Archive.retrieve_feature_values()
            self.assertEqual(retrieved, expected_features)

            # this doesn't exist so should be empty
            retrieved = Archive.retrieve_feature_values(
                archive_merging_scheme='')
            self.assertEqual(retrieved, {})

            merging_scheme = ('Pick closed-reference OTUs | Split '
                              'libraries FASTQ (barcode_type: golay_12)')
            retrieved = Archive.retrieve_feature_values(
                archive_merging_scheme=merging_scheme)
            self.assertEqual(retrieved, expected_features)
Exemplo n.º 5
0
    def test_execute_fetchflatten(self):
        """Check `TRN.execute_fetchflatten` with and without `idx`"""
        with TRN:
            # Queue an insert with three sets of arguments
            sql = """INSERT INTO qiita.test_table (str_column, int_column)
                     VALUES (%s, %s)"""
            args = [["insert1", 1], ["insert2", 2], ["insert3", 3]]
            TRN.add(sql, args, many=True)

            # Queue a two-column query; its result is only read further down
            sql = "SELECT str_column, int_column FROM qiita.test_table"
            TRN.add(sql)

            sql = "SELECT int_column FROM qiita.test_table"
            TRN.add(sql)
            # Without `idx`, the expected value matches the query added last
            # (the single-column SELECT)
            obs = TRN.execute_fetchflatten()
            self.assertEqual(obs, [1, 2, 3])

            sql = "SELECT 42"
            TRN.add(sql)
            # The expected value for idx=3 matches the two-column SELECT;
            # presumably the `many=True` insert occupies indices 0-2 of the
            # queue - TODO confirm against TRN.add
            obs = TRN.execute_fetchflatten(idx=3)
            self.assertEqual(obs, ["insert1", 1, "insert2", 2, "insert3", 3])
Exemplo n.º 6
0
    def _get_sample_ids(self):
        r"""Retrieves the sample ids present in the metadata template

        Returns
        -------
        set of str
            The sample ids returned by the query on `self._table`
        """
        with TRN:
            # the table and the id column are provided by the instance
            query = "SELECT sample_id FROM qiita.{0} WHERE {1}=%s".format(
                self._table, self._id_column)
            TRN.add(query, [self._id])
            found = TRN.execute_fetchflatten()
            return set(found)
Exemplo n.º 7
0
    def metadata_headers():
        """Lists the metadata headers available

        Returns
        -------
        list
            The distinct column names of qiita.study_sample_columns, in the
            order produced by the query (ORDER BY column_name)
        """
        with TRN:
            sql = """SELECT DISTINCT column_name
                     FROM qiita.study_sample_columns ORDER BY column_name"""
            TRN.add(sql)
            headers = TRN.execute_fetchflatten()
        return headers
Exemplo n.º 8
0
    def __call__(self, searchstr, user):
        """Runs a Study query and returns matching studies and samples

        Parameters
        ----------
        searchstr : str
            Search string to use
        user : User object
            User making the search. Needed for permissions checks.

        Returns
        -------
        dict
            Found samples in format
            {study_id: [[samp_id1, meta1, meta2, ...],
                        [samp_id2, meta1, meta2, ...], ...]}
        list
            metadata column names searched for

        Notes
        -----
        Metadata information for each sample is in the same order as the
        metadata columns list returned

        Metadata column names and string searches are case-sensitive

        The two returned values are also stored in `self.results` and
        `self.meta_headers`
        """
        with TRN:
            parsed = self._parse_study_search_string(searchstr, True)
            study_sql, sample_sql, meta_headers = parsed

            # get all studies containing the metadata headers requested
            TRN.add(study_sql)
            study_ids = set(TRN.execute_fetchflatten())

            # users outside these levels only see the public studies, their
            # own studies and the studies shared with them
            if user.level not in {'admin', 'dev', 'superuser'}:
                accessible = (Study.get_by_status('public') |
                              user.user_studies | user.shared_studies)
                study_ids = study_ids.intersection(accessible)

            # run search on each study to get out the matching samples
            results = {}
            for sid in study_ids:
                TRN.add(sample_sql.format(sid))
                matches = TRN.execute_fetchindex()
                if not matches:
                    # studies without matching samples are left out
                    continue
                results[sid] = matches

            self.results = results
            self.meta_headers = meta_headers
            return results, meta_headers
Exemplo n.º 9
0
def is_test_environment():
    """Tells whether Qiita is running in a test environment

    Returns
    -------
    bool
        Whether Qiita is running in a test environment or not

    Notes
    -----
    Qiita is running in a test environment if:
        - It is connected to a test database, AND
        - The config file indicates that this is a test environment
    """
    # Read the test flag stored in the settings table of the database
    with TRN:
        TRN.add("SELECT test FROM settings")
        db_is_test = TRN.execute_fetchflatten()[0]
    # Both the config file and the database have to agree
    return qiita_config.test_environment and db_is_test
Exemplo n.º 10
0
Arquivo: 14.py Projeto: adamrp/qiita
# February 7, 2015
# This patch recreates all the QIIME mapping files to avoid lower/upper case
# problems. See https://github.com/biocore/qiita/issues/799
#
# heavily based on 7.py

from os.path import basename

from qiita_db.sql_connection import TRN
from qiita_db.metadata_template import PrepTemplate

with TRN:
    sql = "SELECT prep_template_id FROM qiita.prep_template"
    TRN.add(sql)
    all_ids = TRN.execute_fetchflatten()

    # remove all the bad mapping files
    for prep_template_id in all_ids:
        pt = PrepTemplate(prep_template_id)
        fps = pt.get_filepaths()

        # get the QIIME mapping file, note that the way to figure out what is
        # and what's not a qiime mapping file is to check for the existence of
        # the word qiime in the basename of the file path, hacky but that's
        # the way it is being done in qiita_pet/uimodules/raw_data_tab.py
        mapping_files = [f for f in fps if '_qiime_' in basename(f[1])]

        table = 'prep_template_filepath'
        column = 'prep_template_id'

        # unlink all the qiime mapping files for this prep template object
Exemplo n.º 11
0
def calculate(finfo):
    """Compute the checksum and the size of the file described by `finfo`

    Parameters
    ----------
    finfo : dict
        Filepath information; the keys 'fullpath' and 'filepath_id' are read

    Returns
    -------
    tuple
        (filepath_id, checksum, size). `checksum` and `size` are None when
        the file does not exist or cannot be accessed
    """
    fullpath = finfo['fullpath']
    try:
        size = getsize(fullpath)
    except (FileNotFoundError, PermissionError):
        # Return the id (not the whole dict) so the first element has the
        # same meaning as in the successful case
        return finfo['filepath_id'], None, None

    checksum = compute_checksum(fullpath)

    return finfo['filepath_id'], checksum, size


# get all filepaths and their filepath information; takes ~10 min
with TRN:
    TRN.add("SELECT filepath_id FROM qiita.filepath")
    # one filepath information entry per id returned by the query
    files = [get_filepath_information(fid)
             for fid in TRN.execute_fetchflatten()]

# just get the filepath ids that haven't been processed, the file format
# of this file is filepath_id[tab]checksum[tab]filesize
fpath = join(dirname(abspath(__file__)), '74.py.cache.tsv')
processed = []
if exists(fpath):
    with open(fpath, 'r') as f:
        cached_lines = f.read().split('\n')
    # the id is the first tab-separated field of every non-empty line
    processed = [int(row.split('\t')[0])
                 for row in cached_lines if row != '']
files_curr = [finfo for finfo in files
              if finfo['filepath_id'] not in processed]

# let's use 20 processors and in each iteration use 120 files
Exemplo n.º 12
0
from qiita_db.sql_connection import TRN

# Collect the names of the tables ending in '_bk', qualified with the schema
with TRN:
    sql = """SELECT DISTINCT table_name
             FROM information_schema.columns
             WHERE table_name LIKE '%_bk'"""
    TRN.add(sql)
    tables = ['qiita.%s' % (name,) for name in TRN.execute_fetchflatten()]

# Drop the tables in batches, committing after each batch
chunk_size = 200
for start in range(0, len(tables), chunk_size):
    batch = tables[start:start + chunk_size]
    drop_sql = "DROP TABLE %s" % ', '.join(batch)
    with TRN:
        TRN.add(drop_sql)
        TRN.execute()
        TRN.commit()
Exemplo n.º 13
0
    def create(cls, md_template, study, data_type, investigation_type=None):
        r"""Creates the metadata template in the database

        Parameters
        ----------
        md_template : DataFrame
            The metadata template file contents indexed by samples Ids
        study : Study
            The study to which the prep template belongs to.
        data_type : str or int
            The data_type of the prep template
        investigation_type : str, optional
            The investigation type, if relevant

        Returns
        -------
        A new instance of `cls` to access to the PrepTemplate stored in the DB

        Raises
        ------
        QiitaDBColumnError
            If the investigation_type is not valid
            If a required column is missing in md_template
        QiitaDBExecutionError
            If the common creation steps fail and md_template contains
            samples that are not present in the study
        """
        with TRN:
            # If the investigation_type is supplied, make sure it is one of
            # the recognized investigation types
            if investigation_type is not None:
                cls.validate_investigation_type(investigation_type)

            # data_type can be either the id or the string: get both forms
            if not isinstance(data_type, (int, long)):
                data_type_id = convert_to_id(data_type, "data_type")
                data_type_str = data_type
            else:
                data_type_id = data_type
                data_type_str = convert_from_id(data_type, "data_type")

            # Target gene data types extend the base columns; work on a copy
            # so the module level definition is not modified
            if data_type_str in TARGET_GENE_DATA_TYPES:
                pt_cols = deepcopy(PREP_TEMPLATE_COLUMNS)
                pt_cols.update(PREP_TEMPLATE_COLUMNS_TARGET_GENE)
            else:
                pt_cols = PREP_TEMPLATE_COLUMNS

            md_template = cls._clean_validate_template(
                md_template, study.id, pt_cols)

            # Insert the metadata template
            sql = """INSERT INTO qiita.prep_template
                        (data_type_id, investigation_type)
                     VALUES (%s, %s)
                     RETURNING prep_template_id"""
            TRN.add(sql, [data_type_id, investigation_type])
            new_prep_id = TRN.execute_fetchlast()

            try:
                cls._common_creation_steps(md_template, new_prep_id)
            except Exception:
                # Check if sample IDs present here but not in sample template
                sql = """SELECT sample_id from qiita.study_sample
                         WHERE study_id = %s"""
                TRN.add(sql, [study.id])
                # Samples of the prep template that the study doesn't have
                template_samples = set(md_template.index.values)
                study_samples = TRN.execute_fetchflatten()
                unknown_samples = template_samples.difference(study_samples)
                if unknown_samples:
                    raise QiitaDBExecutionError(
                        'Samples found in prep template but not sample '
                        'template: %s' % ', '.join(unknown_samples))

                # some other error we haven't seen before so raise it
                raise

            # Link the prep template with the study
            sql = """INSERT INTO qiita.study_prep_template
                        (study_id, prep_template_id)
                     VALUES (%s, %s)"""
            TRN.add(sql, [study.id, new_prep_id])

            TRN.execute()

            new_template = cls(new_prep_id)
            new_template.generate_files()

            return new_template
Exemplo n.º 14
0
# Nov 28, 2017 (only in py file)
# Adding a new command into Qiita/Alpha: delete_analysis

from qiita_db.software import Software, Command
from qiita_db.sql_connection import TRN

# Create the delete analysis command
Command.create(Software.from_name_and_version('Qiita',
                                              'alpha'), 'delete_analysis',
               'Deletes a full analysis', {'analysis_id': ['integer', None]})

# Make sure that all validate commands have the "analysis" parameter
with TRN:
    # Get all validate commands that are missing the analysis parameter
    sql = """SELECT command_id
             FROM qiita.software_command sc
             WHERE name = 'Validate' AND NOT (
                SELECT EXISTS(SELECT *
                              FROM qiita.command_parameter
                              WHERE parameter_name = 'analysis'
                                AND command_id = sc.command_id));"""
    TRN.add(sql)
    # One placeholder per value of each row in `sql_params`, so that every
    # command found above gets its own row instead of a hard-coded command id
    sql = """INSERT INTO qiita.command_parameter
                (command_id, parameter_name, parameter_type,
                 required, default_value, name_order, check_biom_merge)
             VALUES (%s, %s, %s, %s, %s, %s, %s)"""
    sql_params = [[cmd_id, 'analysis', 'analysis', False, None, None, False]
                  for cmd_id in TRN.execute_fetchflatten()]
    TRN.add(sql, sql_params, many=True)
    TRN.execute()
Exemplo n.º 15
0
 def postgres_test(**kwargs):
     """Run a trivial query against postgres and return its single value"""
     # imported here, as in the original, rather than at module level
     from qiita_db.sql_connection import TRN

     with TRN:
         TRN.add("SELECT 42")
         values = TRN.execute_fetchflatten()
         return values[0]
Exemplo n.º 16
0
Arquivo: 23.py Projeto: adamrp/qiita
# Mar 27, 2015
# Need to re-generate the files, given that some headers have changed

from qiita_db.sql_connection import TRN
from qiita_db.metadata_template import SampleTemplate, PrepTemplate

with TRN:
    # Regenerate the files of every sample template
    TRN.add("SELECT DISTINCT study_id from qiita.study_sample")
    study_ids = TRN.execute_fetchflatten()
    for study_id in study_ids:
        sample_template = SampleTemplate(study_id)
        sample_template.generate_files()

    # Regenerate the files of every prep template
    TRN.add("SELECT DISTINCT prep_template_id from qiita.prep_template")
    prep_ids = TRN.execute_fetchflatten()
    for prep_template_id in prep_ids:
        prep_template = PrepTemplate(prep_template_id)
        prep_template.generate_files()
Exemplo n.º 17
0
Arquivo: 38.py Projeto: qiyunzhu/qiita
from qiita_db.sql_connection import TRN


# Due to the size of these changes we will
with TRN:
    # names of the tables matching 'sample_%' or 'prep_%', leaving out those
    # that contain 'template'
    sql = """SELECT DISTINCT table_name FROM information_schema.columns
                WHERE (table_name LIKE 'sample_%'
                       OR table_name LIKE 'prep_%')
                    AND table_name NOT LIKE '%template%'"""
    TRN.add(sql)

    all_tables = TRN.execute_fetchflatten()

# One transaction per table: change the type of all its columns to VARCHAR
alter_template = "ALTER TABLE qiita.%s ALTER COLUMN %s TYPE VARCHAR"
for table in all_tables:
    with TRN:
        sql = """SELECT column_name FROM information_schema.columns
                    WHERE table_name = %s
                    ORDER BY column_name"""
        TRN.add(sql, [table])
        columns = TRN.execute_fetchflatten()

        for column in columns:
            TRN.add(alter_template % (table, column))

        TRN.execute()
Exemplo n.º 18
0
Arquivo: 30.py Projeto: jenwei/qiita
nans = tuple(NA_VALUES)
false_vals = tuple(FALSE_VALUES)
true_vals = tuple(TRUE_VALUES)

st_update = set()
pr_update = set()

with TRN:
    sql = """SELECT table_name
             FROM information_schema.tables
             WHERE table_schema='qiita'
                AND (table_name SIMILAR TO 'sample\_[0-9]+'
                     OR table_name SIMILAR TO 'prep\_[0-9]+')"""
    TRN.add(sql)
    tables = TRN.execute_fetchflatten()

    cols_sql = """SELECT column_name
                  FROM information_schema.columns
                  WHERE table_name = %s
                  AND data_type = 'character varying'"""
    alter_sql = """ALTER TABLE qiita.{0}
                   ALTER COLUMN {1} TYPE bool
                   USING CASE
                       WHEN {1} IN %s THEN FALSE
                       WHEN {1} IN %s THEN TRUE
                   END"""
    null_sql = "UPDATE qiita.{0} SET {1} = NULL WHERE {1} IN %s"
    ssc_update_sql = """UPDATE qiita.study_sample_columns
                        SET column_type = 'bool'
                        WHERE study_id = %s AND column_name = %s"""
Exemplo n.º 19
0
Arquivo: 25.py Projeto: adamrp/qiita
# make the RawData to be effectively just a container for the raw files,
# which is how it was acting previously.

from qiita_db.sql_connection import TRN
from qiita_db.data import RawData
from qiita_db.util import move_filepaths_to_upload_folder

with TRN:
    # the system may contain raw data with no prep template associated to it.
    # Retrieve all those raw data ids
    sql = """SELECT raw_data_id
             FROM qiita.raw_data
             WHERE raw_data_id NOT IN (
                SELECT DISTINCT raw_data_id FROM qiita.prep_template);"""
    TRN.add(sql)
    rd_ids = TRN.execute_fetchflatten()

    # We will delete those RawData. However, if they have files attached, we
    # should move them to the uploads folder of the study
    sql_detach = """DELETE FROM qiita.study_raw_data
                    WHERE raw_data_id = %s AND study_id = %s"""
    sql_unlink = "DELETE FROM qiita.raw_filepath WHERE raw_data_id = %s"
    sql_delete = "DELETE FROM qiita.raw_data WHERE raw_data_id = %s"
    sql_studies = """SELECT study_id FROM qiita.study_raw_data
                     WHERE raw_data_id = %s"""
    move_files = []
    for rd_id in rd_ids:
        rd = RawData(rd_id)
        filepaths = rd.get_filepaths()
        TRN.add(sql_studies, [rd_id])
        studies = TRN.execute_fetchflatten()
Exemplo n.º 20
0
Arquivo: 7.py Projeto: adamrp/qiita
# 23 Nov, 2014
# This patch creates all the qiime mapping files for the existing
# prep templates

from qiita_db.util import get_mountpoint
from qiita_db.sql_connection import TRN
from qiita_db.metadata_template import PrepTemplate

with TRN:
    # first mountpoint returned for 'templates'
    _id, fp_base = get_mountpoint('templates')[0]

    TRN.add("SELECT prep_template_id FROM qiita.prep_template")
    prep_template_ids = TRN.execute_fetchflatten()
    for prep_template_id in prep_template_ids:
        pt = PrepTemplate(prep_template_id)
        study_id = pt.study_id

        # create a QIIME mapping file from each filepath of the prep template
        for _, template_fp in pt.get_filepaths():
            pt.create_qiime_mapping_file(template_fp)
Exemplo n.º 21
0
import pandas as pd
from os.path import join, dirname, abspath, exists
from qiita_db.sql_connection import TRN

# All the filepath ids currently stored in the database
with TRN:
    sql = """SELECT filepath_id
             FROM qiita.filepath"""
    TRN.add(sql)
    fids = TRN.execute_fetchflatten()

# Location of the cache file, relative to the directory of this file
fpath = join(dirname(abspath(__file__)), 'support_files', 'patches',
             'python_patches', '74.py.cache.tsv')
if not exists(fpath):
    raise ValueError("%s doesn't exits, have you run step 1?" % fpath)

# The column names are given explicitly and every value is read as text
cache_columns = ['filepath_id', 'checksum', 'fp_size']
df = pd.read_csv(
    fpath, sep='\t', index_col=0, dtype=str, names=cache_columns)
cache = df.to_dict('index')

# Build one [fp_size, checksum, filepath_id] entry per filepath id found in
# the cache; ids without a cache entry are reported and left out
args = []
for fid in fids:
    if fid not in cache:
        # interpolate the id into the message instead of passing it to print
        # as a second argument
        print('missing: %d' % fid)
    else:
        args.append([cache[fid]['fp_size'], cache[fid]['checksum'], fid])

with TRN:
    sql = """UPDATE qiita.filepath
            SET fp_size = %s, checksum = %s
Exemplo n.º 22
0
 def postgres_test(**kwargs):
     """Query postgres with a constant SELECT and return the value fetched"""
     from qiita_db.sql_connection import TRN

     with TRN:
         TRN.add("SELECT 42")
         fetched = TRN.execute_fetchflatten()
         return fetched[0]
Exemplo n.º 23
0
# The full license is in the file LICENSE, distributed with this software.
# -----------------------------------------------------------------------------

from qiita_db.sql_connection import TRN


# Due to the size of these changes we will
with TRN:
    # tables named like 'sample_%' or 'prep_%' whose name doesn't contain
    # 'template'
    sql = """SELECT DISTINCT table_name FROM information_schema.columns
                WHERE (table_name LIKE 'sample_%'
                       OR table_name LIKE 'prep_%')
                    AND table_name NOT LIKE '%template%'"""
    TRN.add(sql)

    all_tables = TRN.execute_fetchflatten()

for table in all_tables:
    # each table is processed in its own transaction
    with TRN:
        sql = """SELECT column_name FROM information_schema.columns
                    WHERE table_name = %s
                    ORDER BY column_name"""
        TRN.add(sql, [table])
        table_columns = TRN.execute_fetchflatten()

        # queue one ALTER statement per column, then run them together
        for column in table_columns:
            alter_sql = (
                "ALTER TABLE qiita.%s ALTER COLUMN %s TYPE VARCHAR"
                % (table, column))
            TRN.add(alter_sql)

        TRN.execute()
Exemplo n.º 24
0
def calculate(finfo):
    """Return the id, checksum and size of the file described by `finfo`

    Parameters
    ----------
    finfo : dict
        Filepath information; the keys 'fullpath' and 'filepath_id' are read

    Returns
    -------
    tuple
        (filepath_id, checksum, size). When the file is missing or cannot be
        accessed, `checksum` and `size` are None
    """
    filepath_id = finfo['filepath_id']
    fullpath = finfo['fullpath']
    try:
        size = getsize(fullpath)
    except (FileNotFoundError, PermissionError):
        # The first element is always the id, matching the successful case
        # (the original returned the whole `finfo` dict here)
        return filepath_id, None, None

    return filepath_id, compute_checksum(fullpath), size


# get all filepaths and their filepath information; takes ~10 min
with TRN:
    TRN.add("SELECT filepath_id FROM qiita.filepath")
    filepath_ids = TRN.execute_fetchflatten()
    # filepath information of every id returned by the query
    files = [get_filepath_information(fid) for fid in filepath_ids]


# just get the filepath ids that haven't been processed, the file format
# of this file is filepath_id[tab]checksum[tab]filesize
fpath = join(dirname(abspath(__file__)), '74.py.cache.tsv')
processed = []
if exists(fpath):
    with open(fpath, 'r') as f:
        contents = f.read()
    # keep the first field (the id) of every non-empty line
    processed = [int(entry.split('\t')[0])
                 for entry in contents.split('\n') if entry != '']
files_curr = [
    finfo for finfo in files if finfo['filepath_id'] not in processed]

# let's use 20 processors and in each iteration use 120 files
fids = 120