def exists(cls, sample_id, md_template):
    r"""Checks whether the sample already exists in the metadata template

    Parameters
    ----------
    sample_id : str
        The sample id
    md_template : MetadataTemplate
        The metadata template to which the sample belongs

    Returns
    -------
    bool
        True if the sample already exists, false otherwise.
    """
    with TRN:
        cls._check_subclass()
        sql = """SELECT EXISTS(
                    SELECT * FROM qiita.{0}
                    WHERE sample_id=%s AND {1}=%s
                )""".format(cls._table, cls._id_column)
        TRN.add(sql, [sample_id, md_template.id])
        return TRN.execute_fetchlast()

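# Hedged usage sketch for `exists` above: the sample id is made up and
# `Sample` / `SampleTemplate` stand in for whichever concrete
# MetadataTemplate subclasses are actually in use; it only illustrates the
# (sample_id, md_template) pairing the method expects.
#
#     st = SampleTemplate(1)
#     if not Sample.exists('1.SKB8.640193', st):
#         ...  # safe to add the sample row for this template
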
def __getitem__(self, key):
    r"""Returns the value of the metadata category `key`

    Parameters
    ----------
    key : str
        The metadata category

    Returns
    -------
    obj
        The value of the metadata category `key`

    Raises
    ------
    KeyError
        If the metadata category `key` does not exist

    See Also
    --------
    get
    """
    with TRN:
        key = key.lower()
        if key not in self._get_categories():
            # The key is not available for the sample, so raise a KeyError
            raise KeyError(
                "Metadata category %s does not exist for sample %s"
                " in template %d" % (key, self._id, self._md_template.id))

        sql = """SELECT {0} FROM qiita.{1}
                 WHERE sample_id=%s""".format(key, self._dynamic_table)
        TRN.add(sql, [self._id])
        return TRN.execute_fetchlast()

def _check_id(self, id_):
    r"""Checks that the MetadataTemplate id_ exists on the database"""
    with TRN:
        sql = "SELECT EXISTS(SELECT * FROM qiita.{0} WHERE {1}=%s)".format(
            self._table, self._id_column)
        TRN.add(sql, [id_])
        return TRN.execute_fetchlast()

def update_category(self, category, samples_and_values):
    """Update an existing column

    Parameters
    ----------
    category : str
        The category to update
    samples_and_values : dict
        A mapping of {sample_id: value}

    Raises
    ------
    QiitaDBUnknownIDError
        If a sample_id is included in values that is not in the template
    QiitaDBColumnError
        If the column does not exist in the table. This is implicit, and
        can be thrown by the contained Samples.
    ValueError
        If one of the new values cannot be inserted in the DB due to
        different types
    """
    with TRN:
        if not set(self.keys()).issuperset(samples_and_values):
            missing = set(samples_and_values) - set(self.keys())
            table_name = self._table_name(self._id)
            raise QiitaDBUnknownIDError(missing, table_name)

        for k, v in viewitems(samples_and_values):
            sample = self[k]
            sample.setitem(category, v)

        try:
            TRN.execute()
        except ValueError as e:
            # catching error so we can check if the error is due to
            # different column type or something else
            value_types = set(type_lookup(type(value))
                              for value in viewvalues(samples_and_values))

            sql = """SELECT udt_name
                     FROM information_schema.columns
                     WHERE column_name = %s
                        AND table_schema = 'qiita'
                        AND (table_name = %s OR table_name = %s)"""
            TRN.add(sql, [category, self._table,
                          self._table_name(self._id)])
            column_type = TRN.execute_fetchlast()

            if any([column_type != vt for vt in value_types]):
                value_str = ", ".join(
                    [str(value) for value in viewvalues(samples_and_values)])
                value_types_str = ", ".join(value_types)

                raise ValueError(
                    'The new values being added to column: "%s" are "%s" '
                    '(types: "%s"). However, this column in the DB is of '
                    'type "%s". Please change the values in your updated '
                    'template or reprocess your template.'
                    % (category, value_str, value_types_str, column_type))

            raise e

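# Hedged usage sketch for `update_category` above: the 'ph' column and the
# sample ids are made up, and `st` is assumed to be an existing sample
# template in a configured Qiita install.
#
#     st.update_category('ph', {'1.SKB1.640202': '6.8',
#                               '1.SKB2.640194': '7.1'})
#
# An unknown sample id raises QiitaDBUnknownIDError; a value whose type does
# not match the DB column raises the ValueError constructed above.
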
def tester():
    self.assertEqual(TRN._contexts_entered, 1)
    with TRN:
        self.assertEqual(TRN._contexts_entered, 2)
        sql = """SELECT EXISTS(
                SELECT * FROM qiita.test_table WHERE int_column=%s)"""
        TRN.add(sql, [2])
        self.assertTrue(TRN.execute_fetchlast())
    self.assertEqual(TRN._contexts_entered, 1)

def test_execute_fetchlast(self):
    with TRN:
        sql = """INSERT INTO qiita.test_table (str_column, int_column)
                 VALUES (%s, %s) RETURNING str_column, int_column"""
        args = [["insert1", 1], ["insert2", 2], ["insert3", 3]]
        TRN.add(sql, args, many=True)

        sql = """SELECT EXISTS(
                SELECT * FROM qiita.test_table WHERE int_column=%s)"""
        TRN.add(sql, [2])
        self.assertTrue(TRN.execute_fetchlast())

def transfer_file_to_artifact(analysis_id, a_timestamp, command_id,
                              data_type_id, params, artifact_type_id,
                              filepath_id):
    """Creates a new artifact with the given filepath id

    Parameters
    ----------
    analysis_id : int
        The analysis id to attach the artifact
    a_timestamp : datetime.datetime
        The generated timestamp of the artifact
    command_id : int
        The command id of the artifact
    data_type_id : int
        The data type id of the artifact
    params : str
        The parameters of the artifact
    artifact_type_id : int
        The artifact type
    filepath_id : int
        The filepath id

    Returns
    -------
    int
        The artifact id
    """
    with TRN:
        # Add the row in the artifact table
        # Magic number 4: Visibility -> sandbox
        sql = """INSERT INTO qiita.artifact
                    (generated_timestamp, command_id, data_type_id,
                     command_parameters, visibility_id, artifact_type_id,
                     submitted_to_vamps)
                 VALUES (%s, %s, %s, %s, %s, %s, %s)
                 RETURNING artifact_id"""
        TRN.add(sql, [a_timestamp, command_id, data_type_id, params, 4,
                      artifact_type_id, False])
        artifact_id = TRN.execute_fetchlast()

        # Link the artifact with its file
        sql = """INSERT INTO qiita.artifact_filepath
                    (artifact_id, filepath_id)
                 VALUES (%s, %s)"""
        TRN.add(sql, [artifact_id, filepath_id])

        # Link the artifact with the analysis
        sql = """INSERT INTO qiita.analysis_artifact
                    (analysis_id, artifact_id)
                 VALUES (%s, %s)"""
        TRN.add(sql, [analysis_id, artifact_id])

        return artifact_id

def study_id(self):
    """Gets the study id with which this prep template is associated

    Returns
    -------
    int
        The ID of the study with which this prep template is associated
    """
    with TRN:
        sql = """SELECT study_id FROM qiita.study_prep_template
                 WHERE prep_template_id=%s"""
        TRN.add(sql, [self.id])
        return TRN.execute_fetchlast()

def preprocessing_status(self):
    r"""Tells if the data has been preprocessed or not

    Returns
    -------
    str
        One of {'not_preprocessed', 'preprocessing', 'success', 'failed'}
    """
    with TRN:
        sql = """SELECT preprocessing_status FROM qiita.prep_template
                 WHERE {0}=%s""".format(self._id_column)
        TRN.add(sql, [self.id])
        return TRN.execute_fetchlast()

def delete(cls, id_):
    r"""Deletes the table from the database

    Parameters
    ----------
    id_ : integer
        The object identifier

    Raises
    ------
    QiitaDBUnknownIDError
        If no sample template with id id_ exists
    QiitaDBError
        If the study that owns this sample template has prep templates
        associated with it
    """
    with TRN:
        cls._check_subclass()

        if not cls.exists(id_):
            raise QiitaDBUnknownIDError(id_, cls.__name__)

        # Check if there is any PrepTemplate
        sql = """SELECT EXISTS(SELECT * FROM qiita.study_prep_template
                               WHERE study_id=%s)"""
        TRN.add(sql, [id_])
        has_prep_templates = TRN.execute_fetchlast()
        if has_prep_templates:
            raise QiitaDBError("Sample template cannot be erased because "
                               "there are prep templates associated.")

        table_name = cls._table_name(id_)

        # Delete the sample template filepaths
        sql = """DELETE FROM qiita.sample_template_filepath
                 WHERE study_id = %s"""
        args = [id_]
        TRN.add(sql, args)

        TRN.add("DROP TABLE qiita.{0}".format(table_name))

        sql = "DELETE FROM qiita.{0} WHERE {1} = %s".format(
            cls._table, cls._id_column)
        TRN.add(sql, args)

        sql = "DELETE FROM qiita.{0} WHERE {1} = %s".format(
            cls._column_table, cls._id_column)
        TRN.add(sql, args)

        TRN.execute()

def raw_data(self, raw_data):
    with TRN:
        sql = """SELECT (
                    SELECT raw_data_id
                    FROM qiita.prep_template
                    WHERE prep_template_id=%s)
                IS NOT NULL"""
        TRN.add(sql, [self.id])
        exists = TRN.execute_fetchlast()
        if exists:
            raise QiitaDBError(
                "Prep template %d already has a raw data associated"
                % self.id)
        sql = """UPDATE qiita.prep_template
                 SET raw_data_id = %s
                 WHERE prep_template_id = %s"""
        TRN.add(sql, [raw_data.id, self.id])
        TRN.execute()

def __setitem__(self, column, value):
    r"""Sets the metadata value for the category `column`

    Parameters
    ----------
    column : str
        The column to update
    value : str
        The value to set. This is expected to be a str on the assumption
        that psycopg2 will cast as necessary when updating.

    Raises
    ------
    ValueError
        If the value type does not match the one in the DB
    """
    with TRN:
        self.setitem(column, value)

        try:
            TRN.execute()
        except ValueError as e:
            # catching error so we can check if the error is due to
            # different column type or something else
            value_type = type_lookup(type(value))

            sql = """SELECT udt_name
                     FROM information_schema.columns
                     WHERE column_name = %s
                        AND table_schema = 'qiita'
                        AND (table_name = %s OR table_name = %s)"""
            TRN.add(sql, [column, self._table, self._dynamic_table])
            column_type = TRN.execute_fetchlast()

            if column_type != value_type:
                raise ValueError(
                    'The new value being added to column: "{0}" is "{1}" '
                    '(type: "{2}"). However, this column in the DB is of '
                    'type "{3}". Please change the value in your updated '
                    'template or reprocess your template.'.format(
                        column, value, value_type, column_type))

            raise e

def create_rarefaction_job(depth, biom_artifact_id, analysis, srare_cmd_id):
    """Create a new rarefaction job

    Parameters
    ----------
    depth : int
        The rarefaction depth
    biom_artifact_id : int
        The artifact id of the input rarefaction biom table
    analysis : dict
        Dictionary with the analysis information
    srare_cmd_id : int
        The command id of the single rarefaction command

    Returns
    -------
    job_id : str
        The job id
    params : str
        The job parameters
    """
    # Add the row in the processing job table
    params = ('{"depth":%d,"subsample_multinomial":false,"biom_table":%s}'
              % (depth, biom_artifact_id))
    with TRN:
        # magic number 3: status -> success
        sql = """INSERT INTO qiita.processing_job
                    (email, command_id, command_parameters,
                     processing_job_status_id)
                 VALUES (%s, %s, %s, %s)
                 RETURNING processing_job_id"""
        TRN.add(sql, [analysis['email'], srare_cmd_id, params, 3])
        job_id = TRN.execute_fetchlast()

        # Step 1.2.b: Link the job with the input artifact
        sql = """INSERT INTO qiita.artifact_processing_job
                    (artifact_id, processing_job_id)
                 VALUES (%s, %s)"""
        TRN.add(sql, [biom_artifact_id, job_id])
        TRN.execute()

    return job_id, params

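# For illustration only: with a (made up) depth of 1000 and biom artifact id
# of 7, the `params` string built by create_rarefaction_job above is the JSON
# text shown in the trailing comment.
example_params = ('{"depth":%d,"subsample_multinomial":false,"biom_table":%s}'
                  % (1000, 7))
# -> '{"depth":1000,"subsample_multinomial":false,"biom_table":7}'
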
def data_type(self, ret_id=False):
    """Returns the data_type or the data_type id

    Parameters
    ----------
    ret_id : bool, optional
        If true, return the id instead of the string, default false.

    Returns
    -------
    str or int
        string value of data_type or data_type_id if ret_id is True
    """
    with TRN:
        ret = "_id" if ret_id else ""
        sql = """SELECT d.data_type{0}
                 FROM qiita.data_type d
                    JOIN qiita.prep_template p
                        ON p.data_type_id = d.data_type_id
                 WHERE p.prep_template_id=%s""".format(ret)
        TRN.add(sql, [self.id])
        return TRN.execute_fetchlast()

def create_non_rarefied_biom_artifact(analysis, biom_data, rarefied_table):
    """Creates the initial non-rarefied BIOM artifact of the analysis

    Parameters
    ----------
    analysis : dict
        Dictionary with the analysis information
    biom_data : dict
        Dictionary with the biom file information
    rarefied_table : biom.Table
        The rarefied BIOM table

    Returns
    -------
    int
        The id of the new artifact
    """
    # The non-rarefied biom artifact is the initial biom table of the
    # analysis. This table does not currently exist anywhere, so we need to
    # actually create the BIOM file. To create this BIOM file we need:
    # (1) the samples and artifacts they come from and (2) whether the
    # samples were renamed or not. (1) is on the database, but we need to
    # infer (2) from the existing rarefied BIOM table. Fun, fun...
    with TRN:
        # Get the samples included in the BIOM table grouped by artifact id
        # Note that the analysis contains a BIOM table per data type included
        # in it, and the table analysis_sample does not differentiate between
        # datatypes, so we need to check the data type in the artifact table
        sql = """SELECT artifact_id, array_agg(sample_id)
                 FROM qiita.analysis_sample
                    JOIN qiita.artifact USING (artifact_id)
                 WHERE analysis_id = %s AND data_type_id = %s
                 GROUP BY artifact_id"""
        TRN.add(sql, [analysis['analysis_id'], biom_data['data_type_id']])
        samples_by_artifact = TRN.execute_fetchindex()

        # Create an empty BIOM table to be the new master table
        new_table = Table([], [], [])
        ids_map = {}
        for a_id, samples in samples_by_artifact:
            # Get the filepath of the BIOM table from the artifact
            artifact = Artifact(a_id)
            biom_fp = None
            for _, fp, fp_type in artifact.filepaths:
                if fp_type == 'biom':
                    biom_fp = fp
            # Note that we are sure that the biom table exists, so there is
            # no need to check if biom_fp is undefined
            biom_table = load_table(biom_fp)
            samples = set(samples).intersection(biom_table.ids())
            biom_table.filter(samples, axis='sample', inplace=True)
            # we need to check if the table has samples left before merging
            if biom_table.shape[0] != 0 and biom_table.shape[1] != 0:
                new_table = new_table.merge(biom_table)
                ids_map.update(
                    {sid: "%d.%s" % (a_id, sid) for sid in biom_table.ids()})

        # Check if we need to rename the sample ids in the biom table
        new_table_ids = set(new_table.ids())
        if not new_table_ids.issuperset(rarefied_table.ids()):
            # We need to rename the sample ids
            new_table.update_ids(ids_map, 'sample', True, True)

        sql = """INSERT INTO qiita.artifact
                    (generated_timestamp, data_type_id, visibility_id,
                     artifact_type_id, submitted_to_vamps)
                 VALUES (%s, %s, %s, %s, %s)
                 RETURNING artifact_id"""
        # Magic number 4 -> visibility sandbox
        # Magic number 7 -> biom artifact type
        TRN.add(sql, [analysis['timestamp'], biom_data['data_type_id'],
                      4, 7, False])
        artifact_id = TRN.execute_fetchlast()

        # Associate the artifact with the analysis
        sql = """INSERT INTO qiita.analysis_artifact
                    (analysis_id, artifact_id)
                 VALUES (%s, %s)"""
        TRN.add(sql, [analysis['analysis_id'], artifact_id])

        # Link the artifact with its file
        dd_id, mp = get_mountpoint('BIOM')[0]
        dir_fp = join(get_db_files_base_dir(), mp, str(artifact_id))
        if not exists(dir_fp):
            makedirs(dir_fp)
        new_table_fp = join(dir_fp, "biom_table.biom")
        with biom_open(new_table_fp, 'w') as f:
            new_table.to_hdf5(f, "Generated by Qiita")

        sql = """INSERT INTO qiita.filepath
                    (filepath, filepath_type_id, checksum,
                     checksum_algorithm_id, data_directory_id)
                 VALUES (%s, %s, %s, %s, %s)
                 RETURNING filepath_id"""
        # Magic number 7 -> filepath_type_id = 'biom'
        # Magic number 1 -> the checksum algorithm id
        TRN.add(sql, [basename(new_table_fp), 7,
                      compute_checksum(new_table_fp), 1, dd_id])
        fp_id = TRN.execute_fetchlast()

        sql = """INSERT INTO qiita.artifact_filepath
                    (artifact_id, filepath_id)
                 VALUES (%s, %s)"""
        TRN.add(sql, [artifact_id, fp_id])
        TRN.execute()

    return artifact_id

def transfer_job(analysis, command_id, params, input_artifact_id, job_data,
                 cmd_out_id, biom_data, output_artifact_type_id):
    """Transfers the job from the old structure to the plugin structure

    Parameters
    ----------
    analysis : dict
        The analysis information
    command_id : int
        The id of the command executed
    params : str
        The parameters used in the job
    input_artifact_id : int
        The id of the input artifact
    job_data : dict
        The job information
    cmd_out_id : int
        The id of the command's output
    biom_data : dict
        The biom information
    output_artifact_type_id : int
        The type of the output artifact
    """
    with TRN:
        # Create the job
        # Add the row in the processing job table
        # Magic number 3: status -> success
        sql = """INSERT INTO qiita.processing_job
                    (email, command_id, command_parameters,
                     processing_job_status_id)
                 VALUES (%s, %s, %s, %s)
                 RETURNING processing_job_id"""
        TRN.add(sql, [analysis['email'], command_id, params, 3])
        job_id = TRN.execute_fetchlast()

        # Link the job with the input artifact
        sql = """INSERT INTO qiita.artifact_processing_job
                    (artifact_id, processing_job_id)
                 VALUES (%s, %s)"""
        TRN.add(sql, [input_artifact_id, job_id])

        # Check if the executed job has results and add them
        sql = """SELECT EXISTS(SELECT *
                               FROM qiita.job_results_filepath
                               WHERE job_id = %s)"""
        TRN.add(sql, [job_data['job_id']])
        if TRN.execute_fetchlast():
            # There are results for the current job.
            # Transfer the job files to a new artifact
            sql = """SELECT filepath_id
                     FROM qiita.job_results_filepath
                     WHERE job_id = %s"""
            TRN.add(sql, [job_data['job_id']])
            filepath_id = TRN.execute_fetchlast()
            artifact_id = transfer_file_to_artifact(
                analysis['analysis_id'], analysis['timestamp'], command_id,
                biom_data['data_type_id'], params, output_artifact_type_id,
                filepath_id)

            # Link the artifact with its parent
            sql = """INSERT INTO qiita.parent_artifact
                        (artifact_id, parent_id)
                     VALUES (%s, %s)"""
            TRN.add(sql, [artifact_id, input_artifact_id])

            # Link the artifact as the job output
            sql = """INSERT INTO qiita.artifact_output_processing_job
                        (artifact_id, processing_job_id, command_output_id)
                     VALUES (%s, %s, %s)"""
            TRN.add(sql, [artifact_id, job_id, cmd_out_id])
            TRN.execute()
        else:
            # There are no results on the current job, so mark it as error
            if job_data['log_id'] is None:
                # Magic number 2 - we are not using any other severity
                # level, so keep using number 2
                sql = """INSERT INTO qiita.logging (time, severity_id, msg)
                         VALUES (%s, %s, %s)
                         RETURNING logging_id"""
                TRN.add(sql, [analysis['timestamp'], 2,
                              "Unknown error - patch 47"])
                log_id = TRN.execute_fetchlast()
            else:
                log_id = job_data['log_id']

            # Magic number 4 -> status -> error
            sql = """UPDATE qiita.processing_job
                     SET processing_job_status_id = 4, logging_id = %s
                     WHERE processing_job_id = %s"""
            TRN.add(sql, [log_id, job_id])

def create_command(software, name, description, parameters, outputs=None,
                   analysis_only=False):
    r"""Replicates the Command.create code at the time the patch was
    written"""
    # Perform some sanity checks on the parameters dictionary
    if not parameters:
        raise QiitaDBError(
            "Error creating command %s. At least one parameter should "
            "be provided." % name)
    sql_param_values = []
    sql_artifact_params = []
    for pname, vals in parameters.items():
        if len(vals) != 2:
            raise QiitaDBError(
                "Malformed parameters dictionary, the format should be "
                "{param_name: [parameter_type, default]}. Found: "
                "%s for parameter name %s" % (vals, pname))

        ptype, dflt = vals
        # Check that the type is one of the supported types
        supported_types = ['string', 'integer', 'float', 'reference',
                           'boolean', 'prep_template', 'analysis']
        if ptype not in supported_types and not ptype.startswith(
                ('choice', 'mchoice', 'artifact')):
            supported_types.extend(['choice', 'mchoice', 'artifact'])
            raise QiitaDBError(
                "Unsupported parameters type '%s' for parameter %s. "
                "Supported types are: %s"
                % (ptype, pname, ', '.join(supported_types)))

        if ptype.startswith(('choice', 'mchoice')) and dflt is not None:
            choices = set(loads(ptype.split(':')[1]))
            dflt_val = dflt
            if ptype.startswith('choice'):
                # In the choice case, the dflt value is a single string;
                # wrap it in a list so the issuperset call below works
                dflt_val = [dflt_val]
            else:
                # jsonize the list to store it in the DB
                dflt = dumps(dflt)
            if not choices.issuperset(dflt_val):
                raise QiitaDBError(
                    "The default value '%s' for the parameter %s is not "
                    "listed in the available choices: %s"
                    % (dflt, pname, ', '.join(choices)))

        if ptype.startswith('artifact'):
            atypes = loads(ptype.split(':')[1])
            sql_artifact_params.append([pname, 'artifact', atypes])
        else:
            if dflt is not None:
                sql_param_values.append([pname, ptype, False, dflt])
            else:
                sql_param_values.append([pname, ptype, True, None])

    with TRN:
        sql = """SELECT EXISTS(SELECT *
                               FROM qiita.software_command
                               WHERE software_id = %s AND name = %s)"""
        TRN.add(sql, [software.id, name])
        if TRN.execute_fetchlast():
            raise QiitaDBDuplicateError(
                "command", "software: %d, name: %s" % (software.id, name))

        # Add the command to the DB
        sql = """INSERT INTO qiita.software_command
                    (name, software_id, description, is_analysis)
                 VALUES (%s, %s, %s, %s)
                 RETURNING command_id"""
        sql_params = [name, software.id, description, analysis_only]
        TRN.add(sql, sql_params)
        c_id = TRN.execute_fetchlast()

        # Add the parameters to the DB
        sql = """INSERT INTO qiita.command_parameter
                    (command_id, parameter_name, parameter_type, required,
                     default_value)
                 VALUES (%s, %s, %s, %s, %s)
                 RETURNING command_parameter_id"""
        sql_params = [[c_id, pname, p_type, reqd, default]
                      for pname, p_type, reqd, default in sql_param_values]
        TRN.add(sql, sql_params, many=True)
        TRN.execute()

        # Add the artifact parameters
        sql_type = """INSERT INTO qiita.parameter_artifact_type
                        (command_parameter_id, artifact_type_id)
                      VALUES (%s, %s)"""
        supported_types = []
        for pname, p_type, atypes in sql_artifact_params:
            sql_params = [c_id, pname, p_type, True, None]
            TRN.add(sql, sql_params)
            pid = TRN.execute_fetchlast()
            sql_params = [[pid, convert_to_id(at, 'artifact_type')]
                          for at in atypes]
            TRN.add(sql_type, sql_params, many=True)
            supported_types.extend([atid for _, atid in sql_params])

        # If the software type is 'artifact definition', there are a couple
        # of extra steps
        if software.type == 'artifact definition':
            # If supported types is not empty, link the software with these
            # types
            if supported_types:
                sql = """INSERT INTO qiita.software_artifact_type
                            (software_id, artifact_type_id)
                         VALUES (%s, %s)"""
                sql_params = [[software.id, atid]
                              for atid in supported_types]
                TRN.add(sql, sql_params, many=True)
            # If this is the validate command, we need to add the
            # provenance and name parameters. These are used internally,
            # that's why we are adding them here
            if name == 'Validate':
                sql = """INSERT INTO qiita.command_parameter
                            (command_id, parameter_name, parameter_type,
                             required, default_value)
                         VALUES (%s, 'name', 'string', 'False', 'dflt_name'),
                                (%s, 'provenance', 'string', 'False', NULL)"""
                TRN.add(sql, [c_id, c_id])

        # Add the outputs to the command
        if outputs:
            sql = """INSERT INTO qiita.command_output
                        (name, command_id, artifact_type_id)
                     VALUES (%s, %s, %s)"""
            sql_args = [[pname, c_id, convert_to_id(at, 'artifact_type')]
                        for pname, at in outputs.items()]
            TRN.add(sql, sql_args, many=True)
            TRN.execute()

    return Command(c_id)

from os.path import realpath

from qiita_db.sql_connection import TRN

with TRN:
    TRN.add('SELECT base_data_dir FROM settings')
    path = TRN.execute_fetchlast()

    # if the path is non-canonical (it contains .. or other redundant
    # symbols) this will update it, else it will leave it as is
    TRN.add("UPDATE settings SET base_data_dir = %s", (realpath(path),))
    TRN.execute()

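# Self-contained illustration of what the patch above relies on: realpath
# collapses redundant components such as '//' and '..' (the path below is
# made up, and the result assumes no symlinks in the existing part of it).
from os.path import realpath

print(realpath('/mnt/qiita//db_files/../db_files'))
# -> '/mnt/qiita/db_files'
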
def delete(cls, id_):
    r"""Deletes the table from the database

    Parameters
    ----------
    id_ : obj
        The object identifier

    Raises
    ------
    QiitaDBExecutionError
        If the prep template already has preprocessed data
        If the prep template has raw data attached
    QiitaDBUnknownIDError
        If no prep template with id = id_ exists
    """
    with TRN:
        table_name = cls._table_name(id_)

        if not cls.exists(id_):
            raise QiitaDBUnknownIDError(id_, cls.__name__)

        sql = """SELECT EXISTS(
                    SELECT * FROM qiita.prep_template_preprocessed_data
                    WHERE prep_template_id=%s)"""
        args = [id_]
        TRN.add(sql, args)
        preprocessed_data_exists = TRN.execute_fetchlast()

        if preprocessed_data_exists:
            raise QiitaDBExecutionError(
                "Cannot remove prep template %d because a preprocessed "
                "data has been already generated using it." % id_)

        sql = """SELECT (
                    SELECT raw_data_id
                    FROM qiita.prep_template
                    WHERE prep_template_id=%s)
                IS NOT NULL"""
        TRN.add(sql, args)
        raw_data_attached = TRN.execute_fetchlast()
        if raw_data_attached:
            raise QiitaDBExecutionError(
                "Cannot remove prep template %d because it has raw data "
                "associated with it" % id_)

        # Delete the prep template filepaths
        sql = """DELETE FROM qiita.prep_template_filepath
                 WHERE prep_template_id = %s"""
        TRN.add(sql, args)

        # Drop the prep_X table
        TRN.add("DROP TABLE qiita.{0}".format(table_name))

        # Remove the rows from prep_template_samples
        sql = "DELETE FROM qiita.{0} WHERE {1} = %s".format(
            cls._table, cls._id_column)
        TRN.add(sql, args)

        # Remove the rows from prep_columns
        sql = "DELETE FROM qiita.{0} WHERE {1} = %s".format(
            cls._column_table, cls._id_column)
        TRN.add(sql, args)

        # Remove the row from study_prep_template
        sql = """DELETE FROM qiita.study_prep_template
                 WHERE {0} = %s""".format(cls._id_column)
        TRN.add(sql, args)

        # Remove the row from prep_template
        sql = "DELETE FROM qiita.prep_template WHERE {0} = %s".format(
            cls._id_column)
        TRN.add(sql, args)

        TRN.execute()

# artifacts: (1) distance matrix -> which will include the distance matrix,
# the principal coordinates and the emperor plots; (2) rarefaction
# curves -> which will include all the files generated by alpha rarefaction
# and (3) taxonomy summary, which will include all the files generated
# by summarize_taxa_through_plots.py
with TRN:
    # Add the new artifact types
    sql = """INSERT INTO qiita.artifact_type (
                artifact_type, description, can_be_submitted_to_ebi,
                can_be_submitted_to_vamps)
             VALUES (%s, %s, %s, %s)
             RETURNING artifact_type_id"""
    TRN.add(sql, ['beta_div_plots', 'Qiime 1 beta diversity results',
                  False, False])
    dm_atype_id = TRN.execute_fetchlast()
    TRN.add(sql, ['rarefaction_curves', 'Rarefaction curves', False, False])
    rc_atype_id = TRN.execute_fetchlast()
    TRN.add(sql, ['taxa_summary', 'Taxa summary plots', False, False])
    ts_atype_id = TRN.execute_fetchlast()

    # Associate each artifact with the filetypes that it accepts
    # At this time we are going to add them as directories, just as it is
    # done right now. We can make it fancier with the new type system.
    # Magic number 8: the filepath_type_id for the directory
    sql = """INSERT INTO qiita.artifact_type_filepath_type
                (artifact_type_id, filepath_type_id, required)
             VALUES (%s, %s, %s)"""
    sql_args = [[dm_atype_id, 8, True],
                [rc_atype_id, 8, True],
                [ts_atype_id, 8, True]]
    TRN.add(sql, sql_args, many=True)

def create(cls, md_template, study, data_type, investigation_type=None):
    r"""Creates the metadata template in the database

    Parameters
    ----------
    md_template : DataFrame
        The metadata template file contents indexed by sample ids
    study : Study
        The study to which the prep template belongs.
    data_type : str or int
        The data_type of the prep template
    investigation_type : str, optional
        The investigation type, if relevant

    Returns
    -------
    A new instance of `cls` to access the PrepTemplate stored in the DB

    Raises
    ------
    QiitaDBColumnError
        If the investigation_type is not valid
        If a required column is missing in md_template
    """
    with TRN:
        # If the investigation_type is supplied, make sure it is one of
        # the recognized investigation types
        if investigation_type is not None:
            cls.validate_investigation_type(investigation_type)

        # Check if the data_type is the id or the string
        if isinstance(data_type, (int, long)):
            data_type_id = data_type
            data_type_str = convert_from_id(data_type, "data_type")
        else:
            data_type_id = convert_to_id(data_type, "data_type")
            data_type_str = data_type

        pt_cols = PREP_TEMPLATE_COLUMNS
        if data_type_str in TARGET_GENE_DATA_TYPES:
            pt_cols = deepcopy(PREP_TEMPLATE_COLUMNS)
            pt_cols.update(PREP_TEMPLATE_COLUMNS_TARGET_GENE)

        md_template = cls._clean_validate_template(md_template, study.id,
                                                   pt_cols)

        # Insert the metadata template
        sql = """INSERT INTO qiita.prep_template
                    (data_type_id, investigation_type)
                 VALUES (%s, %s)
                 RETURNING prep_template_id"""
        TRN.add(sql, [data_type_id, investigation_type])
        prep_id = TRN.execute_fetchlast()

        try:
            cls._common_creation_steps(md_template, prep_id)
        except Exception:
            # Check if sample IDs present here but not in sample template
            sql = """SELECT sample_id FROM qiita.study_sample
                     WHERE study_id = %s"""
            TRN.add(sql, [study.id])
            # Get the list of study sample IDs and report any prep template
            # sample IDs that are not among them
            prep_samples = set(md_template.index.values)
            unknown_samples = prep_samples.difference(
                TRN.execute_fetchflatten())
            if unknown_samples:
                raise QiitaDBExecutionError(
                    'Samples found in prep template but not sample '
                    'template: %s' % ', '.join(unknown_samples))

            # some other error we haven't seen before so raise it
            raise

        # Link the prep template with the study
        sql = """INSERT INTO qiita.study_prep_template
                    (study_id, prep_template_id)
                 VALUES (%s, %s)"""
        TRN.add(sql, [study.id, prep_id])

        TRN.execute()

        pt = cls(prep_id)
        pt.generate_files()

        return pt

def investigation_type(self):
    with TRN:
        sql = """SELECT investigation_type
                 FROM qiita.prep_template
                 WHERE {0} = %s""".format(self._id_column)
        TRN.add(sql, [self._id])
        return TRN.execute_fetchlast()