def create(cls, md_template, study):
    """Store a new sample template for `study` in the database

    Parameters
    ----------
    md_template : DataFrame
        The metadata template file contents indexed by sample ids
    study : Study
        The study that owns the sample template

    Returns
    -------
    An instance of `cls` built from the study id, returned after its files
    have been generated

    Raises
    ------
    QiitaDBDuplicateError
        If `cls.exists` reports that the study already has a template
    """
    cls._check_subclass()

    study_id = study.id

    # Only one template of this kind is allowed per study
    if cls.exists(study_id):
        raise QiitaDBDuplicateError(cls.__name__, 'id: %d' % study_id)

    handler = SQLConnectionHandler()
    queue = "CREATE_SAMPLE_TEMPLATE_%d" % study_id
    handler.create_queue(queue)

    # The template is cleaned and validated before any SQL is queued
    cleaned = cls._clean_validate_template(md_template, study_id, study_id,
                                           handler)
    cls._add_common_creation_steps_to_queue(cleaned, study_id, handler,
                                            queue)
    handler.execute_queue(queue)

    template = cls(study_id)
    template.generate_files()
    return template
def delete(cls, id_):
    """Remove the sample template `id_` and its table from the database

    Parameters
    ----------
    id_ : integer
        The object identifier

    Raises
    ------
    QiitaDBUnknownIDError
        If no sample template with id `id_` exists
    QiitaDBError
        If the study that owns this sample template has raw datas
    """
    cls._check_subclass()

    if not cls.exists(id_):
        raise QiitaDBUnknownIDError(id_, cls.__name__)

    # Refuse to delete while the owning study still lists raw data
    owner = Study(cls(id_).study_id)
    raw_data_ids = [str(rd) for rd in owner.raw_data()]
    if raw_data_ids:
        raise QiitaDBError(
            "Sample template can not be erased because there are raw datas "
            "(%s) associated." % ', '.join(raw_data_ids))

    dynamic_table = cls._table_name(id_)

    handler = SQLConnectionHandler()
    queue = "delete_sample_template_%d" % id_
    handler.create_queue(queue)

    # Each entry is the positional tail of an add_to_queue call; they are
    # queued in this order: filepaths, dynamic table, then the rows of the
    # two static tables.
    delete_rows = "DELETE FROM qiita.{0} where {1} = %s"
    statements = [
        ("DELETE FROM qiita.sample_template_filepath WHERE study_id = %s",
         (id_, )),
        ("DROP TABLE qiita.{0}".format(dynamic_table), ),
        (delete_rows.format(cls._table, cls._id_column), (id_,)),
        (delete_rows.format(cls._column_table, cls._id_column), (id_,)),
    ]
    for statement in statements:
        handler.add_to_queue(queue, *statement)

    handler.execute_queue(queue)
def __setitem__(self, column, value):
    r"""Sets the metadata value for the category `column`

    Parameters
    ----------
    column : str
        The column to update
    value : str
        The value to set. This is expected to be a str on the assumption
        that psycopg2 will cast as necessary when updating.

    Raises
    ------
    ValueError
        If executing the update fails and the type name derived from
        `value` differs from the column type stored in the DB
    QiitaDBExecutionError
        Re-raised unchanged when the update fails and the types agree
    """
    conn_handler = SQLConnectionHandler()
    queue_name = "set_item_%s" % self._id
    conn_handler.create_queue(queue_name)

    self.add_setitem_queries(column, value, conn_handler, queue_name)

    try:
        conn_handler.execute_queue(queue_name)
    except QiitaDBExecutionError:
        # The update failed: find out whether the cause is a type mismatch
        # between the value and the column, or something else.
        type_lookup = defaultdict(lambda: 'varchar')
        type_lookup[int] = 'integer'
        type_lookup[float] = 'float8'
        type_lookup[str] = 'varchar'
        value_type = type_lookup[type(value)]
        # NOTE(review): PostgreSQL appears to report integer columns with
        # udt_name 'int4'/'int8', so 'integer' may never match -- confirm.

        sql = ("SELECT udt_name FROM information_schema.columns "
               "WHERE column_name = %s AND table_schema = 'qiita' "
               "AND (table_name = %s OR table_name = %s)")
        row = conn_handler.execute_fetchone(
            sql, (column, self._table, self._dynamic_table))
        # The query yields a one-column row (or nothing); compare the type
        # name itself, not the row, with the value's type name.
        column_type = row[0] if row is not None else None

        if column_type != value_type:
            raise ValueError(
                'The new value being added to column: "{0}" is "{1}" '
                '(type: "{2}"). However, this column in the DB is of '
                'type "{3}". Please change the value in your updated '
                'template or reprocess your template.'.format(
                    column, value, value_type, column_type))

        # Not a type problem: propagate the original error and traceback
        raise
def extend(self, md_template):
    """Append the contents of `md_template` to this sample template

    Parameters
    ----------
    md_template : DataFrame
        The metadata template file contents indexed by sample ids
    """
    handler = SQLConnectionHandler()
    queue = "EXTEND_SAMPLE_TEMPLATE_%d" % self.id
    handler.create_queue(queue)

    # Clean and validate first, then queue the extension steps
    cleaned = self._clean_validate_template(
        md_template, self.study_id, SAMPLE_TEMPLATE_COLUMNS)
    self._add_common_extend_steps_to_queue(cleaned, handler, queue)

    handler.execute_queue(queue)

    # Regenerate the template files once the DB has been updated
    self.generate_files()
# May 19, 2015 # We attach the prep template directly to the study. The raw data is no longer # attached to the study directly, the prep template points to them. This will # make the RawData to be effectively just a container for the raw files, # which is how it was acting previously. from qiita_db.sql_connection import SQLConnectionHandler from qiita_db.data import RawData from qiita_db.util import move_filepaths_to_upload_folder conn_handler = SQLConnectionHandler() queue = "PATCH_25" conn_handler.create_queue(queue) # the system may contain raw data with no prep template associated to it. # Retrieve all those raw data ids sql = """SELECT raw_data_id FROM qiita.raw_data WHERE raw_data_id NOT IN ( SELECT DISTINCT raw_data_id FROM qiita.prep_template);""" rd_ids = [x[0] for x in conn_handler.execute_fetchall(sql)] # We will delete those RawData. However, if they have files attached, we should # move them to the uploads folder of the study sql_detach = """DELETE FROM qiita.study_raw_data WHERE raw_data_id = %s AND study_id = %s""" sql_unlink = "DELETE FROM qiita.raw_filepath WHERE raw_data_id = %s" sql_delete = "DELETE FROM qiita.raw_data WHERE raw_data_id = %s" sql_studies = """SELECT study_id FROM qiita.study_raw_data WHERE raw_data_id = %s""" move_files = []
def create(cls, md_template, raw_data, study, data_type,
           investigation_type=None):
    """Store a new prep template in the database

    Parameters
    ----------
    md_template : DataFrame
        The metadata template file contents indexed by sample ids
    raw_data : RawData
        The raw data the prep template is attached to
    study : Study
        The study the prep template belongs to
    data_type : str or int
        The data type of the prep template, given by name or by id
    investigation_type : str, optional
        The investigation type, if relevant

    Returns
    -------
    A new instance of `cls` giving access to the PrepTemplate stored in the
    DB

    Raises
    ------
    QiitaDBColumnError
        If the investigation_type is not valid
        If a required column is missing in md_template
    QiitaDBExecutionError
        If running the queue fails and the template holds sample ids that
        are not listed for the study in qiita.required_sample_info
    """
    # An investigation type, when given, must be a recognized one
    if investigation_type is not None:
        cls.validate_investigation_type(investigation_type)

    handler = SQLConnectionHandler()
    queue = "CREATE_PREP_TEMPLATE_%d" % raw_data.id
    handler.create_queue(queue)

    # data_type arrives either as a name or as an id; resolve the other form
    if not isinstance(data_type, (int, long)):
        data_type_str = data_type
        data_type_id = convert_to_id(data_type, "data_type", handler)
    else:
        data_type_id = data_type
        data_type_str = convert_from_id(data_type, "data_type", handler)

    cleaned = cls._clean_validate_template(md_template, study.id,
                                           data_type_str, handler)

    # The new prep template id is needed by the queued statements, and the
    # queue system cannot provide it to them, so this insert runs outside
    # the queue.
    insert_sql = (
        "INSERT INTO qiita.prep_template (data_type_id, raw_data_id, "
        "investigation_type) VALUES (%s, %s, %s) RETURNING "
        "prep_template_id")
    inserted = handler.execute_fetchone(
        insert_sql, (data_type_id, raw_data.id, investigation_type))
    prep_id = inserted[0]

    cls._add_common_creation_steps_to_queue(cleaned, prep_id, handler,
                                            queue)

    try:
        handler.execute_queue(queue)
    except Exception:
        # The row inserted outside the queue has to be removed by hand
        cleanup_sql = ("DELETE FROM qiita.prep_template where "
                       "{0} = %s".format(cls._id_column))
        handler.execute(cleanup_sql, (prep_id,))

        # Look for sample ids present in the prep template that the study
        # does not list; they get a dedicated error message.
        samples_sql = ("SELECT sample_id from "
                       "qiita.required_sample_info WHERE study_id = %s")
        template_samples = set(cleaned.index.values)
        unknown_samples = template_samples.difference(
            row[0]
            for row in handler.execute_fetchall(samples_sql, [study.id]))

        if unknown_samples:
            raise QiitaDBExecutionError(
                'Samples found in prep template but not sample template: '
                '%s' % ', '.join(unknown_samples))

        # Any other failure is propagated untouched
        raise

    template = cls(prep_id)
    template.generate_files()
    return template
def update_category(self, category, samples_and_values):
    """Update an existing column

    Parameters
    ----------
    category : str
        The category to update
    samples_and_values : dict
        A mapping of {sample_id: value}

    Raises
    ------
    QiitaDBUnknownIDError
        If a sample_id is included in values that is not in the template
    QiitaDBColumnError
        If the column does not exist in the table. This is implicit, and
        can be thrown by the contained Samples.
    ValueError
        If one of the new values cannot be inserted in the DB due to
        different types
    QiitaDBExecutionError
        Re-raised unchanged when the update fails and all value types agree
        with the column type
    """
    template_samples = set(self.keys())
    if not template_samples.issuperset(samples_and_values):
        # Report the ids that were supplied but are unknown to the template
        missing = set(samples_and_values) - template_samples
        table_name = self._table_name(self._id)
        raise QiitaDBUnknownIDError(missing, table_name)

    conn_handler = SQLConnectionHandler()
    queue_name = "update_category_%s_%s" % (self._id, category)
    conn_handler.create_queue(queue_name)

    for k, v in viewitems(samples_and_values):
        sample = self[k]
        sample.add_setitem_queries(category, v, conn_handler, queue_name)

    try:
        conn_handler.execute_queue(queue_name)
    except QiitaDBExecutionError:
        # The update failed: find out whether the cause is a type mismatch
        # between the values and the column, or something else.
        type_lookup = defaultdict(lambda: 'varchar')
        type_lookup[int] = 'integer'
        type_lookup[float] = 'float8'
        type_lookup[str] = 'varchar'
        value_types = set(type_lookup[type(value)]
                          for value in viewvalues(samples_and_values))
        # NOTE(review): PostgreSQL appears to report integer columns with
        # udt_name 'int4'/'int8', so 'integer' may never match -- confirm.

        sql = ("SELECT udt_name FROM information_schema.columns "
               "WHERE column_name = %s AND table_schema = 'qiita' "
               "AND (table_name = %s OR table_name = %s)")
        row = conn_handler.execute_fetchone(
            sql, (category, self._table, self._table_name(self._id)))
        # The query yields a one-column row (or nothing); compare the type
        # name itself, not the row, with the value type names.
        column_type = row[0] if row is not None else None

        if any(column_type != vt for vt in value_types):
            value_str = ', '.join(
                [str(value) for value in viewvalues(samples_and_values)])
            value_types_str = ', '.join(value_types)

            raise ValueError(
                'The new values being added to column: "%s" are "%s" '
                '(types: "%s"). However, this column in the DB is of '
                'type "%s". Please change the values in your updated '
                'template or reprocess your template.'
                % (category, value_str, value_types_str, column_type))

        # Not a type problem: propagate the original error and traceback
        raise
class TestConnHandler(TestCase):
    """Exercises the queue API of the connection handler"""

    def test_create_queue(self):
        """A created queue shows up in the queue listing"""
        handler = self.conn_handler
        handler.create_queue("toy_queue")
        self.assertEqual(handler.list_queues(), ["toy_queue"])

    def test_close(self):
        """close() changes the `closed` flag of the user connection"""
        handler = self.conn_handler
        self.assertEqual(handler._user_conn.closed, 0)
        handler.close()
        self.assertNotEqual(handler._user_conn.closed, 0)

    def test_run_queue(self):
        """An insert followed by an update runs and returns nothing"""
        handler = self.conn_handler
        insert_sql = ("INSERT INTO qiita.qiita_user (email, name, password,"
                      "phone) VALUES (%s, %s, %s, %s)")
        update_sql = ("UPDATE qiita.qiita_user SET user_level_id = 1, "
                      "phone = '222-222-2221' WHERE email = %s")

        handler.create_queue("toy_queue")
        handler.add_to_queue(
            "toy_queue", insert_sql,
            ['*****@*****.**', 'Toy', 'pass', '111-111-11112'])
        handler.add_to_queue("toy_queue", update_sql, ['*****@*****.**'])

        observed = handler.execute_queue("toy_queue")
        self.assertEqual(observed, [])

        # The stored row must reflect the update
        observed = handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        expected = [['*****@*****.**', 1, 'pass', 'Toy', None, None,
                     '222-222-2221', None, None, None]]
        self.assertEqual(observed, expected)

    def test_run_queue_many(self):
        """A statement queued with many=True runs once per argument set"""
        handler = self.conn_handler
        select_sql = "SELECT * from qiita.qiita_user WHERE email = %s"
        sql = ("INSERT INTO qiita.qiita_user (email, name, password,"
               "phone) VALUES (%s, %s, %s, %s)")
        sql_args = [
            ('*****@*****.**', 'p1', 'pass1', '111-111'),
            ('*****@*****.**', 'p2', 'pass2', '111-222')
        ]

        handler.create_queue("toy_queue")
        handler.add_to_queue("toy_queue", sql, sql_args, many=True)
        handler.execute_queue('toy_queue')

        # make sure both users added
        observed = handler.execute_fetchall(select_sql, ['*****@*****.**'])
        expected = [['*****@*****.**', 5, 'pass1', 'p1', None, None,
                     '111-111', None, None, None]]
        self.assertEqual(observed, expected)

        observed = handler.execute_fetchall(select_sql, ['*****@*****.**'])
        expected = [['*****@*****.**', 5, 'pass2', 'p2', None, None,
                     '111-222', None, None, None]]
        self.assertEqual(observed, expected)

    def test_run_queue_last_return(self):
        """execute_queue returns what the last statement returned"""
        handler = self.conn_handler
        insert_sql = ("INSERT INTO qiita.qiita_user (email, name, password,"
                      "phone) VALUES (%s, %s, %s, %s)")
        update_sql = ("UPDATE qiita.qiita_user SET user_level_id = 1, "
                      "phone = '222-222-2221' WHERE email = %s "
                      "RETURNING phone")

        handler.create_queue("toy_queue")
        handler.add_to_queue(
            "toy_queue", insert_sql,
            ['*****@*****.**', 'Toy', 'pass', '111-111-11112'])
        handler.add_to_queue("toy_queue", update_sql, ['*****@*****.**'])

        observed = handler.execute_queue("toy_queue")
        self.assertEqual(observed, ['222-222-2221'])

    def test_run_queue_placeholders(self):
        """'{n}' arguments are filled from the previous statement result"""
        handler = self.conn_handler
        insert_sql = ("INSERT INTO qiita.qiita_user (email, name, password,"
                      "phone) VALUES (%s, %s, %s, %s) "
                      "RETURNING email, password")
        update_sql = ("UPDATE qiita.qiita_user SET user_level_id = 1, "
                      "phone = '222-222-2221' WHERE email = %s "
                      "AND password = %s")

        handler.create_queue("toy_queue")
        handler.add_to_queue(
            "toy_queue", insert_sql,
            ['*****@*****.**', 'Toy', 'pass', '111-111-11112'])
        handler.add_to_queue("toy_queue", update_sql, ['{0}', '{1}'])

        observed = handler.execute_queue("toy_queue")
        self.assertEqual(observed, [])

        observed = handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        expected = [['*****@*****.**', 1, 'pass', 'Toy', None, None,
                     '222-222-2221', None, None, None]]
        self.assertEqual(observed, expected)

    def test_queue_fail(self):
        """Fail if no results data exists for substitution"""
        self.conn_handler = SQLConnectionHandler()
        handler = self.conn_handler
        insert_sql = ("INSERT INTO qiita.qiita_user (email, name, password) "
                      "VALUES (%s, %s, %s)")
        update_sql = ("UPDATE qiita.qiita_user SET user_level_id = 1 "
                      "WHERE email = %s and password = %s")

        handler.create_queue("toy_queue")
        handler.add_to_queue(
            "toy_queue", insert_sql, ['*****@*****.**', 'Toy', 'pass'])
        # NOTE(review): these arguments are set literals, not the '{0}' /
        # '{1}' strings used in test_run_queue_placeholders -- confirm
        # this is intended.
        handler.add_to_queue("toy_queue", update_sql, [{0}, {1}])

        with self.assertRaises(QiitaDBExecutionError):
            handler.execute_queue("toy_queue")

        # make sure roll back correctly
        observed = handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        self.assertEqual(observed, [])

    def test_huge_queue(self):
        """A failure at the end of a long queue undoes every insert"""
        self.conn_handler = SQLConnectionHandler()
        handler = self.conn_handler
        insert_sql = ("INSERT INTO qiita.qiita_user (email, name, password) "
                      "VALUES (%s, %s, %s)")
        bad_insert_sql = ("INSERT INTO qiita.qiita_BADTABLE (email, name, "
                          "password) VALUES (%s, %s, %s)")
        update_sql = ("UPDATE qiita.qiita_user SET user_level_id = 1 "
                      "WHERE email = %s and password = %s")

        handler.create_queue("toy_queue")

        # add tons of inserts to queue
        # NOTE(review): the e-mail literal has no conversion specifier, so
        # applying `% x` to it raises TypeError; the address looks redacted
        # -- restore the original format string.
        for x in range(120):
            handler.add_to_queue(
                "toy_queue", insert_sql,
                ['*****@*****.**' % x, 'Toy', 'pass'])

        # add failing insert as final item in queue (x keeps its last value)
        handler.add_to_queue(
            "toy_queue", bad_insert_sql,
            ['*****@*****.**' % x, 'Toy', 'pass'])
        handler.add_to_queue("toy_queue", update_sql, [{0}, {1}])

        with self.assertRaises(QiitaDBExecutionError):
            handler.execute_queue("toy_queue")

        # make sure roll back correctly
        observed = handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email LIKE "
            "'%[email protected]%'")
        self.assertEqual(observed, [])

    def test_get_temp_queue(self):
        """A temporary queue is listed until it has been executed"""
        handler = self.conn_handler
        temp_queue = handler.get_temp_queue()
        self.assertIn(temp_queue, handler.list_queues())

        for statement in ("SELECT * from qiita.qiita_user",
                          "SELECT * from qiita.user_level"):
            handler.add_to_queue(temp_queue, statement)
        handler.execute_queue(temp_queue)

        self.assertNotIn(temp_queue, handler.list_queues())
# heavily based on 7.py from os.path import basename from skbio.util import flatten from qiita_db.sql_connection import SQLConnectionHandler from qiita_db.metadata_template import PrepTemplate conn_handler = SQLConnectionHandler() sql = "SELECT prep_template_id FROM qiita.prep_template" all_ids = conn_handler.execute_fetchall(sql) q_name = 'unlink-bad-mapping-files' conn_handler.create_queue(q_name) # remove all the bad mapping files for prep_template_id in all_ids: prep_template_id = prep_template_id[0] pt = PrepTemplate(prep_template_id) fps = pt.get_filepaths() # get the QIIME mapping file, note that the way to figure out what is and # what's not a qiime mapping file is to check for the existance of the # word qiime in the basename of the file path, hacky but that's the way # it is being done in qiita_pet/uimodules/raw_data_tab.py mapping_files = [f for f in fps if '_qiime_' in basename(f[1])] table = 'prep_template_filepath'
class TestConnHandler(TestCase):
    """Exercises the queue API of the connection handler"""

    def test_create_queue(self):
        """A created queue shows up in the queue listing"""
        handler = self.conn_handler
        handler.create_queue("toy_queue")
        self.assertEqual(handler.list_queues(), ["toy_queue"])

    def test_run_queue(self):
        """An insert followed by an update runs and returns nothing"""
        handler = self.conn_handler
        insert_sql = ("INSERT INTO qiita.qiita_user (email, name, password,"
                      "phone) VALUES (%s, %s, %s, %s)")
        update_sql = ("UPDATE qiita.qiita_user SET user_level_id = 1, "
                      "phone = '222-222-2221' WHERE email = %s")

        handler.create_queue("toy_queue")
        handler.add_to_queue(
            "toy_queue", insert_sql,
            ['*****@*****.**', 'Toy', 'pass', '111-111-11112'])
        handler.add_to_queue("toy_queue", update_sql, ['*****@*****.**'])

        observed = handler.execute_queue("toy_queue")
        self.assertEqual(observed, [])

        # The stored row must reflect the update
        observed = handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        expected = [['*****@*****.**', 1, 'pass', 'Toy', None, None,
                     '222-222-2221', None, None, None]]
        self.assertEqual(observed, expected)

    def test_run_queue_many(self):
        """A statement queued with many=True runs once per argument set"""
        handler = self.conn_handler
        select_sql = "SELECT * from qiita.qiita_user WHERE email = %s"
        sql = ("INSERT INTO qiita.qiita_user (email, name, password,"
               "phone) VALUES (%s, %s, %s, %s)")
        sql_args = [
            ('*****@*****.**', 'p1', 'pass1', '111-111'),
            ('*****@*****.**', 'p2', 'pass2', '111-222')
        ]

        handler.create_queue("toy_queue")
        handler.add_to_queue("toy_queue", sql, sql_args, many=True)
        handler.execute_queue('toy_queue')

        # make sure both users added
        observed = handler.execute_fetchall(select_sql, ['*****@*****.**'])
        expected = [['*****@*****.**', 5, 'pass1', 'p1', None, None,
                     '111-111', None, None, None]]
        self.assertEqual(observed, expected)

        observed = handler.execute_fetchall(select_sql, ['*****@*****.**'])
        expected = [['*****@*****.**', 5, 'pass2', 'p2', None, None,
                     '111-222', None, None, None]]
        self.assertEqual(observed, expected)

    def test_run_queue_last_return(self):
        """execute_queue returns what the last statement returned"""
        handler = self.conn_handler
        insert_sql = ("INSERT INTO qiita.qiita_user (email, name, password,"
                      "phone) VALUES (%s, %s, %s, %s)")
        update_sql = ("UPDATE qiita.qiita_user SET user_level_id = 1, "
                      "phone = '222-222-2221' WHERE email = %s "
                      "RETURNING phone")

        handler.create_queue("toy_queue")
        handler.add_to_queue(
            "toy_queue", insert_sql,
            ['*****@*****.**', 'Toy', 'pass', '111-111-11112'])
        handler.add_to_queue("toy_queue", update_sql, ['*****@*****.**'])

        observed = handler.execute_queue("toy_queue")
        self.assertEqual(observed, ['222-222-2221'])

    def test_run_queue_placeholders(self):
        """'{n}' arguments are filled from the previous statement result"""
        handler = self.conn_handler
        insert_sql = ("INSERT INTO qiita.qiita_user (email, name, password,"
                      "phone) VALUES (%s, %s, %s, %s) "
                      "RETURNING email, password")
        update_sql = ("UPDATE qiita.qiita_user SET user_level_id = 1, "
                      "phone = '222-222-2221' WHERE email = %s "
                      "AND password = %s")

        handler.create_queue("toy_queue")
        handler.add_to_queue(
            "toy_queue", insert_sql,
            ['*****@*****.**', 'Toy', 'pass', '111-111-11112'])
        handler.add_to_queue("toy_queue", update_sql, ['{0}', '{1}'])

        observed = handler.execute_queue("toy_queue")
        self.assertEqual(observed, [])

        observed = handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        expected = [['*****@*****.**', 1, 'pass', 'Toy', None, None,
                     '222-222-2221', None, None, None]]
        self.assertEqual(observed, expected)

    def test_queue_fail(self):
        """Fail if no results data exists for substitution"""
        self.conn_handler = SQLConnectionHandler()
        handler = self.conn_handler
        insert_sql = ("INSERT INTO qiita.qiita_user (email, name, password) "
                      "VALUES (%s, %s, %s)")
        update_sql = ("UPDATE qiita.qiita_user SET user_level_id = 1 "
                      "WHERE email = %s and password = %s")

        handler.create_queue("toy_queue")
        handler.add_to_queue(
            "toy_queue", insert_sql, ['*****@*****.**', 'Toy', 'pass'])
        # NOTE(review): these arguments are set literals, not the '{0}' /
        # '{1}' strings used in test_run_queue_placeholders -- confirm
        # this is intended.
        handler.add_to_queue("toy_queue", update_sql, [{0}, {1}])

        with self.assertRaises(QiitaDBExecutionError):
            handler.execute_queue("toy_queue")

        # make sure roll back correctly
        observed = handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email = %s",
            ['*****@*****.**'])
        self.assertEqual(observed, [])

    def test_huge_queue(self):
        """A failure at the end of a long queue undoes every insert"""
        self.conn_handler = SQLConnectionHandler()
        handler = self.conn_handler
        insert_sql = ("INSERT INTO qiita.qiita_user (email, name, password) "
                      "VALUES (%s, %s, %s)")
        bad_insert_sql = ("INSERT INTO qiita.qiita_BADTABLE (email, name, "
                          "password) VALUES (%s, %s, %s)")
        update_sql = ("UPDATE qiita.qiita_user SET user_level_id = 1 "
                      "WHERE email = %s and password = %s")

        handler.create_queue("toy_queue")

        # add tons of inserts to queue
        # NOTE(review): the e-mail literal has no conversion specifier, so
        # applying `% x` to it raises TypeError; the address looks redacted
        # -- restore the original format string.
        for x in range(120):
            handler.add_to_queue(
                "toy_queue", insert_sql,
                ['*****@*****.**' % x, 'Toy', 'pass'])

        # add failing insert as final item in queue (x keeps its last value)
        handler.add_to_queue(
            "toy_queue", bad_insert_sql,
            ['*****@*****.**' % x, 'Toy', 'pass'])
        handler.add_to_queue("toy_queue", update_sql, [{0}, {1}])

        with self.assertRaises(QiitaDBExecutionError):
            handler.execute_queue("toy_queue")

        # make sure roll back correctly
        observed = handler.execute_fetchall(
            "SELECT * from qiita.qiita_user WHERE email LIKE "
            "'%[email protected]%'")
        self.assertEqual(observed, [])

    def test_get_temp_queue(self):
        """A temporary queue is listed until it has been executed"""
        handler = self.conn_handler
        temp_queue = handler.get_temp_queue()
        self.assertIn(temp_queue, handler.list_queues())

        for statement in ("SELECT * from qiita.qiita_user",
                          "SELECT * from qiita.user_level"):
            handler.add_to_queue(temp_queue, statement)
        handler.execute_queue(temp_queue)

        self.assertNotIn(temp_queue, handler.list_queues())