def _fetch_key_rows_from_database(self):
    """
    Run the version_for_key query for this object's collection, key and
    version id and hand back the matching rows as a list of plain dicts.

    Raises RetrieveFailedError when the query yields no rows.
    """
    # TODO: find a non-blocking way to do this
    # TODO: don't just use the local node, it might be wrong
    query = version_for_key(self._collection_id,
                            versioned=self._versioned,
                            key=self._key,
                            unified_id=self._version_id)
    query_args = {
        "collection_id": self._collection_id,
        "key": self._key,
        "unified_id": self._version_id,
    }
    pending = self._interaction_pool.run(interaction=query.encode("utf-8"),
                                         interaction_args=query_args,
                                         pool=_nimbusio_node_name)
    key_rows = pending.get()

    if len(key_rows) != 0:
        # rows are noted to be psycopg2.extras.RealDictRow;
        # convert each one to an honest dict
        return [dict(key_row.items()) for key_row in key_rows]

    raise RetrieveFailedError("key not found {0} {1} {2}".format(
        self._collection_id, self._key, self._version_id))
def _fetch_key_rows_from_database(self):
    """
    Fetch the database rows for this object's key.

    The query text comes from version_for_key and is run through the
    interaction pool. Returns a list of plain dicts, one per row;
    raises RetrieveFailedError if no row comes back.
    """
    # TODO: find a non-blocking way to do this
    # TODO: don't just use the local node, it might be wrong
    collection_id = self._collection_id
    key = self._key
    version_id = self._version_id

    sql_text = version_for_key(collection_id,
                               versioned=self._versioned,
                               key=key,
                               unified_id=version_id)
    async_result = self._interaction_pool.run(
        interaction=sql_text.encode("utf-8"),
        interaction_args={"collection_id": collection_id,
                          "key": key,
                          "unified_id": version_id},
        pool=_nimbusio_node_name)
    fetched = async_result.get()

    if len(fetched) == 0:
        message = "key not found {0} {1} {2}".format(
            collection_id, key, version_id)
        raise RetrieveFailedError(message)

    # row is of type psycopg2.extras.RealDictRow
    # we want an honest dict
    honest_rows = []
    for fetched_row in fetched:
        honest_rows.append(dict(fetched_row.items()))
    return honest_rows
def get_last_modified_and_content_length(interaction_pool,
                                         collection_id,
                                         versioned,
                                         key,
                                         version_id=None):
    """
    Look up the rows for a key with the version_for_key query and reduce
    them to a (last_modified, content_length) pair.

    Returns (None, None) when the query yields no rows.
    """
    log = logging.getLogger("get_last_modified_and_content_length")

    query = version_for_key(collection_id,
                            versioned=versioned,
                            key=key,
                            unified_id=version_id)
    query_args = {
        "collection_id": collection_id,
        "key": key,
        "unified_id": version_id,
    }
    pending = interaction_pool.run(interaction=query.encode("utf-8"),
                                   interaction_args=query_args,
                                   pool=_local_node_name)
    key_rows = pending.get()

    if len(key_rows) == 0:
        return None, None

    summary = last_modified_and_content_length_from_key_rows(key_rows)
    last_modified, content_length = summary

    debug_template = ("collection_id={0}, key={1}, version_id={2}, "
                      "last_modified={3}, content_length={4}")
    log.debug(debug_template.format(collection_id,
                                    key,
                                    version_id,
                                    last_modified,
                                    content_length))
    return last_modified, content_length
def get_last_modified_and_content_length(interaction_pool,
                                         collection_id,
                                         versioned,
                                         key,
                                         version_id=None):
    """
    Return the last-modified value and content length for a key.

    The key's rows are fetched through the interaction pool using the
    version_for_key query; the pair is then computed by
    last_modified_and_content_length_from_key_rows.
    When no rows are found the result is (None, None).
    """
    log = logging.getLogger("get_last_modified_and_content_length")
    sql_text = version_for_key(collection_id,
                               versioned=versioned,
                               key=key,
                               unified_id=version_id)
    result = interaction_pool.run(
        interaction=sql_text.encode("utf-8"),
        interaction_args={"collection_id": collection_id,
                          "key": key,
                          "unified_id": version_id},
        pool=_local_node_name).get()

    if len(result) != 0:
        last_modified, content_length = \
            last_modified_and_content_length_from_key_rows(result)
        log.debug(
            "collection_id={0}, key={1}, version_id={2}, last_modified={3}, "
            "content_length={4}".format(collection_id,
                                        key,
                                        version_id,
                                        last_modified,
                                        content_length))
        return last_modified, content_length

    return None, None
def test_list_versions_same_rows(self):
    """
    check that this can find all the same rows list_versions returns
    in the versioned case above

    For every row list_versions reports, version_for_key called with that
    row's key and unified_id must return at least one row, and every row
    it returns must carry the same key and unified_id.
    """
    def fetch_rows(sql_text, args):
        # release the cursor even when the query raises
        cursor = self._connection.cursor()
        try:
            cursor.execute(sql_text, args)
            return cursor.fetchall()
        finally:
            cursor.close()

    sql_text = list_versions(_test_collection_id,
                             versioned=True,
                             prefix=_test_prefix)
    args = {
        "collection_id": _test_collection_id,
        "prefix": _test_prefix,
    }

    # only dump the SQL when debugging is switched on, as
    # test_version_for_key does, instead of writing to /tmp on every run
    if _write_debug_sql:
        with open("/tmp/debug.sql", "w") as debug_sql_file:
            debug_sql_file.write(mogrify(sql_text, args))

    list_versions_rows = fetch_rows(sql_text, args)

    for list_versions_row in list_versions_rows:
        sql_text = version_for_key(
            _test_collection_id,
            versioned=True,
            key=list_versions_row["key"],
            unified_id=list_versions_row["unified_id"])
        args = {
            "collection_id": _test_collection_id,
            "key": list_versions_row["key"],
            "unified_id": list_versions_row["unified_id"]
        }
        version_for_key_rows = fetch_rows(sql_text, args)

        self.assertTrue(
            len(version_for_key_rows) > 0,
            "{0} {1}".format(args, list_versions_row))
        for version_for_key_row in version_for_key_rows:
            self.assertEqual(version_for_key_row["key"],
                             list_versions_row["key"])
            self.assertEqual(version_for_key_row["unified_id"],
                             list_versions_row["unified_id"],
                             list_versions_row)
def test_list_versions_same_rows(self):
    """
    check that this can find all the same rows list_versions returns
    in the versioned case above

    Each (key, unified_id) pair that list_versions reports is looked up
    again with version_for_key; the lookup must return at least one row
    and all returned rows must match the pair.
    """
    sql_text = list_versions(_test_collection_id,
                             versioned=True,
                             prefix=_test_prefix)
    args = {"collection_id": _test_collection_id,
            "prefix": _test_prefix, }

    # the debug dump is opt-in (same flag test_version_for_key uses) so a
    # normal test run does not write to a fixed path under /tmp
    if _write_debug_sql:
        with open("/tmp/debug.sql", "w") as debug_sql_file:
            debug_sql_file.write(mogrify(sql_text, args))

    cursor = self._connection.cursor()
    try:
        cursor.execute(sql_text, args)
        list_versions_rows = cursor.fetchall()
    finally:
        # close the cursor even if the query fails
        cursor.close()

    for list_versions_row in list_versions_rows:
        expected_key = list_versions_row["key"]
        expected_unified_id = list_versions_row["unified_id"]

        sql_text = version_for_key(_test_collection_id,
                                   versioned=True,
                                   key=expected_key,
                                   unified_id=expected_unified_id)
        args = {"collection_id": _test_collection_id,
                "key": expected_key,
                "unified_id": expected_unified_id}

        cursor = self._connection.cursor()
        try:
            cursor.execute(sql_text, args)
            version_for_key_rows = cursor.fetchall()
        finally:
            cursor.close()

        self.assertTrue(len(version_for_key_rows) > 0,
                        "{0} {1}".format(args, list_versions_row))
        for version_for_key_row in version_for_key_rows:
            self.assertEqual(version_for_key_row["key"], expected_key)
            self.assertEqual(version_for_key_row["unified_id"],
                             expected_unified_id,
                             list_versions_row)
def test_version_for_key_find_all_same_rows(self):
    """
    check that this can find all the same rows list_keys returns

    Every row list_keys reports (versioned=False) is looked up again with
    version_for_key using only its key; the lookup must return at least
    one row and each returned row must have the same key and unified_id.
    """
    # XXX: this looks like it tests the same stuff as the previous
    # entry?
    log = logging.getLogger("test_version_for_key_find_all_same_rows")

    def run_query(statement, parameters):
        # one short-lived cursor per query
        cursor = self._connection.cursor()
        cursor.execute(statement, parameters)
        fetched = cursor.fetchall()
        cursor.close()
        return fetched

    expected_rows = run_query(
        list_keys(_test_collection_id,
                  versioned=False,
                  prefix=_test_prefix),
        {"collection_id": _test_collection_id,
         "prefix": _test_prefix, })

    for expected in expected_rows:
        found_rows = run_query(
            version_for_key(_test_collection_id,
                            versioned=False,
                            key=expected["key"]),
            {"collection_id": _test_collection_id,
             "key": expected["key"], })

        self.assertTrue(len(found_rows) > 0)
        for found in found_rows:
            self.assertEqual(found["key"], expected["key"])
            self.assertEqual(found["unified_id"], expected["unified_id"])
def retrieve_meta(interaction_pool,
                  collection_id,
                  versioned,
                  key,
                  version_id=None):
    """
    get the meta data associated with the segment

    The key is first resolved with the version_for_key query; the
    segment_id of the first row is then used to run _retrieve_meta_query.

    Returns a list of (meta_key, meta_value) tuples, or None when the
    key lookup yields no rows.
    """
    # TODO: find a non-blocking way to do this
    # TODO: don't just use the local node, it might be wrong
    key_query = version_for_key(collection_id,
                                versioned=versioned,
                                key=key,
                                unified_id=version_id)
    key_query_args = {
        "collection_id": collection_id,
        "key": key,
        "unified_id": version_id,
    }
    key_rows = interaction_pool.run(interaction=key_query.encode("utf-8"),
                                    interaction_args=key_query_args,
                                    pool=_local_node_name).get()
    if len(key_rows) == 0:
        return None

    # only the first row's segment is consulted for meta data
    meta_lookup = interaction_pool.run(
        interaction=_retrieve_meta_query,
        interaction_args=[collection_id, key_rows[0]["segment_id"]],
        pool=_local_node_name)
    meta_rows = meta_lookup.get()

    return [(meta_row["meta_key"], meta_row["meta_value"])
            for meta_row in meta_rows]
def test_version_for_key_find_all_same_rows(self):
    """
    check that this can find all the same rows list_keys returns

    list_keys is run with versioned=False; each of its rows must then be
    found by version_for_key (key only, versioned=False) with a matching
    key and unified_id.
    """
    # XXX: this looks like it tests the same stuff as the previous
    # entry?
    log = logging.getLogger("test_version_for_key_find_all_same_rows")

    list_keys_sql = list_keys(_test_collection_id,
                              versioned=False,
                              prefix=_test_prefix)
    list_keys_args = {"collection_id": _test_collection_id,
                      "prefix": _test_prefix, }
    list_keys_cursor = self._connection.cursor()
    list_keys_cursor.execute(list_keys_sql, list_keys_args)
    list_keys_rows = list_keys_cursor.fetchall()
    list_keys_cursor.close()

    for list_keys_row in list_keys_rows:
        wanted_key = list_keys_row["key"]

        lookup_sql = version_for_key(_test_collection_id,
                                     versioned=False,
                                     key=wanted_key)
        lookup_args = {"collection_id": _test_collection_id,
                       "key": wanted_key, }
        lookup_cursor = self._connection.cursor()
        lookup_cursor.execute(lookup_sql, lookup_args)
        lookup_rows = lookup_cursor.fetchall()
        lookup_cursor.close()

        self.assertTrue(len(lookup_rows) > 0)
        for lookup_row in lookup_rows:
            self.assertEqual(lookup_row["key"], list_keys_row["key"])
            self.assertEqual(lookup_row["unified_id"],
                             list_keys_row["unified_id"])
def retrieve_meta(interaction_pool,
                  collection_id,
                  versioned,
                  key,
                  version_id=None):
    """
    get the meta data associated with the segment

    Returns None when version_for_key finds no rows for the key;
    otherwise a list of (meta_key, meta_value) tuples produced by
    _retrieve_meta_query for the first row's segment_id.
    """
    # TODO: find a non-blocking way to do this
    # TODO: don't just use the local node, it might be wrong
    sql_text = version_for_key(collection_id,
                               versioned=versioned,
                               key=key,
                               unified_id=version_id)
    args = {"collection_id": collection_id,
            "key": key,
            "unified_id": version_id}

    async_result = interaction_pool.run(interaction=sql_text.encode("utf-8"),
                                        interaction_args=args,
                                        pool=_local_node_name)
    segment_rows = async_result.get()

    if len(segment_rows) != 0:
        async_result = interaction_pool.run(
            interaction=_retrieve_meta_query,
            interaction_args=[collection_id, segment_rows[0]["segment_id"]],
            pool=_local_node_name)

        meta_pairs = []
        for row in async_result.get():
            meta_pairs.append((row["meta_key"], row["meta_value"]))
        return meta_pairs

    return None
def test_list_keys_vs_list_versions(self):
    """
    check that this can ONLY find the same rows list_versions returns
    above IF they are also in the result that list_keys returns
    (i.e. some of them should be findable, some not.)
    """
    # Background: list_keys returns the newest version of every key.
    # list_versions returns every version of every key.
    # If a collection is unversioned, output from list_keys and
    # list_versions should find the same rows (although the output from
    # list_keys has an extra column.)
    # In other words, in a versioned collection, any version of a key
    # that isn't the newest version should be unreachable.
    #
    # Plan:
    # 1. get the full output from
    #    list_versions(test_collection_id, versioned=True) and save it.
    # 2. get the full output from
    #    list_keys(test_collection_id, versioned=True) and save it.
    # 3. compare the results to determine which keys are older versions.
    # 4. for each row, call version_for_key with specific unified_id and
    #    versioned arguments and verify finding (or correctly not
    #    finding) the result.

    def run_query(statement, parameters):
        # one short-lived cursor per query
        cursor = self._connection.cursor()
        cursor.execute(statement, parameters)
        fetched = cursor.fetchall()
        cursor.close()
        return fetched

    def prefix_args():
        # fresh dict for every prefix-based listing query
        return {
            "collection_id": _test_collection_id,
            "prefix": _test_prefix,
        }

    def key_and_id_pairs(rows):
        # the extra list_keys column is dropped by keeping just the pair
        return set([(r["key"], r["unified_id"], ) for r in rows])

    def version_rows(key, unified_id, versioned):
        # returns the rows plus the args used, for assertion messages
        statement = version_for_key(_test_collection_id,
                                    versioned=versioned,
                                    key=key,
                                    unified_id=unified_id)
        parameters = {
            "collection_id": _test_collection_id,
            "key": key,
            "unified_id": unified_id
        }
        return run_query(statement, parameters), parameters

    # 1. every version of every key
    list_versions_rows = run_query(
        list_versions(_test_collection_id,
                      versioned=True,
                      prefix=_test_prefix),
        prefix_args())
    list_versions_set = key_and_id_pairs(list_versions_rows)

    # 2. newest version of every key, versioned collection
    list_keys_rows = run_query(
        list_keys(_test_collection_id,
                  versioned=True,
                  prefix=_test_prefix),
        prefix_args())
    list_keys_set = key_and_id_pairs(list_keys_rows)

    # find keys that are only reachable by list_keys when versioned=True
    # we need this below.
    unversioned_list_keys_rows = run_query(
        list_keys(_test_collection_id,
                  versioned=False,
                  prefix=_test_prefix),
        prefix_args())
    unversioned_list_keys_set = key_and_id_pairs(unversioned_list_keys_rows)
    versioned_only_reachable_set = list_keys_set - unversioned_list_keys_set

    # 3. rows that list_versions reports but list_keys does not should be
    # the older versions; verify that assumption row by row
    older_version_set = list_versions_set - list_keys_set
    for list_versions_row in list_versions_rows:
        test_tuple = (
            list_versions_row["key"],
            list_versions_row["unified_id"],
        )
        self.assertIn(test_tuple, list_versions_set)
        if test_tuple in list_keys_set:
            self.assertNotIn(test_tuple, older_version_set)
        else:
            self.assertIn(test_tuple, older_version_set)

    # 4. older versions, looked up by their unified_id, should not be
    # reachable with versioned=False; with versioned=True they should be.
    for key, unified_id in older_version_set:
        for versioned in [False, True, ]:
            test_rows, _ = version_rows(key, unified_id, versioned)
            if versioned:
                self.assertTrue(len(test_rows) > 0)
            else:
                self.assertEqual(len(test_rows), 0)

    # The rows that were in both list_versions output and list_keys
    # output should be reachable with versioned=True, but only reachable
    # with versioned=False if they are not in versioned_only_reachable_set.
    for key, unified_id in list_keys_set:
        for versioned in [False, True, ]:
            test_rows, args = version_rows(key, unified_id, versioned)
            message = "versioned={0} {1}".format(versioned, args)
            expect_empty = (versioned is False and
                            (key, unified_id, )
                            in versioned_only_reachable_set)
            if expect_empty:
                self.assertTrue(len(test_rows) == 0, message)
            else:
                self.assertTrue(len(test_rows) > 0, message)
def test_version_for_key(self):
    """
    version_for_key

    Part 1: for every row list_keys returns (versioned and unversioned),
    version_for_key called without a unified_id must return at least one
    row, and every returned row must have the same key and unified_id.

    Part 2: version_for_key called for _test_key with a unified_id that
    does not exist must return no rows.
    """
    def fetch_rows(sql_text, args):
        # release the cursor even when the query raises
        cursor = self._connection.cursor()
        try:
            cursor.execute(sql_text, args)
            return cursor.fetchall()
        finally:
            cursor.close()

    # check that for every row in list_keys, calling version_for_key with
    # unified_id=None should return the same row, regardless of it being
    # versioned or not.
    for versioned in [True, False]:
        sql_text = list_keys(_test_collection_id,
                             versioned=versioned,
                             prefix=_test_prefix)
        args = {
            "collection_id": _test_collection_id,
            "prefix": _test_prefix,
        }
        baseline_rows = fetch_rows(sql_text, args)

        for row in baseline_rows:
            sql_text = version_for_key(_test_collection_id,
                                       versioned=versioned,
                                       key=row["key"])
            args = {
                "collection_id": _test_collection_id,
                "key": row["key"]
            }
            if _write_debug_sql:
                with open("/tmp/debug.sql", "w") as debug_sql_file:
                    debug_sql_file.write(mogrify(sql_text, args))
            test_rows = fetch_rows(sql_text, args)

            # 2012-12-20 dougfort -- list_keys and list_versions only
            # retrieve one conjoined part, but version_for_key retrieves
            # all conjoined parts. So we may have more than one row here.
            self.assertTrue(len(test_rows) > 0)
            for test_row in test_rows:
                self.assertEqual(test_row["key"], row["key"],
                                 (test_row["key"], row["key"]))
                self.assertEqual(
                    test_row["unified_id"], row["unified_id"],
                    (test_row["unified_id"], row["unified_id"]))

    # check that these return empty
    for versioned in [True, False]:
        sql_text = version_for_key(_test_collection_id,
                                   versioned=versioned,
                                   key=_test_key,
                                   unified_id=_test_no_such_unified_id)
        # bind the same key the SQL was built for; the old code used
        # row["key"], a variable leaked from the loop above, which is
        # undefined when list_keys returns nothing
        args = {
            "collection_id": _test_collection_id,
            "key": _test_key,
            "unified_id": _test_no_such_unified_id
        }
        test_rows = fetch_rows(sql_text, args)
        self.assertEqual(len(test_rows), 0, test_rows)
def test_list_keys_vs_list_versions(self):
    """
    check that this can ONLY find the same rows list_versions returns
    above IF they are also in the result that list_keys returns
    (i.e. some of them should be findable, some not.)
    """
    # Background: list_keys returns the newest version of every key.
    # list_versions returns every version of every key.
    # If a collection is unversioned, output from list_keys and
    # list_versions should find the same rows
    # (although the output from list_keys has an extra column.)
    # In other words, in a versioned collection, any version of a key
    # that isn't the newest version should be unreachable.
    #
    # The test therefore:
    # 1. saves the full output of list_versions(versioned=True)
    # 2. saves the full output of list_keys(versioned=True)
    # 3. compares the two to determine which rows are older versions
    # 4. calls version_for_key for each row with a specific unified_id
    #    and versioned argument, and verifies finding (or correctly not
    #    finding) the result.

    def listing(sql_builder, versioned):
        # run a prefix listing; return its rows and (key, unified_id) set
        sql_text = sql_builder(_test_collection_id,
                               versioned=versioned,
                               prefix=_test_prefix)
        args = {"collection_id": _test_collection_id,
                "prefix": _test_prefix, }
        cursor = self._connection.cursor()
        cursor.execute(sql_text, args)
        rows = cursor.fetchall()
        cursor.close()
        pairs = set([(r["key"], r["unified_id"], ) for r in rows])
        return rows, pairs

    def lookup(key, unified_id, versioned):
        # run version_for_key for one pair; return rows and the args used
        sql_text = version_for_key(_test_collection_id,
                                   versioned=versioned,
                                   key=key,
                                   unified_id=unified_id)
        args = {"collection_id": _test_collection_id,
                "key": key,
                "unified_id": unified_id}
        cursor = self._connection.cursor()
        cursor.execute(sql_text, args)
        rows = cursor.fetchall()
        cursor.close()
        return rows, args

    # 1. list_versions, versioned=True
    list_versions_rows, list_versions_set = listing(list_versions, True)

    # 2. list_keys, versioned=True
    list_keys_rows, list_keys_set = listing(list_keys, True)

    # find keys that are only reachable by list_keys when versioned=True
    # we need this below.
    unversioned_list_keys_rows, unversioned_list_keys_set = \
        listing(list_keys, False)
    versioned_only_reachable_set = \
        list_keys_set - unversioned_list_keys_set

    # 3. compare the results to determine which keys are older versions
    older_version_set = list_versions_set - list_keys_set

    # So the rows that are in the output of list_versions but are NOT in
    # the output of list_keys should be rows that are older versions.
    # Assert that this holds for every list_versions row.
    for list_versions_row in list_versions_rows:
        test_tuple = (list_versions_row["key"],
                      list_versions_row["unified_id"], )
        self.assertIn(test_tuple, list_versions_set)
        if test_tuple not in list_keys_set:
            self.assertIn(test_tuple, older_version_set)
        else:
            self.assertNotIn(test_tuple, older_version_set)

    # 4. Rows that are only in list_versions, looked up with their
    # unified_id: with versioned=False they should not be reachable,
    # with versioned=True they should be reachable.
    for key, unified_id in older_version_set:
        for versioned in [False, True, ]:
            test_rows, _ = lookup(key, unified_id, versioned)
            if not versioned:
                self.assertEqual(len(test_rows), 0)
            else:
                self.assertTrue(len(test_rows) > 0)

    # The rows that were in both list_versions output and list_keys
    # output should be reachable with versioned=True, but only reachable
    # with versioned=False if they are not in versioned_only_reachable_set.
    for key, unified_id in list_keys_set:
        versioned_only = (key, unified_id, ) in versioned_only_reachable_set
        for versioned in [False, True, ]:
            test_rows, args = lookup(key, unified_id, versioned)
            if versioned is False and versioned_only:
                self.assertTrue(len(test_rows) == 0,
                                "versioned={0} {1}".format(versioned, args))
            else:
                self.assertTrue(len(test_rows) > 0,
                                "versioned={0} {1}".format(versioned, args))
def test_version_for_key(self):
    """
    version_for_key

    First checks that every row list_keys returns, for both versioned and
    unversioned listings, is found again by version_for_key when no
    unified_id is given, with matching key and unified_id.

    Then checks that version_for_key for _test_key with a non-existent
    unified_id returns no rows.
    """
    # check that for every row in list_keys, calling version_for_key with
    # unified_id=None should return the same row, regardless of it being
    # versioned or not.
    for versioned in [True, False]:
        sql_text = list_keys(_test_collection_id,
                             versioned=versioned,
                             prefix=_test_prefix)
        args = {"collection_id": _test_collection_id,
                "prefix": _test_prefix, }
        cursor = self._connection.cursor()
        try:
            cursor.execute(sql_text, args)
            baseline_rows = cursor.fetchall()
        finally:
            # close the cursor even if the query fails
            cursor.close()

        for row in baseline_rows:
            sql_text = version_for_key(_test_collection_id,
                                       versioned=versioned,
                                       key=row["key"])
            args = {"collection_id": _test_collection_id,
                    "key": row["key"]}

            if _write_debug_sql:
                with open("/tmp/debug.sql", "w") as debug_sql_file:
                    debug_sql_file.write(mogrify(sql_text, args))

            cursor = self._connection.cursor()
            try:
                cursor.execute(sql_text, args)
                test_rows = cursor.fetchall()
            finally:
                cursor.close()

            # 2012-12-20 dougfort -- list_keys and list_versions only
            # retrieve one conjoined part, but version_for_key retrieves
            # all conjoined parts. So we may have more than one row here.
            self.assertTrue(len(test_rows) > 0)
            for test_row in test_rows:
                self.assertEqual(test_row["key"], row["key"],
                                 (test_row["key"], row["key"]))
                self.assertEqual(test_row["unified_id"], row["unified_id"],
                                 (test_row["unified_id"], row["unified_id"]))

    # check that these return empty
    for versioned in [True, False]:
        sql_text = version_for_key(_test_collection_id,
                                   versioned=versioned,
                                   key=_test_key,
                                   unified_id=_test_no_such_unified_id)
        # the bound key must be the one the query was built for;
        # previously this read row["key"], left over from the loop above,
        # which raises NameError when list_keys returned no rows
        args = {"collection_id": _test_collection_id,
                "key": _test_key,
                "unified_id": _test_no_such_unified_id}
        cursor = self._connection.cursor()
        try:
            cursor.execute(sql_text, args)
            test_rows = cursor.fetchall()
        finally:
            cursor.close()
        self.assertEqual(len(test_rows), 0, test_rows)