Пример #1
0
def list_versions(interaction_pool, 
                  collection_id, 
                  versioned,
                  prefix=None, 
                  max_keys=1000, 
                  delimiter="",
                  key_marker=None,
                  version_id_marker=None):
    """
    retrieve information about versions which are visible: not deleted, etc

    The query text is built by sql_factory.list_versions and run through
    interaction_pool using the _local_node_name pool.

    interaction_pool
        object whose run() returns an async result with a blocking get()
    collection_id
        id of the collection to list
    versioned
        passed through unchanged to sql_factory.list_versions
    prefix
        optional key prefix; None is sent to the query as ""
    max_keys
        maximum number of rows to report (coerced with int())
    delimiter
        when non-empty, report the distinct key prefixes that end with the
        first delimiter found after ``prefix`` instead of the key data
    key_marker, version_id_marker
        optional paging markers; None is sent to the query as "" and 0

    returns {"key_data": [...], "truncated": bool} when delimiter is "",
    otherwise {"prefixes": [...], "truncated": bool}
    """
    # ask for one more than max_keys, so we can tell if we are truncated
    max_keys = int(max_keys)
    request_count = max_keys + 1

    sql_text = sql_factory.list_versions(collection_id,
                                         versioned=versioned,
                                         prefix=prefix,
                                         key_marker=key_marker,
                                         version_marker=version_id_marker,
                                         limit=request_count)

    args = {"collection_id" : collection_id,
            "prefix"        : (prefix if prefix is not None else ""),
            "key_marker"    : (key_marker if key_marker is not None else ""),
            "version_marker": 
                (version_id_marker if version_id_marker is not None else 0), }

    async_result = interaction_pool.run(interaction=sql_text.encode("utf-8"),
                                        interaction_args=args, 
                                        pool=_local_node_name)

    result = async_result.get()

    # the extra row we asked for is only a truncation signal; never report it
    truncated = len(result) == request_count
    key_list = list()
    for row in result[:max_keys]:
        key_list.append(
            {"key"                : row["key"], 
            "version_identifier" : row["unified_id"], 
            "timestamp"          : http_timestamp_str(row["timestamp"])})

    if delimiter == "":
        return {"key_data" : key_list, "truncated" : truncated} 

    # XXX: there may be some SQL way to do this efficiently
    prefix_set = set()
    # only look for the delimiter after the requested prefix
    offset = (len(prefix) if prefix is not None else 0)
    for key_entry in key_list:
        delimiter_pos = key_entry["key"].find(delimiter, offset)
        # str.find reports "not found" as -1. Position 0 is a real match:
        # a key that starts with the delimiter when there is no prefix.
        if delimiter_pos != -1:
            # keep the whole delimiter, which may be longer than one character
            prefix_end = delimiter_pos + len(delimiter)
            prefix_set.add(key_entry["key"][:prefix_end])

    return {"prefixes" : list(prefix_set), "truncated" : truncated}
Пример #2
0
    def test_list_versions_same_rows(self):
        """
        check that version_for_key can find every row that list_versions
        returns in the versioned case
        """
        def _fetch_all(sql_text, args):
            # close the cursor even when the query raises, so a failing
            # statement does not leak it
            cursor = self._connection.cursor()
            try:
                cursor.execute(sql_text, args)
                return cursor.fetchall()
            finally:
                cursor.close()

        sql_text = list_versions(_test_collection_id,
                                 versioned=True,
                                 prefix=_test_prefix)

        args = {
            "collection_id": _test_collection_id,
            "prefix": _test_prefix,
        }

        # keep a copy of the query with its arguments substituted, for
        # debugging
        with open("/tmp/debug.sql", "w") as debug_sql_file:
            debug_sql_file.write(mogrify(sql_text, args))

        list_versions_rows = _fetch_all(sql_text, args)

        # look every listed row up again by its exact key and unified_id
        for list_versions_row in list_versions_rows:
            sql_text = version_for_key(
                _test_collection_id,
                versioned=True,
                key=list_versions_row["key"],
                unified_id=list_versions_row["unified_id"])

            args = {
                "collection_id": _test_collection_id,
                "key": list_versions_row["key"],
                "unified_id": list_versions_row["unified_id"]
            }

            version_for_key_rows = _fetch_all(sql_text, args)

            self.assertTrue(
                len(version_for_key_rows) > 0,
                "{0} {1}".format(args, list_versions_row))
            for version_for_key_row in version_for_key_rows:
                self.assertEqual(version_for_key_row["key"],
                                 list_versions_row["key"])
                self.assertEqual(version_for_key_row["unified_id"],
                                 list_versions_row["unified_id"],
                                 list_versions_row)
    def test_list_versions_same_rows(self):
        """
        check that version_for_key can find every row that list_versions
        returns in the versioned case
        """
        def _fetch_all(sql_text, args):
            # the finally clause closes the cursor on failure as well as on
            # success, so a query that raises does not leak it
            cursor = self._connection.cursor()
            try:
                cursor.execute(sql_text, args)
                return cursor.fetchall()
            finally:
                cursor.close()

        sql_text = list_versions(_test_collection_id, 
                                 versioned=True, 
                                 prefix=_test_prefix) 

        args = {"collection_id" : _test_collection_id,
                "prefix"        : _test_prefix, }

        # save the query with its arguments substituted, for debugging
        with open("/tmp/debug.sql", "w") as debug_sql_file:
            debug_sql_file.write(mogrify(sql_text, args))

        list_versions_rows = _fetch_all(sql_text, args)

        # each listed row must be reachable by its exact key and unified_id
        for list_versions_row in list_versions_rows:
            sql_text = version_for_key(_test_collection_id, 
                                       versioned=True,
                                       key=list_versions_row["key"], 
                                       unified_id=list_versions_row["unified_id"]) 

            args = {"collection_id" : _test_collection_id,
                    "key"           : list_versions_row["key"], 
                    "unified_id"    : list_versions_row["unified_id"]}

            version_for_key_rows = _fetch_all(sql_text, args)

            self.assertTrue(len(version_for_key_rows) > 0, 
                            "{0} {1}".format(args, list_versions_row))
            for version_for_key_row in version_for_key_rows:
                self.assertEqual(version_for_key_row["key"],
                                 list_versions_row["key"])
                self.assertEqual(version_for_key_row["unified_id"],
                                 list_versions_row["unified_id"],
                                 list_versions_row)
Пример #4
0
    def test_list_keys_vs_list_versions(self):
        """
        check that version_for_key can ONLY find the rows list_versions
        returns IF they are also in the result that list_keys returns
        (i.e. some of them should be findable, some not.)
        """
        # Background, as exercised below:
        # * list_versions(versioned=True) is expected to return every version
        #   of every key; list_keys is expected to return one row per key.
        # * rows that list_versions returns but list_keys(versioned=True)
        #   does not are treated as older versions.  version_for_key must
        #   find them with versioned=True and must not find them with
        #   versioned=False.
        # * rows that list_keys(versioned=True) returns must be found with
        #   versioned=True.  With versioned=False they must be found unless
        #   list_keys(versioned=False) does not return them.

        def _fetch_all(sql_text, args):
            # close the cursor even when the query raises, so a failing
            # statement does not leak it
            cursor = self._connection.cursor()
            try:
                cursor.execute(sql_text, args)
                return cursor.fetchall()
            finally:
                cursor.close()

        def _list_rows(sql_function, versioned):
            # run list_versions or list_keys over the test prefix
            sql_text = sql_function(_test_collection_id,
                                    versioned=versioned,
                                    prefix=_test_prefix)
            args = {
                "collection_id": _test_collection_id,
                "prefix": _test_prefix,
            }
            return _fetch_all(sql_text, args)

        def _key_id_set(rows):
            # reduce rows to (key, unified_id) so that results from queries
            # with different column lists can be compared
            return set((r["key"], r["unified_id"], ) for r in rows)

        def _lookup(key, unified_id, versioned):
            # run version_for_key for one specific version; the args are
            # returned too because the assertion messages report them
            sql_text = version_for_key(_test_collection_id,
                                       versioned=versioned,
                                       key=key,
                                       unified_id=unified_id)
            args = {
                "collection_id": _test_collection_id,
                "key": key,
                "unified_id": unified_id
            }
            return args, _fetch_all(sql_text, args)

        # 1. get the full output from list_versions(versioned=True)
        list_versions_rows = _list_rows(list_versions, True)
        list_versions_set = _key_id_set(list_versions_rows)

        # 2. get the full output from list_keys(versioned=True)
        list_keys_set = _key_id_set(_list_rows(list_keys, True))

        # find keys that are only reachable by list_keys when versioned=True
        # we need this below.
        unversioned_list_keys_set = _key_id_set(_list_rows(list_keys, False))
        versioned_only_reachable_set = \
            list_keys_set - unversioned_list_keys_set

        # 3. compare the results to determine which keys are older versions
        older_version_set = list_versions_set - list_keys_set

        # verify the partition: every list_versions row is either in the
        # list_keys result or in the older-version set, never both
        for list_versions_row in list_versions_rows:
            test_tuple = (
                list_versions_row["key"],
                list_versions_row["unified_id"],
            )
            self.assertIn(test_tuple, list_versions_set)
            if test_tuple in list_keys_set:
                self.assertNotIn(test_tuple, older_version_set)
            else:
                self.assertIn(test_tuple, older_version_set)

        # 4. older versions: not reachable through version_for_key with
        #    versioned=False, reachable with versioned=True
        for key, unified_id in older_version_set:
            for versioned in [False, True, ]:
                _, test_rows = _lookup(key, unified_id, versioned)

                if not versioned:
                    self.assertEqual(len(test_rows), 0)
                else:
                    self.assertTrue(len(test_rows) > 0)

        # The rows that list_keys(versioned=True) returned should be
        # reachable with versioned=True, but only reachable with
        # versioned=False if they are not in versioned_only_reachable_set.
        for key, unified_id in list_keys_set:
            for versioned in [False, True, ]:
                args, test_rows = _lookup(key, unified_id, versioned)

                if (versioned is False and (
                        key,
                        unified_id,
                ) in versioned_only_reachable_set):
                    self.assertTrue(
                        len(test_rows) == 0,
                        "versioned={0} {1}".format(versioned, args))
                else:
                    self.assertTrue(
                        len(test_rows) > 0,
                        "versioned={0} {1}".format(versioned, args))
Пример #5
0
    def test_limits_and_markers(self):
        """
        check that the limits and markers work correctly: take the full
        result as a baseline, then run a series of queries with limit=1,
        one for each baseline row, feeding the previous result back in as
        the marker and checking each result against the baseline.
        """
        log = logging.getLogger("test_limits_and_markers")

        def _fetch(sql_text, args, one_row=False):
            # close the cursor even when the query raises, so a failing
            # statement does not leak it
            cursor = self._connection.cursor()
            try:
                cursor.execute(sql_text, args)
                if one_row:
                    return cursor.fetchone()
                return cursor.fetchall()
            finally:
                cursor.close()

        # list_keys: page through with key_marker only
        for versioned in [True, False]:
            sql_text = list_keys(_test_collection_id,
                                 versioned=versioned,
                                 prefix=_test_prefix)

            args = {
                "collection_id": _test_collection_id,
                "prefix": _test_prefix,
            }

            baseline_rows = _fetch(sql_text, args)

            key_marker = None
            for row in baseline_rows:
                sql_text = list_keys(_test_collection_id,
                                     versioned=versioned,
                                     prefix=_test_prefix,
                                     key_marker=key_marker,
                                     limit=1)

                args = {
                    "collection_id": _test_collection_id,
                    "prefix": _test_prefix,
                    "key_marker": key_marker,
                    "limit": 1
                }

                test_row = _fetch(sql_text, args, one_row=True)

                # fail cleanly, instead of with a TypeError on the
                # subscripts below, if paging ran out of rows early
                self.assertIsNotNone(test_row, (versioned, key_marker))
                self.assertEqual(test_row["key"], row["key"],
                                 (test_row["key"], row["key"]))
                self.assertEqual(test_row["unified_id"], row["unified_id"],
                                 (test_row["unified_id"], row["unified_id"]))

                key_marker = test_row["key"]

        # list_versions: page through with key_marker and version_marker
        for versioned in [True, False]:
            sql_text = list_versions(_test_collection_id,
                                     versioned=versioned,
                                     prefix=_test_prefix,
                                     limit=None)

            args = {
                "collection_id": _test_collection_id,
                "prefix": _test_prefix,
            }

            if _write_debug_sql:
                with open("/tmp/debug_all.sql", "w") as debug_sql_file:
                    debug_sql_file.write(mogrify(sql_text, args))

            baseline_rows = _fetch(sql_text, args)
            baseline_set = set((
                r["key"],
                r["unified_id"],
            ) for r in baseline_rows)

            key_marker = None
            version_marker = None
            for row_idx, row in enumerate(baseline_rows):
                sql_text = list_versions(_test_collection_id,
                                         versioned=versioned,
                                         prefix=_test_prefix,
                                         key_marker=key_marker,
                                         version_marker=version_marker,
                                         limit=1)

                args = {
                    "collection_id": _test_collection_id,
                    "prefix": _test_prefix,
                    "limit": 1
                }

                # the markers are only supplied once there is a previous row
                if key_marker is not None:
                    args["key_marker"] = key_marker
                if version_marker is not None:
                    args["version_marker"] = version_marker

                if _write_debug_sql:
                    debug_filename = "/tmp/debug_%s.sql" % (row_idx, )
                    with open(debug_filename, "w") as debug_sql_file:
                        debug_sql_file.write(mogrify(sql_text, args))

                # this result should always be stable. is it?
                # (a unittest assertion, so the check also runs under -O)
                last_time = None
                for _ in range(5):
                    test_row = _fetch(sql_text, args, one_row=True)
                    if last_time is not None:
                        self.assertEqual(test_row, last_time)
                    last_time = test_row

                self.assertIsNotNone(test_row, (row_idx, versioned))

                # make sure it's in the result somewhere. below we test if it's
                # in the right order.
                self.assertIn((test_row["key"], test_row["unified_id"]),
                              baseline_set)

                log.info("{0}, {1}".format(test_row["key"], row["key"]))
                log.debug(sql_text)

                self.assertEqual(
                    test_row["key"], row["key"],
                    (row_idx, versioned, test_row["key"], row["key"]))
                self.assertEqual(test_row["unified_id"], row["unified_id"],
                                 (row_idx, versioned, test_row["unified_id"],
                                  row["unified_id"]))

                key_marker = test_row["key"]
                version_marker = test_row["unified_id"]
Пример #6
0
    def test_list(self):
        """
        test listing keys and versions of keys: list_versions and list_keys,
        versioned and unversioned, must agree with each other and must not
        return anything that _retrieve_collectables reports
        """
        def _fetch_all(sql_text, args):
            # close the cursor even when the query raises, so a failing
            # statement does not leak it
            cursor = self._connection.cursor()
            try:
                cursor.execute(sql_text, args)
                return cursor.fetchall()
            finally:
                cursor.close()

        def _assert_none_collectable(rows, versioned):
            # no listed (key, unified_id) may also be in the collectable set
            collectable_set = self._retrieve_collectables(versioned)
            test_set = set((
                r["key"],
                r["unified_id"],
            ) for r in rows)
            collectable_intersection = test_set & collectable_set
            self.assertEqual(len(collectable_intersection), 0,
                             collectable_intersection)

        def _count_keys(rows):
            # count rows per key, checking that every key begins with prefix
            key_counts = Counter()
            for row in rows:
                key_counts[row["key"]] += 1
                self.assertTrue(row["key"].startswith(_test_prefix))
            return key_counts

        versioned = False
        sql_text = list_versions(_test_collection_id,
                                 versioned=versioned,
                                 prefix=_test_prefix,
                                 limit=None)

        args = {
            "collection_id": _test_collection_id,
            "prefix": _test_prefix,
        }

        if _write_debug_sql:
            with open("/tmp/debug_unversioned_rows.sql",
                      "w") as debug_sql_file:
                debug_sql_file.write(mogrify(sql_text, args))

        unversioned_rows = _fetch_all(sql_text, args)
        _assert_none_collectable(unversioned_rows, versioned)

        # check that there's no more than one row per key for a non-versioned
        # collection
        unversioned_key_counts = _count_keys(unversioned_rows)
        for key, value in unversioned_key_counts.items():
            self.assertEqual(value, 1, (key, value))

        versioned = True
        sql_text = list_versions(_test_collection_id,
                                 versioned=versioned,
                                 prefix=_test_prefix)

        args = {
            "collection_id": _test_collection_id,
            "prefix": _test_prefix,
        }

        if _write_debug_sql:
            with open("/tmp/debug_versioned_rows.sql", "w") as debug_sql_file:
                debug_sql_file.write(mogrify(sql_text, args))

        versioned_rows = _fetch_all(sql_text, args)
        _assert_none_collectable(versioned_rows, versioned)

        # keep, for each key, the row that appears last in versioned_rows,
        # in the order those rows appear.  Walking the list backwards makes
        # setdefault retain the last occurrence of each key.
        latest_by_key = OrderedDict()
        for row in versioned_rows[::-1]:
            latest_by_key.setdefault(row["key"], row)
        # list() so that reverse() also works where values() returns a view
        # (Python 3) rather than a list
        latest_versioned_rows = list(latest_by_key.values())
        latest_versioned_rows.reverse()
        self.assertLessEqual(len(latest_versioned_rows), len(versioned_rows))

        versioned_key_counts = _count_keys(versioned_rows)

        # check that there's >= as many rows now as above.
        for key, value in versioned_key_counts.items():
            self.assertTrue(value >= unversioned_key_counts[key], (key, value))

        # check that the list keys result is consistent with list_versions in
        # above (although there could be extra columns.)  Note that
        # list_keys(versioned=True) may have records that
        # list_versions(versioned=False) does not have, because there are more
        # ways for a segment to become eligible for garbage collection in an
        # unversioned collection.

        for versioned in [
                False,
                True,
        ]:
            sql_text = list_keys(_test_collection_id,
                                 versioned=versioned,
                                 prefix=_test_prefix,
                                 limit=None)

            args = {
                "collection_id": _test_collection_id,
                "prefix": _test_prefix,
            }

            key_rows = _fetch_all(sql_text, args)

            if _write_debug_sql:
                debug_filename = "/tmp/debug_key_rows_versioned_%r.sql" % (
                    versioned, )
                with open(debug_filename, "w") as debug_sql_file:
                    debug_sql_file.write(mogrify(sql_text, args))

            _assert_none_collectable(key_rows, versioned)

            if versioned:
                # a list of keys with versioning on may have keys that don't
                # show up in the list of unversioned rows.  That's because in
                # an unversioned collection, keys end when another key is
                # added.  So it's possible for that plus a tombstone to cause a
                # situation where an archive is not eligible for garbage
                # collection in a versioned collection, but it is eligible for
                # garbage collection in an unversioned collection.
                self.assertGreaterEqual(len(key_rows), len(unversioned_rows), (
                    len(key_rows),
                    len(unversioned_rows),
                    versioned,
                ))
            else:
                self.assertEqual(len(key_rows), len(unversioned_rows), (
                    len(key_rows),
                    len(unversioned_rows),
                    versioned,
                ))

            # list_keys must never return more than one row per key
            key_counts = _count_keys(key_rows)
            for key, value in key_counts.items():
                self.assertEqual(value, 1, (key, value))

            # row by row, list_keys must match the newest list_versions row
            # per key (versioned) or the unversioned list_versions rows
            if versioned:
                expected_rows = latest_versioned_rows
            else:
                expected_rows = unversioned_rows
            for key_row, version_row in zip(key_rows, expected_rows):
                self.assertEqual(key_row["key"], version_row["key"])
                self.assertEqual(key_row["unified_id"],
                                 version_row["unified_id"])
    def test_list_keys_vs_list_versions(self):
        """ 
        check that this can ONLY find the same rows list_versions returns 
        above IF they are also in the result that list_keys returns
        (i.e. some of them should be findable, some not.)
        """
        # Background: list_keys returns the newest version of every key. 
        # list_versions returns every version of every key. 
        # If a collection is unversioned, output from list_keys and list_versions 
        # should find the same rows 
        # (although the output from list_keys has an extra column.) 
        # In other words, in a versioned collection, any version of a key 
        # that isn't the newest version should be unreachable.
        # So, I was imagining the test to do this:
        # 1. get the full output from list_versions(test_colelction_id, versioned=True) and save it.
        # 2. get the full output from list_keys(test_collection_id, versioned=True) and save it
        # 3. compare the results to determine which keys are older versions
        # 4. For each row, call version_for_key with specific unified_id and versioned arguments 
        #    and verify finding (or correctly not finding) the result.
        #
        # Sothe rows that are in the output of list_versions but are NOT in 
        # the output of list_keys should be rows that are older versions. 
        # (You may have to discard that extra column from list_keys before 
        # comparing results.) That's probably worth an assert or two to verify 
        # that assumption once you have the lists.
        # Then, if we call version_for_key on those rows that are only in 
        # list_versions with versioned=False and specify their unified_id when 
        # calling version_for_key, they should not be reachable. 
        # With versioned=True they should be reachable.
        # The rows that were in both list_versions output and list_keys output 
        # should be reachable either with versioned=True or versioned=False.

        # 1. get the full output from list_versions(test_colelction_id, versioned=True) and save it.
        sql_text = list_versions(_test_collection_id, 
                                 versioned=True, 
                                 prefix=_test_prefix)

        args = {"collection_id" : _test_collection_id,
                "prefix"        : _test_prefix, }

        cursor = self._connection.cursor()
        cursor.execute(sql_text, args)
        list_versions_rows = cursor.fetchall()
        cursor.close()

        list_versions_set = set([(r["key"], r["unified_id"], ) \
                                 for r in list_versions_rows])

        # 2. get the full output from list_keys(test_collection_id, versioned=True) and save it
        sql_text = list_keys(_test_collection_id, 
                             versioned=True, 
                             prefix=_test_prefix)

        args = {"collection_id" : _test_collection_id,
                "prefix"        : _test_prefix, }

        cursor = self._connection.cursor()
        cursor.execute(sql_text, args)
        list_keys_rows = cursor.fetchall()
        cursor.close()

        list_keys_set = set([(r["key"], r["unified_id"], ) \
                                 for r in list_keys_rows])

        # find keys that are only reachable by list_keys when versioned=True
        # we need this below.
        sql_text = list_keys(_test_collection_id, 
                             versioned=False, 
                             prefix=_test_prefix)

        args = {"collection_id" : _test_collection_id,
                "prefix"        : _test_prefix, }

        cursor = self._connection.cursor()
        cursor.execute(sql_text, args)
        unversioned_list_keys_rows = cursor.fetchall()
        cursor.close()

        unversioned_list_keys_set = set([(r["key"], r["unified_id"], ) \
                                         for r in unversioned_list_keys_rows])

        versioned_only_reachable_set = \
            list_keys_set - unversioned_list_keys_set


        # 3. compare the results to determine which keys are older versions
        older_version_set = list_versions_set - list_keys_set

        # Sothe rows that are in the output of list_versions but are NOT in 
        # the output of list_keys should be rows that are older versions. 
        # (You may have to discard that extra column from list_keys before 
        # comparing results.) That's probably worth an assert or two to verify 
        # that assumption once you have the lists.
        for list_versions_row in list_versions_rows:
            test_tuple = (list_versions_row["key"], 
                          list_versions_row["unified_id"], )
            self.assertIn(test_tuple, list_versions_set)
            if test_tuple in list_keys_set:
                self.assertNotIn(test_tuple, older_version_set)
            else:
                self.assertIn(test_tuple, older_version_set)

        # 4. For each row, call version_for_key with specific unified_id and versioned arguments 
        #    and verify finding (or correctly not finding) the result.

        # Then, if we call version_for_key on those rows that are only in 
        # list_versions with versioned=False and specify their unified_id when 
        # calling version_for_key, they should not be reachable. 
        # With versioned=True they should be reachable.
        for key, unified_id in older_version_set:
            for versioned in [False, True, ]:
                sql_text = version_for_key(_test_collection_id, 
                                           versioned=versioned, 
                                           key=key,
                                           unified_id=unified_id)

                args = {"collection_id" : _test_collection_id,
                        "key"           : key,
                        "unified_id"    : unified_id} 

                cursor = self._connection.cursor()
                cursor.execute(sql_text, args)
                test_rows = cursor.fetchall()
                cursor.close()

                if not versioned:
                    self.assertEqual(len(test_rows), 0)
                else:
                    self.assertTrue(len(test_rows) > 0)

        # The rows that were in both list_versions output and list_keys output
        # should always be reachable with versioned=True, but only reachable
        # with versioned=False if they are not in versioned_only_reachable_set.
        for key, unified_id in list_keys_set:
            for versioned in [False, True, ]:
                sql_text = version_for_key(_test_collection_id, 
                                           versioned=versioned, 
                                           key=key,
                                           unified_id=unified_id)

                args = {"collection_id" : _test_collection_id,
                        "key"           : key,
                        "unified_id"    : unified_id} 

                cursor = self._connection.cursor()
                cursor.execute(sql_text, args)
                test_rows = cursor.fetchall()
                cursor.close()

                if (versioned is False
                    and (key, unified_id, ) in versioned_only_reachable_set
                ):
                    self.assertTrue(len(test_rows) == 0,
                                "versioned={0} {1}".format(versioned, args))
                else:
                    self.assertTrue(len(test_rows) > 0, 
                                "versioned={0} {1}".format(versioned, args))
    def test_limits_and_markers(self):
        """
        Check that limits and markers page through results correctly.

        For list_keys and then list_versions, with versioned True and False:
        fetch a baseline result without markers, then re-run the query once
        per baseline row with limit=1, feeding the previously returned row
        back in as the marker(s).  Every single-row result must match the
        baseline row at the same position.
        """
        # function-scope import: keeps this method independent of the
        # file-level import block
        from contextlib import closing

        log = logging.getLogger("test_limits_and_markers")

        for versioned in [True, False]:
            sql_text = list_keys(_test_collection_id,
                                 versioned=versioned,
                                 prefix=_test_prefix)

            args = {"collection_id" : _test_collection_id,
                    "prefix"        : _test_prefix, }

            # closing() releases the cursor even when execute/fetch raises
            with closing(self._connection.cursor()) as cursor:
                cursor.execute(sql_text, args)
                baseline_rows = cursor.fetchall()

            # walk the baseline one row at a time, using the key of the row
            # just returned as the marker for the next query
            key_marker = None
            for row in baseline_rows:
                sql_text = list_keys(_test_collection_id,
                                     versioned=versioned,
                                     prefix=_test_prefix,
                                     key_marker=key_marker,
                                     limit=1)

                args = {"collection_id" : _test_collection_id,
                        "prefix"        : _test_prefix,
                        "key_marker"    : key_marker,
                        "limit"         : 1}

                with closing(self._connection.cursor()) as cursor:
                    cursor.execute(sql_text, args)
                    test_row = cursor.fetchone()

                # report an empty page as a test failure with context rather
                # than as a TypeError from subscripting None
                self.assertIsNotNone(test_row,
                                     (versioned, key_marker, row["key"]))

                self.assertEqual(test_row["key"], row["key"],
                                 (test_row["key"], row["key"]))
                self.assertEqual(test_row["unified_id"], row["unified_id"],
                                 (test_row["unified_id"], row["unified_id"]))

                key_marker = test_row["key"]

        for versioned in [True, False]:
            sql_text = list_versions(_test_collection_id,
                                     versioned=versioned,
                                     prefix=_test_prefix,
                                     limit=None)

            args = {"collection_id" : _test_collection_id,
                    "prefix"        : _test_prefix, }

            if _write_debug_sql:
                with open("/tmp/debug_all.sql", "w") as debug_sql_file:
                    debug_sql_file.write(mogrify(sql_text, args))

            with closing(self._connection.cursor()) as cursor:
                cursor.execute(sql_text, args)
                baseline_rows = cursor.fetchall()

            baseline_set = set((r["key"], r["unified_id"], )
                               for r in baseline_rows)

            # list_versions pages on the (key, unified_id) pair, so both
            # markers are carried from one query to the next
            key_marker = None
            version_marker = None
            for row_idx, row in enumerate(baseline_rows):
                sql_text = list_versions(_test_collection_id,
                                         versioned=versioned,
                                         prefix=_test_prefix,
                                         key_marker=key_marker,
                                         version_marker=version_marker,
                                         limit=1)

                args = {"collection_id" : _test_collection_id,
                        "prefix"        : _test_prefix,
                        "limit"         : 1}

                # markers are only bound when they have a value
                if key_marker is not None:
                    args["key_marker"] = key_marker
                if version_marker is not None:
                    args["version_marker"] = version_marker

                if _write_debug_sql:
                    debug_filename = "/tmp/debug_%s.sql" % (row_idx, )
                    with open(debug_filename, "w") as debug_sql_file:
                        debug_sql_file.write(mogrify(sql_text, args))

                # this result should always be stable. is it?
                # run the identical query several times and require the same
                # row each time (a real assertion, not a bare assert that
                # disappears under python -O)
                last_time = None
                for _ in range(5):
                    with closing(self._connection.cursor()) as cursor:
                        cursor.execute(sql_text, args)
                        test_row = cursor.fetchone()
                    if last_time is not None:
                        self.assertEqual(test_row, last_time,
                                         (row_idx, versioned))
                    last_time = test_row

                self.assertIsNotNone(
                    test_row,
                    (row_idx, versioned, key_marker, version_marker))

                # make sure it's in the result somewhere. below we test if it's
                # in the right order.
                self.assertIn((test_row["key"], test_row["unified_id"]),
                              baseline_set)

                log.info("{0}, {1}".format(test_row["key"], row["key"]))
                log.debug(sql_text)

                self.assertEqual(
                    test_row["key"], row["key"],
                    (row_idx, versioned, test_row["key"], row["key"]))
                self.assertEqual(
                    test_row["unified_id"], row["unified_id"],
                    (row_idx, versioned,
                     test_row["unified_id"], row["unified_id"]))

                key_marker = test_row["key"]
                version_marker = test_row["unified_id"]
    def test_list(self):
        """
        Test listing keys and versions of keys.

        Runs list_versions with versioned=False and versioned=True, then
        list_keys with both settings, and checks that:

        * no listed (key, unified_id) pair is in the set returned by
          self._retrieve_collectables for the same versioned setting
        * every listed key starts with the test prefix
        * unversioned list_versions and both list_keys results have at most
          one row per key
        * each key has at least as many versioned rows as unversioned rows
        * list_keys rows agree, position by position, with the matching
          list_versions rows
        """
        # function-scope import: keeps this method independent of the
        # file-level import block
        from contextlib import closing

        log = logging.getLogger("test_list")

        versioned = False
        sql_text = list_versions(_test_collection_id,
                                 versioned=versioned,
                                 prefix=_test_prefix,
                                 limit=None)

        args = {"collection_id" : _test_collection_id,
                "prefix"        : _test_prefix, }

        if _write_debug_sql:
            with open("/tmp/debug_unversioned_rows.sql", "w") as debug_sql_file:
                debug_sql_file.write(mogrify(sql_text, args))

        # closing() releases the cursor even when execute/fetch raises
        with closing(self._connection.cursor()) as cursor:
            cursor.execute(sql_text, args)
            unversioned_rows = cursor.fetchall()

        # nothing that is listed may also be reported as collectable
        collectable_set = self._retrieve_collectables(versioned)
        test_set = set((r["key"], r["unified_id"], ) for r in unversioned_rows)
        collectable_intersection = test_set & collectable_set
        self.assertEqual(len(collectable_intersection), 0,
                         collectable_intersection)

        # check that there's no more than one row per key for a non-versioned
        # collection
        # check that every row begins with prefix
        unversioned_key_counts = Counter()
        for row in unversioned_rows:
            unversioned_key_counts[row["key"]] += 1
            self.assertTrue(row["key"].startswith(_test_prefix))
        for key, value in unversioned_key_counts.items():
            self.assertEqual(value, 1, (key, value))

        versioned = True
        # NOTE(review): unlike the unversioned call above, this call does not
        # pass limit=None -- confirm the default limit cannot truncate the
        # result for the test data.
        sql_text = list_versions(_test_collection_id,
                                 versioned=versioned,
                                 prefix=_test_prefix)

        args = {"collection_id" : _test_collection_id,
                "prefix"        : _test_prefix, }

        if _write_debug_sql:
            with open("/tmp/debug_versioned_rows.sql", "w") as debug_sql_file:
                debug_sql_file.write(mogrify(sql_text, args))

        with closing(self._connection.cursor()) as cursor:
            cursor.execute(sql_text, args)
            versioned_rows = cursor.fetchall()

        collectable_set = self._retrieve_collectables(versioned)
        test_set = set((r["key"], r["unified_id"], ) for r in versioned_rows)
        collectable_intersection = test_set & collectable_set
        self.assertEqual(len(collectable_intersection), 0,
                         collectable_intersection)

        # Keep, for each key, the last row list_versions returned for it
        # (presumably the newest version -- this depends on the ordering
        # list_versions produces).  Walking the rows backwards lets
        # setdefault retain the last occurrence; reversing afterwards
        # restores the original relative order.
        last_row_by_key = OrderedDict()
        for row in versioned_rows[::-1]:
            last_row_by_key.setdefault(row["key"], row)
        # list(): on Python 3 dict.values() is a view without .reverse()
        latest_versioned_rows = list(last_row_by_key.values())
        latest_versioned_rows.reverse()
        self.assertLessEqual(len(latest_versioned_rows), len(versioned_rows))

        versioned_key_counts = Counter()
        for row in versioned_rows:
            versioned_key_counts[row["key"]] += 1
            self.assertTrue(row["key"].startswith(_test_prefix))

        # check that there's >= as many rows now as above.
        for key, value in versioned_key_counts.items():
            self.assertGreaterEqual(value, unversioned_key_counts[key],
                                    (key, value))

        # check that the list keys result is consistent with list_versions in
        # above (although there could be extra columns.)  Note that
        # list_keys(versioned=True) may have records that
        # list_versions(versioned=False) does not have, because there are more
        # ways for a segment to become eligible for garbage collection in an
        # unversioned collection.

        for versioned in [False, True, ]:
            sql_text = list_keys(_test_collection_id,
                                 versioned=versioned,
                                 prefix=_test_prefix,
                                 limit=None)

            args = {"collection_id" : _test_collection_id,
                    "prefix"        : _test_prefix, }

            with closing(self._connection.cursor()) as cursor:
                cursor.execute(sql_text, args)
                key_rows = cursor.fetchall()

            if _write_debug_sql:
                debug_filename = "/tmp/debug_key_rows_versioned_%r.sql" % ( versioned, )
                with open(debug_filename, "w") as debug_sql_file:
                    debug_sql_file.write(mogrify(sql_text, args))

            collectable_set = self._retrieve_collectables(versioned)
            test_set = set((r["key"], r["unified_id"], ) for r in key_rows)
            collectable_intersection = test_set & collectable_set
            self.assertEqual(len(collectable_intersection), 0,
                             collectable_intersection)

            if versioned:
                # a list of keys with versioning on may have keys that don't
                # show up in the list of unversioned rows.  That's because in
                # an unversioned collection, keys end when another key is
                # added.  So it's possible for that plus a tombstone to cause a
                # situation where an archive is not eligible for garbage
                # collection in a versioned collection, but it is eligible for
                # garbage collection in an unversioned collection.
                self.assertGreaterEqual(len(key_rows), len(unversioned_rows),
                    (len(key_rows), len(unversioned_rows), versioned, ))
            else:
                self.assertEqual(len(key_rows), len(unversioned_rows),
                    (len(key_rows), len(unversioned_rows), versioned, ))

            # list_keys must never return a key twice, and only keys under
            # the requested prefix
            key_counts = Counter()
            for row in key_rows:
                key_counts[row["key"]] += 1
                self.assertTrue(row["key"].startswith(_test_prefix))
            for key, value in key_counts.items():
                self.assertEqual(value, 1, (key, value))

            # Compare list_keys against the matching list_versions rows.
            # zip() stops at the shorter sequence, so only the common leading
            # part is compared.
            # NOTE(review): for versioned=True no equal-length assertion
            # precedes this, so surplus rows on either side go unchecked.
            if versioned:
                expected_rows = latest_versioned_rows
            else:
                expected_rows = unversioned_rows
            for key_row, version_row in zip(key_rows, expected_rows):
                self.assertEqual(key_row["key"], version_row["key"])
                self.assertEqual(key_row["unified_id"], version_row["unified_id"])