示例#1
0
class RedisTSDBTest(TestCase):
    def setUp(self):
        """Create the RedisTSDB instance used by every test in this class."""
        # Each entry is (bucket width in seconds, number of samples to keep).
        retention = (
            (10, 30),  # 5 minutes of 10-second buckets
            (ONE_MINUTE, 120),  # 2 hours of 1-minute buckets
            (ONE_HOUR, 24),  # 1 day of 1-hour buckets
            (ONE_DAY, 30),  # 30 days of 1-day buckets
        )

        # Host ids 0, 1 and 2 are configured with "db" values 6, 7 and 8.
        cluster_hosts = {}
        for host_id, db_number in enumerate(range(6, 9)):
            cluster_hosts[host_id] = {"db": db_number}

        self.db = RedisTSDB(
            rollups=retention,
            vnodes=64,
            enable_frequency_sketches=True,
            hosts=cluster_hosts,
        )

    def tearDown(self):
        """Issue ``flushdb`` through the cluster-wide client after each test."""
        cluster = self.db.cluster
        with cluster.all() as every_host:
            every_host.flushdb()

    def test_make_counter_key(self):
        """Check the (key, field) pair built by ``make_counter_key``.

        Covers an integer and a string model key, each with and without an
        environment id.
        """
        # (model key, environment id, expected hash key, expected field)
        cases = (
            (1, None, "ts:1:1368889980:1", 1),
            ("foo", None, "ts:1:1368889980:46",
             self.db.get_model_key("foo")),
            (1, 1, "ts:1:1368889980:1", "1?e=1"),
            ("foo", 1, "ts:1:1368889980:46",
             self.db.get_model_key("foo") + "?e=1"),
        )

        for model_key, environment_id, hash_key, field in cases:
            result = self.db.make_counter_key(
                TSDBModel.project,
                1,
                to_datetime(1368889980),
                model_key,
                environment_id,
            )
            assert result == (hash_key, field)

    def test_get_model_key(self):
        """Check ``get_model_key`` for an integer and two string keys.

        The integer is expected back unchanged; each string (one ASCII, one
        non-ASCII) is expected to map to a fixed 32-character hex string.
        """
        expectations = (
            (1, 1),
            ("foo", "bf4e529197e56a48ae2737505b9736e4"),
            (u"我爱啤酒", "26f980fbe1e8a9d3a0123d2049f95f28"),
        )

        for model_key, expected in expectations:
            assert self.db.get_model_key(model_key) == expected

    def test_simple(self):
        """Exercise counters end to end: incr, range, sums, merge, delete.

        Counts are written for keys 1 and 2 over four consecutive hours, both
        without an environment and tagged with environments 1 and 2. They are
        then read back with ``get_range`` and ``get_sums`` before and after
        key 2 is merged into key 1, and once more after everything is deleted.
        """
        # datetime.utcnow() is deprecated since Python 3.12 and returns a naive
        # value; ask for a timezone-aware UTC datetime directly instead.
        now = datetime.now(pytz.UTC) - timedelta(hours=4)
        dts = [now + timedelta(hours=i) for i in range(4)]
        start, end = dts[0], dts[-1]
        project = TSDBModel.project

        def timestamp(d):
            # Epoch seconds truncated to the start of the enclosing hour.
            t = int(to_timestamp(d))
            return t - (t % 3600)

        def hourly(*counts):
            # Pair each of the four hourly buckets with its expected count.
            return [(timestamp(d), count) for d, count in zip(dts, counts)]

        self.db.incr(project, 1, dts[0])
        self.db.incr(project, 1, dts[1], count=2)
        # Same key and hour again, this time tagged with environment 1.
        self.db.incr(project, 1, dts[1], environment_id=1)
        self.db.incr(project, 1, dts[2])
        # Last hour: both keys at once, 3 for environment 1 then 1 for
        # environment 2.
        for environment_id, count in ((1, 3), (2, 1)):
            self.db.incr_multi(
                [(project, 1), (project, 2)],
                dts[3],
                count=count,
                environment_id=environment_id,
            )

        # Without an environment filter the expected counts include the
        # environment-tagged increments (2 + 1 and 3 + 1).
        results = self.db.get_range(project, [1], start, end)
        assert results == {1: hourly(1, 3, 1, 4)}

        results = self.db.get_range(project, [2], start, end)
        assert results == {2: hourly(0, 0, 0, 4)}

        results = self.db.get_range(project, [1, 2],
                                    start,
                                    end,
                                    environment_ids=[1])
        assert results == {
            1: hourly(0, 1, 0, 3),
            2: hourly(0, 0, 0, 3),
        }

        results = self.db.get_sums(project, [1, 2], start, end)
        assert results == {1: 9, 2: 4}

        results = self.db.get_sums(project, [1, 2],
                                   start,
                                   end,
                                   environment_id=1)
        assert results == {1: 4, 2: 3}

        # Nothing was ever written for environment 0.
        results = self.db.get_sums(project, [1, 2],
                                   start,
                                   end,
                                   environment_id=0)
        assert results == {1: 0, 2: 0}

        # Fold key 2 into key 1; key 2 is expected to read as empty afterwards.
        self.db.merge(project, 1, [2], now, environment_ids=[0, 1, 2])

        results = self.db.get_range(project, [1], start, end)
        assert results == {1: hourly(1, 3, 1, 8)}

        results = self.db.get_range(project, [2], start, end)
        assert results == {2: hourly(0, 0, 0, 0)}

        results = self.db.get_range(project, [1, 2],
                                    start,
                                    end,
                                    environment_ids=[1])
        assert results == {
            1: hourly(0, 1, 0, 6),
            2: hourly(0, 0, 0, 0),
        }

        results = self.db.get_sums(project, [1, 2], start, end)
        assert results == {1: 13, 2: 0}

        self.db.delete([project], [1, 2],
                       start,
                       end,
                       environment_ids=[0, 1, 2])

        results = self.db.get_sums(project, [1, 2], start, end)
        assert results == {1: 0, 2: 0}

        results = self.db.get_sums(project, [1, 2],
                                   start,
                                   end,
                                   environment_id=1)
        assert results == {1: 0, 2: 0}

    def test_count_distinct(self):
        """Exercise distinct counters: record, series, totals, union, merge.

        Values are recorded for keys 1 and 2 over four consecutive hours, some
        tagged with environment 1. The series, totals and union queries are
        checked before and after key 2 is merged into key 1, and the totals
        once more after everything is deleted.
        """
        # datetime.utcnow() is deprecated since Python 3.12 and returns a naive
        # value; ask for a timezone-aware UTC datetime directly instead.
        now = datetime.now(pytz.UTC) - timedelta(hours=4)
        dts = [now + timedelta(hours=i) for i in range(4)]
        start, end = dts[0], dts[-1]
        rollup = 3600

        model = TSDBModel.users_affected_by_group

        def timestamp(d):
            # Epoch seconds truncated to the start of the enclosing hour.
            t = int(to_timestamp(d))
            return t - (t % 3600)

        def hourly(*counts):
            # Pair each of the four hourly buckets with its expected count.
            return [(timestamp(d), count) for d, count in zip(dts, counts)]

        self.db.record(model, 1, ("foo", "bar"), dts[0])
        self.db.record(model, 1, ("baz", ), dts[1], environment_id=1)
        self.db.record_multi(
            ((model, 1, ("foo", "bar")), (model, 2, ("bar", ))), dts[2])
        self.db.record(model, 1, ("baz", ), dts[2], environment_id=1)
        self.db.record(model, 2, ("foo", ), dts[3])

        # Per-hour distinct counts, first unfiltered and then for
        # environment 1 only.
        series = self.db.get_distinct_counts_series(
            model, [1], start, end, rollup=rollup)
        assert series == {1: hourly(2, 1, 3, 0)}

        series = self.db.get_distinct_counts_series(
            model, [2], start, end, rollup=rollup)
        assert series == {2: hourly(0, 0, 1, 1)}

        series = self.db.get_distinct_counts_series(
            model, [1, 2], start, end, rollup=rollup, environment_id=1)
        assert series == {
            1: hourly(0, 1, 1, 0),
            2: hourly(0, 0, 0, 0),
        }

        # Distinct totals over the whole window.
        results = self.db.get_distinct_counts_totals(
            model, [1, 2], start, end, rollup=rollup)
        assert results == {1: 3, 2: 2}

        results = self.db.get_distinct_counts_totals(
            model, [1, 2], start, end, rollup=rollup, environment_id=1)
        assert results == {1: 1, 2: 0}

        # Nothing was ever recorded for environment 0.
        results = self.db.get_distinct_counts_totals(
            model, [1, 2], start, end, rollup=rollup, environment_id=0)
        assert results == {1: 0, 2: 0}

        # Union of distinct values across several keys.
        assert self.db.get_distinct_counts_union(
            model, [], start, end, rollup=rollup) == 0
        assert self.db.get_distinct_counts_union(
            model, [1, 2], start, end, rollup=rollup) == 3
        assert self.db.get_distinct_counts_union(
            model, [1, 2], start, end, rollup=rollup, environment_id=1) == 1
        assert self.db.get_distinct_counts_union(
            model, [1, 2], start, end, rollup=rollup, environment_id=0) == 0

        # Fold key 2 into key 1; key 2 is expected to read as empty afterwards.
        self.db.merge_distinct_counts(model,
                                      1, [2],
                                      start,
                                      environment_ids=[0, 1])

        series = self.db.get_distinct_counts_series(
            model, [1], start, end, rollup=rollup)
        assert series == {1: hourly(2, 1, 3, 1)}

        series = self.db.get_distinct_counts_series(
            model, [2], start, end, rollup=rollup)
        assert series == {2: hourly(0, 0, 0, 0)}

        series = self.db.get_distinct_counts_series(
            model, [1, 2], start, end, rollup=rollup, environment_id=1)
        assert series == {
            1: hourly(0, 1, 1, 0),
            2: hourly(0, 0, 0, 0),
        }

        results = self.db.get_distinct_counts_totals(
            model, [1, 2], start, end, rollup=rollup)
        assert results == {1: 3, 2: 0}

        assert self.db.get_distinct_counts_union(
            model, [], start, end, rollup=rollup) == 0
        assert self.db.get_distinct_counts_union(
            model, [1], start, end, rollup=rollup) == 3
        assert self.db.get_distinct_counts_union(
            model, [1, 2], start, end, rollup=rollup) == 3
        assert self.db.get_distinct_counts_union(
            model, [2], start, end, rollup=rollup) == 0

        self.db.delete_distinct_counts([model], [1, 2],
                                       start,
                                       end,
                                       environment_ids=[0, 1])

        # These final reads deliberately pass no explicit rollup.
        results = self.db.get_distinct_counts_totals(model, [1, 2], start,
                                                     end)
        assert results == {1: 0, 2: 0}

        results = self.db.get_distinct_counts_totals(model, [1, 2],
                                                     start,
                                                     end,
                                                     environment_id=1)
        assert results == {1: 0, 2: 0}

    def test_frequency_tables(self):
        """Exercise frequency tables: record, query, merge and delete.

        Frequencies are recorded for two organizations in the current hour and
        the previous one, partly tagged with environment 1. The most-frequent,
        series and totals queries are checked, then organization 2 is merged
        into organization 1, and finally everything is deleted.
        """
        # datetime.utcnow() is deprecated since Python 3.12 and returns a naive
        # value; ask for a timezone-aware UTC datetime directly instead.
        now = datetime.now(pytz.UTC)
        an_hour_ago = now - timedelta(hours=1)
        model = TSDBModel.frequent_issues_by_project

        org1, org2 = "organization:1", "organization:2"
        both_orgs = (org1, org2)
        all_projects = ("project:1", "project:2", "project:3", "project:4",
                        "project:5")
        # Expected totals for a key that has no recorded frequencies.
        no_hits = dict.fromkeys(all_projects, 0.0)

        # None of the registered frequency tables actually support
        # environments, so we have to pretend like one actually does
        self.db.models_with_environment_support = (
            self.db.models_with_environment_support | {model})

        rollup = 3600

        # Current hour, no environment.
        self.db.record_frequency_multi(
            ((model, {
                org1: {
                    "project:1": 1,
                    "project:2": 2,
                    "project:3": 3,
                },
            }), ),
            now,
        )

        # Previous hour, no environment.
        self.db.record_frequency_multi(
            ((model, {
                org1: {
                    "project:1": 1,
                    "project:2": 1,
                    "project:3": 1,
                    "project:4": 1,
                },
                org2: {
                    "project:5": 1,
                },
            }), ),
            an_hour_ago,
        )

        # Previous hour, environment 1.
        self.db.record_frequency_multi(
            ((model, {
                org1: {
                    "project:2": 1,
                    "project:3": 2,
                    "project:4": 3,
                },
                org2: {
                    "project:5": 0.5,
                },
            }), ),
            an_hour_ago,
            environment_id=1,
        )

        # Current hour only.
        most_frequent = self.db.get_most_frequent(model,
                                                  both_orgs,
                                                  now,
                                                  rollup=rollup)
        assert most_frequent == {
            org1: [("project:3", 3.0), ("project:2", 2.0),
                   ("project:1", 1.0)],
            org2: [],
        }

        # Both hours, environment 1 only.
        most_frequent = self.db.get_most_frequent(
            model,
            both_orgs,
            an_hour_ago,
            now,
            rollup=rollup,
            environment_id=1,
        )
        assert most_frequent == {
            org1: [("project:4", 3.0), ("project:3", 2.0),
                   ("project:2", 1.0)],
            org2: [("project:5", 0.5)],
        }

        most_frequent = self.db.get_most_frequent(model,
                                                  both_orgs,
                                                  now,
                                                  limit=1,
                                                  rollup=rollup)
        assert most_frequent == {
            org1: [("project:3", 3.0)],
            org2: [],
        }

        # Both hours, no environment filter: each sum below is
        # "current hour + previous hour".
        most_frequent = self.db.get_most_frequent(
            model,
            both_orgs,
            an_hour_ago,
            now,
            rollup=rollup,
        )
        assert most_frequent == {
            org1: [
                ("project:3", 3.0 + 3.0),
                ("project:2", 2.0 + 2.0),
                ("project:4", 4.0),
                ("project:1", 1.0 + 1.0),
            ],
            org2: [("project:5", 1.5)],
        }

        # Nothing was ever recorded for environment 0.
        most_frequent = self.db.get_most_frequent(
            model,
            both_orgs,
            an_hour_ago,
            now,
            rollup=rollup,
            environment_id=0,
        )
        assert most_frequent == {org1: [], org2: []}

        # Start of the current rollup bucket, and of the one before it.
        timestamp = int(to_timestamp(now) // rollup) * rollup
        previous = timestamp - rollup

        series = self.db.get_most_frequent_series(
            model,
            (org1, org2, "organization:3"),
            an_hour_ago,
            now,
            rollup=rollup,
        )
        assert series == {
            org1: [
                (previous, {
                    "project:1": 1.0,
                    "project:2": 2.0,
                    "project:3": 3.0,
                    "project:4": 4.0,
                }),
                (timestamp, {
                    "project:1": 1.0,
                    "project:2": 2.0,
                    "project:3": 3.0,
                }),
            ],
            org2: [(previous, {"project:5": 1.5}), (timestamp, {})],
            "organization:3": [(previous, {}), (timestamp, {})],
        }

        series = self.db.get_frequency_series(
            model,
            {
                org1: ("project:1", "project:2", "project:3", "project:4"),
                org2: ("project:5", ),
            },
            an_hour_ago,
            now,
            rollup=rollup,
        )
        assert series == {
            org1: [
                (previous, {
                    "project:1": 1.0,
                    "project:2": 2.0,
                    "project:3": 3.0,
                    "project:4": 4.0,
                }),
                (timestamp, {
                    "project:1": 1.0,
                    "project:2": 2.0,
                    "project:3": 3.0,
                    "project:4": 0.0,
                }),
            ],
            org2: [
                (previous, {"project:5": 1.5}),
                (timestamp, {"project:5": 0.0}),
            ],
        }

        series = self.db.get_frequency_series(
            model,
            {
                org1: ("project:1", "project:2", "project:3", "project:4"),
                org2: ("project:5", ),
            },
            an_hour_ago,
            now,
            rollup=rollup,
            environment_id=1,
        )
        assert series == {
            org1: [
                (previous, {
                    "project:1": 0.0,
                    "project:2": 1.0,
                    "project:3": 2.0,
                    "project:4": 3.0,
                }),
                (timestamp, {
                    "project:1": 0.0,
                    "project:2": 0.0,
                    "project:3": 0.0,
                    "project:4": 0.0,
                }),
            ],
            org2: [
                (previous, {"project:5": 0.5}),
                (timestamp, {"project:5": 0.0}),
            ],
        }

        totals = self.db.get_frequency_totals(
            model,
            {org1: all_projects, org2: all_projects},
            an_hour_ago,
            now,
            rollup=rollup,
        )
        assert totals == {
            org1: {
                "project:1": 1.0 + 1.0,
                "project:2": 2.0 + 2.0,
                "project:3": 3.0 + 3.0,
                "project:4": 4.0,
                "project:5": 0.0,
            },
            org2: {
                "project:1": 0.0,
                "project:2": 0.0,
                "project:3": 0.0,
                "project:4": 0.0,
                "project:5": 1.5,
            },
        }

        # Fold organization 2 into organization 1; afterwards organization 1
        # is expected to carry project:5 and organization 2 to read as empty.
        self.db.merge_frequencies(model,
                                  org1, [org2],
                                  now,
                                  environment_ids=[0, 1])

        totals = self.db.get_frequency_totals(
            model,
            {org1: all_projects, org2: all_projects},
            an_hour_ago,
            now,
            rollup=rollup,
        )
        assert totals == {
            org1: {
                "project:1": 1.0 + 1.0,
                "project:2": 2.0 + 2.0,
                "project:3": 3.0 + 3.0,
                "project:4": 4.0,
                "project:5": 1.5,
            },
            org2: no_hits,
        }

        totals = self.db.get_frequency_totals(
            model,
            {org1: all_projects, org2: all_projects},
            an_hour_ago,
            now,
            rollup=rollup,
            environment_id=1,
        )
        assert totals == {
            org1: {
                "project:1": 0.0,
                "project:2": 1.0,
                "project:3": 2.0,
                "project:4": 3.0,
                "project:5": 0.5,
            },
            org2: no_hits,
        }

        self.db.delete_frequencies(
            [model],
            [org1, org2],
            an_hour_ago,
            now,
            environment_ids=[0, 1],
        )

        # These final reads deliberately pass no explicit rollup.
        most_frequent = self.db.get_most_frequent(model, both_orgs, now)
        assert most_frequent == {org1: [], org2: []}

        most_frequent = self.db.get_most_frequent(model,
                                                  both_orgs,
                                                  now,
                                                  environment_id=1)
        assert most_frequent == {org1: [], org2: []}

    def test_frequency_table_import_export_no_estimators(self):
        """Import an export when neither table has an ``:e`` key beforehand.

        After the INCR calls only the ``:i`` keys exist for both tables; after
        importing table 2's export into table 1, ``1:e`` is expected to exist.
        """
        client = self.db.cluster.get_local_client_for_key("key")

        parameters = [64, 5, 10]

        def run(keys, command, *extra):
            # Script arguments are: command, shared parameters, then extras.
            return CountMinScript(keys,
                                  [command] + parameters + list(extra),
                                  client=client)

        # Flat argument list: 1, "alpha", 2, "beta", ..., 9, "iota".
        greek = ("alpha", "beta", "gamma", "delta", "epsilon", "zeta", "eta",
                 "theta", "iota")
        weighted_greek = []
        for weight, letter in enumerate(greek, 1):
            weighted_greek.extend((weight, letter))

        run(["1:i", "1:e"], "INCR", 1, "foo", 2, "bar", 3, "baz")
        run(["2:i", "2:e"], "INCR", *weighted_greek)

        assert client.exists("1:i")
        assert not client.exists("1:e")
        assert client.exists("2:i")
        assert not client.exists("2:e")

        exports = run(["2:i", "2:e"], "EXPORT")

        assert len(exports) == 1

        run(["1:i", "1:e"], "IMPORT", exports[0])

        assert client.exists("1:i")
        assert client.exists("1:e")

    def test_frequency_table_import_export_both_estimators(self):
        """Import an export when both tables already have an ``:e`` key.

        Both INCR calls leave ``:i`` and ``:e`` keys behind; after importing
        table 2's export into table 1 the RANKED output of table 1 is checked.
        """
        client = self.db.cluster.get_local_client_for_key("key")

        parameters = [64, 5, 5]

        def run(keys, command, *extra):
            # Script arguments are: command, shared parameters, then extras.
            return CountMinScript(keys,
                                  [command] + parameters + list(extra),
                                  client=client)

        # Flat argument list: 1, "alpha", 2, "beta", ..., 9, "iota".
        greek = ("alpha", "beta", "gamma", "delta", "epsilon", "zeta", "eta",
                 "theta", "iota")
        weighted_greek = []
        for weight, letter in enumerate(greek, 1):
            weighted_greek.extend((weight, letter))

        run(["1:i", "1:e"], "INCR", 1, "foo", 2, "bar", 3, "baz", 4, "wilco",
            5, "tango", 6, "foxtrot")
        run(["2:i", "2:e"], "INCR", *weighted_greek)

        for key in ("1:i", "1:e", "2:i", "2:e"):
            assert client.exists(key)

        exports = run(["2:i", "2:e"], "EXPORT")

        assert len(exports) == 1

        run(["1:i", "1:e"], "IMPORT", exports[0])

        assert client.exists("1:i")
        assert client.exists("1:e")

        ranked = run(["1:i", "1:e"], "RANKED")
        assert ranked == [
            [b"iota", b"9"],
            [b"theta", b"8"],
            [b"eta", b"7"],
            [b"zeta", b"6"],
            [b"foxtrot", b"6"],
        ]

    def test_frequency_table_import_export_source_estimators(self):
        """Import an export when only the exported table has an ``:e`` key.

        Table 1 (the import target) has no ``1:e`` before the import while
        table 2 has ``2:e``; after the import the RANKED output of table 1 is
        checked.
        """
        client = self.db.cluster.get_local_client_for_key("key")

        parameters = [64, 5, 5]

        def run(keys, command, *extra):
            # Script arguments are: command, shared parameters, then extras.
            return CountMinScript(keys,
                                  [command] + parameters + list(extra),
                                  client=client)

        # Flat argument list: 1, "alpha", 2, "beta", ..., 9, "iota".
        greek = ("alpha", "beta", "gamma", "delta", "epsilon", "zeta", "eta",
                 "theta", "iota")
        weighted_greek = []
        for weight, letter in enumerate(greek, 1):
            weighted_greek.extend((weight, letter))

        run(["1:i", "1:e"], "INCR", 5, "foo", 7, "bar", 9, "baz")
        run(["2:i", "2:e"], "INCR", *weighted_greek)

        assert client.exists("1:i")
        assert not client.exists("1:e")
        assert client.exists("2:i")
        assert client.exists("2:e")

        exports = run(["2:i", "2:e"], "EXPORT")

        assert len(exports) == 1

        run(["1:i", "1:e"], "IMPORT", exports[0])

        assert client.exists("1:i")
        assert client.exists("1:e")

        ranked = run(["1:i", "1:e"], "RANKED")
        assert ranked == [
            [b"iota", b"9"],
            [b"baz", b"9"],
            [b"theta", b"8"],
            [b"eta", b"7"],
            [b"bar", b"7"],
        ]

    def test_frequency_table_import_export_destination_estimators(self):
        """Import an export when only the import target has an ``:e`` key.

        Table 1 (the import target) has ``1:e`` before the import while table 2
        has no ``2:e``; after the import the RANKED output of table 1 is
        checked.
        """
        client = self.db.cluster.get_local_client_for_key("key")

        parameters = [64, 5, 5]

        def run(keys, command, *extra):
            # Script arguments are: command, shared parameters, then extras.
            return CountMinScript(keys,
                                  [command] + parameters + list(extra),
                                  client=client)

        # Flat argument list: 1, "alpha", 2, "beta", ..., 9, "iota".
        greek = ("alpha", "beta", "gamma", "delta", "epsilon", "zeta", "eta",
                 "theta", "iota")
        weighted_greek = []
        for weight, letter in enumerate(greek, 1):
            weighted_greek.extend((weight, letter))

        run(["1:i", "1:e"], "INCR", *weighted_greek)
        run(["2:i", "2:e"], "INCR", 5, "foo", 7, "bar", 9, "baz")

        assert client.exists("1:i")
        assert client.exists("1:e")
        assert client.exists("2:i")
        assert not client.exists("2:e")

        exports = run(["2:i", "2:e"], "EXPORT")

        assert len(exports) == 1

        run(["1:i", "1:e"], "IMPORT", exports[0])

        assert client.exists("1:i")
        assert client.exists("1:e")

        ranked = run(["1:i", "1:e"], "RANKED")
        assert ranked == [
            [b"iota", b"9"],
            [b"baz", b"9"],
            [b"theta", b"8"],
            [b"eta", b"7"],
            [b"bar", b"7"],
        ]
示例#2
0
class RedisTSDBTest(TestCase):
    """Tests for the counter, distinct-count and frequency APIs of RedisTSDB.

    NOTE(review): no ``tearDown`` is defined on this class, so nothing here
    clears what a test writes -- confirm that the base ``TestCase`` or the
    test environment isolates Redis state between tests.
    """

    def setUp(self):
        """Build the RedisTSDB under test with four rollups and 64 vnodes."""
        self.db = RedisTSDB(
            rollups=(
                # time in seconds, samples to keep
                (10, 30),  # 5 minutes at 10 seconds
                (ONE_MINUTE, 120),  # 2 hours at 1 minute
                (ONE_HOUR, 24),  # 1 day at 1 hour
                (ONE_DAY, 30),  # 30 days at 1 day
            ),
            vnodes=64)

    def test_make_counter_key(self):
        """``make_counter_key`` yields the expected key for int and str keys."""
        result = self.db.make_counter_key(TSDBModel.project, 1368889980, 1)
        assert result == 'ts:1:1368889980:1'

        # NOTE(review): the trailing ``33`` is presumably the vnode the
        # string key maps to (vnodes=64) -- confirm against RedisTSDB.
        result = self.db.make_counter_key(TSDBModel.project, 1368889980, 'foo')
        assert result == 'ts:1:1368889980:33'

    def test_get_model_key(self):
        """Integer keys pass through unchanged; string keys become a hex digest."""
        result = self.db.get_model_key(1)
        assert result == 1

        result = self.db.get_model_key('foo')
        assert result == 'bf4e529197e56a48ae2737505b9736e4'

    def test_simple(self):
        """Counters written with ``incr``/``incr_multi`` are read back per hour.

        Four timestamps one hour apart are incremented, then ``get_range``
        and ``get_sums`` are checked for keys ``1`` and ``2``.
        """
        now = datetime.utcnow().replace(tzinfo=pytz.UTC)
        # ``range`` (not ``xrange``) so the test runs on Python 2 and 3.
        dts = [now + timedelta(hours=i) for i in range(4)]

        def timestamp(d):
            # Truncate to the start of the hour the datetime falls in.
            t = int(to_timestamp(d))
            return t - (t % 3600)

        self.db.incr(TSDBModel.project, 1, dts[0])
        self.db.incr(TSDBModel.project, 1, dts[1], count=3)
        self.db.incr(TSDBModel.project, 1, dts[2])
        self.db.incr_multi([
            (TSDBModel.project, 1),
            (TSDBModel.project, 2),
        ],
                           dts[3],
                           count=4)

        results = self.db.get_range(TSDBModel.project, [1], dts[0], dts[-1])
        assert results == {
            1: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 1),
                (timestamp(dts[3]), 4),
            ],
        }
        results = self.db.get_range(TSDBModel.project, [2], dts[0], dts[-1])
        assert results == {
            2: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 4),
            ],
        }

        results = self.db.get_sums(TSDBModel.project, [1, 2], dts[0], dts[-1])
        assert results == {
            1: 9,
            2: 4,
        }

    def test_count_distinct(self):
        """Distinct counts are reported per hourly bucket and as totals."""
        now = datetime.utcnow().replace(tzinfo=pytz.UTC)
        # ``range`` (not ``xrange``) so the test runs on Python 2 and 3.
        dts = [now + timedelta(hours=i) for i in range(4)]

        model = TSDBModel.users_affected_by_group

        def timestamp(d):
            # Truncate to the start of the hour the datetime falls in.
            t = int(to_timestamp(d))
            return t - (t % 3600)

        self.db.record(
            model,
            1,
            ('foo', 'bar'),
            dts[0],
        )

        self.db.record(
            model,
            1,
            ('baz', ),
            dts[1],
        )

        self.db.record_multi((
            (
                model,
                1,
                ('foo', 'bar', 'baz'),
            ),
            (
                model,
                2,
                ('bar', ),
            ),
        ), dts[2])

        self.db.record(
            model,
            2,
            ('foo', ),
            dts[3],
        )

        assert self.db.get_distinct_counts_series(
            model, [1], dts[0], dts[-1], rollup=3600) == {
                1: [
                    (timestamp(dts[0]), 2),
                    (timestamp(dts[1]), 1),
                    (timestamp(dts[2]), 3),
                    (timestamp(dts[3]), 0),
                ],
            }

        assert self.db.get_distinct_counts_series(
            model, [2], dts[0], dts[-1], rollup=3600) == {
                2: [
                    (timestamp(dts[0]), 0),
                    (timestamp(dts[1]), 0),
                    (timestamp(dts[2]), 1),
                    (timestamp(dts[3]), 1),
                ],
            }

        # Totals count each distinct value once over the whole interval.
        results = self.db.get_distinct_counts_totals(model, [1, 2],
                                                     dts[0],
                                                     dts[-1],
                                                     rollup=3600)
        assert results == {
            1: 3,
            2: 2,
        }

    def test_frequency_tables(self):
        """Frequencies recorded in two hourly buckets are ranked and summed.

        Data is recorded for ``now`` and for one hour earlier, then read back
        through ``get_most_frequent``, ``get_frequency_series`` and
        ``get_frequency_totals``.
        """
        now = datetime.utcnow().replace(tzinfo=pytz.UTC)
        model = TSDBModel.frequent_projects_by_organization

        self.db.record_frequency_multi(((model, {
            'organization:1': {
                "project:1": 1,
                "project:2": 2,
                "project:3": 3,
            },
        }), ), now)

        self.db.record_frequency_multi(
            ((model, {
                'organization:1': {
                    "project:1": 1,
                    "project:2": 2,
                    "project:3": 3,
                    "project:4": 4,
                },
                "organization:2": {
                    "project:5": 1.5,
                },
            }), ),
            now - timedelta(hours=1),
        )

        # Only the bucket containing ``now`` is queried here.
        assert self.db.get_most_frequent(
            model,
            ('organization:1', 'organization:2'),
            now,
            rollup=3600,
        ) == {
            'organization:1': [
                ('project:3', 3.0),
                ('project:2', 2.0),
                ('project:1', 1.0),
            ],
            'organization:2': [],
        }

        assert self.db.get_most_frequent(
            model,
            ('organization:1', 'organization:2'),
            now,
            limit=1,
            rollup=3600,
        ) == {
            'organization:1': [
                ('project:3', 3.0),
            ],
            'organization:2': [],
        }

        # Both buckets: scores from the two recordings are added together.
        assert self.db.get_most_frequent(
            model,
            ('organization:1', 'organization:2'),
            now - timedelta(hours=1),
            now,
            rollup=3600,
        ) == {
            'organization:1': [
                ('project:3', 3.0 + 3.0),
                ('project:2', 2.0 + 2.0),
                ('project:4', 4.0),
                ('project:1', 1.0 + 1.0),
            ],
            'organization:2': [
                ('project:5', 1.5),
            ],
        }

        rollup = 3600
        # Start of the hourly bucket that contains ``now``.
        timestamp = int(to_timestamp(now) // rollup) * rollup
        assert self.db.get_frequency_series(
            model,
            {
                'organization:1':
                ("project:1", "project:2", "project:3", "project:4"),
                'organization:2': ("project:5", ),
            },
            now - timedelta(hours=1),
            now,
            rollup=rollup,
        ) == {
            'organization:1': [
                (timestamp - rollup, {
                    "project:1": 1.0,
                    "project:2": 2.0,
                    "project:3": 3.0,
                    "project:4": 4.0,
                }),
                (timestamp, {
                    "project:1": 1.0,
                    "project:2": 2.0,
                    "project:3": 3.0,
                    "project:4": 0.0,
                }),
            ],
            'organization:2': [
                (timestamp - rollup, {
                    "project:5": 1.5,
                }),
                (timestamp, {
                    "project:5": 0.0,
                }),
            ],
        }

        # Members that were never recorded for a key come back as 0.0.
        assert self.db.get_frequency_totals(
            model,
            {
                'organization:1': ("project:1", "project:2", "project:3",
                                   "project:4", "project:5"),
                'organization:2': ("project:1", ),
            },
            now - timedelta(hours=1),
            now,
            rollup=3600,
        ) == {
            'organization:1': {
                "project:1": 1.0 + 1.0,
                "project:2": 2.0 + 2.0,
                "project:3": 3.0 + 3.0,
                "project:4": 4.0,
                "project:5": 0.0,
            },
            'organization:2': {
                "project:1": 0.0,
            },
        }
示例#3
0
class RedisTSDBTest(TestCase):
    """Tests for the counter, distinct-count and frequency APIs of RedisTSDB.

    NOTE(review): no ``tearDown`` is defined on this class, so nothing here
    clears what a test writes -- confirm that the base ``TestCase`` or the
    test environment isolates Redis state between tests.
    """

    def setUp(self):
        """Build the RedisTSDB under test with frequency sketches enabled."""
        self.db = RedisTSDB(
            rollups=(
                # time in seconds, samples to keep
                (10, 30),  # 5 minutes at 10 seconds
                (ONE_MINUTE, 120),  # 2 hours at 1 minute
                (ONE_HOUR, 24),  # 1 day at 1 hour
                (ONE_DAY, 30),  # 30 days at 1 day
            ),
            vnodes=64,
            enable_frequency_sketches=True,
        )

    def test_make_counter_key(self):
        """``make_counter_key`` yields the expected key for int and str keys."""
        result = self.db.make_counter_key(TSDBModel.project, 1368889980, 1)
        assert result == "ts:1:1368889980:1"

        # NOTE(review): the trailing ``33`` is presumably the vnode the
        # string key maps to (vnodes=64) -- confirm against RedisTSDB.
        result = self.db.make_counter_key(TSDBModel.project, 1368889980, "foo")
        assert result == "ts:1:1368889980:33"

    def test_get_model_key(self):
        """Integer keys pass through unchanged; string keys become a hex digest."""
        result = self.db.get_model_key(1)
        assert result == 1

        result = self.db.get_model_key("foo")
        assert result == "bf4e529197e56a48ae2737505b9736e4"

    def test_simple(self):
        """Counters written with ``incr``/``incr_multi`` are read back per hour."""
        now = datetime.utcnow().replace(tzinfo=pytz.UTC)
        # ``range`` (not ``xrange``) so the test runs on Python 2 and 3.
        dts = [now + timedelta(hours=i) for i in range(4)]

        def timestamp(d):
            # Truncate to the start of the hour the datetime falls in.
            t = int(to_timestamp(d))
            return t - (t % 3600)

        self.db.incr(TSDBModel.project, 1, dts[0])
        self.db.incr(TSDBModel.project, 1, dts[1], count=3)
        self.db.incr(TSDBModel.project, 1, dts[2])
        self.db.incr_multi([(TSDBModel.project, 1), (TSDBModel.project, 2)], dts[3], count=4)

        results = self.db.get_range(TSDBModel.project, [1], dts[0], dts[-1])
        assert results == {
            1: [(timestamp(dts[0]), 1), (timestamp(dts[1]), 3), (timestamp(dts[2]), 1), (timestamp(dts[3]), 4)]
        }
        results = self.db.get_range(TSDBModel.project, [2], dts[0], dts[-1])
        assert results == {
            2: [(timestamp(dts[0]), 0), (timestamp(dts[1]), 0), (timestamp(dts[2]), 0), (timestamp(dts[3]), 4)]
        }

        results = self.db.get_sums(TSDBModel.project, [1, 2], dts[0], dts[-1])
        assert results == {1: 9, 2: 4}

    def test_count_distinct(self):
        """Distinct counts are reported per bucket, as totals and as a union."""
        now = datetime.utcnow().replace(tzinfo=pytz.UTC)
        # ``range`` (not ``xrange``) so the test runs on Python 2 and 3.
        dts = [now + timedelta(hours=i) for i in range(4)]

        model = TSDBModel.users_affected_by_group

        def timestamp(d):
            # Truncate to the start of the hour the datetime falls in.
            t = int(to_timestamp(d))
            return t - (t % 3600)

        self.db.record(model, 1, ("foo", "bar"), dts[0])

        self.db.record(model, 1, ("baz",), dts[1])

        self.db.record_multi(((model, 1, ("foo", "bar", "baz")), (model, 2, ("bar",))), dts[2])

        self.db.record(model, 2, ("foo",), dts[3])

        assert self.db.get_distinct_counts_series(model, [1], dts[0], dts[-1], rollup=3600) == {
            1: [(timestamp(dts[0]), 2), (timestamp(dts[1]), 1), (timestamp(dts[2]), 3), (timestamp(dts[3]), 0)]
        }

        assert self.db.get_distinct_counts_series(model, [2], dts[0], dts[-1], rollup=3600) == {
            2: [(timestamp(dts[0]), 0), (timestamp(dts[1]), 0), (timestamp(dts[2]), 1), (timestamp(dts[3]), 1)]
        }

        results = self.db.get_distinct_counts_totals(model, [1, 2], dts[0], dts[-1], rollup=3600)
        assert results == {1: 3, 2: 2}

        # The union over no keys is 0; keys 1 and 2 share values, so their
        # union is 3 rather than 3 + 2.
        assert self.db.get_distinct_counts_union(model, [], dts[0], dts[-1], rollup=3600) == 0
        assert self.db.get_distinct_counts_union(model, [1, 2], dts[0], dts[-1], rollup=3600) == 3

    def test_frequency_tables(self):
        """Frequencies recorded in two hourly buckets are ranked and summed.

        Data is recorded for ``now`` and for one hour earlier, then read back
        through ``get_most_frequent``, ``get_most_frequent_series``,
        ``get_frequency_series`` and ``get_frequency_totals``.
        """
        now = datetime.utcnow().replace(tzinfo=pytz.UTC)
        model = TSDBModel.frequent_projects_by_organization

        rollup = 3600

        self.db.record_frequency_multi(
            ((model, {"organization:1": {"project:1": 1, "project:2": 2, "project:3": 3}}),), now
        )

        self.db.record_frequency_multi(
            (
                (
                    model,
                    {
                        "organization:1": {"project:1": 1, "project:2": 2, "project:3": 3, "project:4": 4},
                        "organization:2": {"project:5": 1.5},
                    },
                ),
            ),
            now - timedelta(hours=1),
        )

        # Only the bucket containing ``now`` is queried here.
        assert self.db.get_most_frequent(model, ("organization:1", "organization:2"), now, rollup=rollup) == {
            "organization:1": [("project:3", 3.0), ("project:2", 2.0), ("project:1", 1.0)],
            "organization:2": [],
        }

        assert self.db.get_most_frequent(model, ("organization:1", "organization:2"), now, limit=1, rollup=rollup) == {
            "organization:1": [("project:3", 3.0)],
            "organization:2": [],
        }

        # Both buckets: scores from the two recordings are added together.
        assert self.db.get_most_frequent(
            model, ("organization:1", "organization:2"), now - timedelta(hours=1), now, rollup=rollup
        ) == {
            "organization:1": [
                ("project:3", 3.0 + 3.0),
                ("project:2", 2.0 + 2.0),
                ("project:4", 4.0),
                ("project:1", 1.0 + 1.0),
            ],
            "organization:2": [("project:5", 1.5)],
        }

        # Start of the hourly bucket that contains ``now``.
        timestamp = int(to_timestamp(now) // rollup) * rollup

        # ``organization:3`` was never recorded and yields empty buckets.
        assert self.db.get_most_frequent_series(
            model, ("organization:1", "organization:2", "organization:3"), now - timedelta(hours=1), now, rollup=rollup
        ) == {
            "organization:1": [
                (timestamp - rollup, {"project:1": 1.0, "project:2": 2.0, "project:3": 3.0, "project:4": 4.0}),
                (timestamp, {"project:1": 1.0, "project:2": 2.0, "project:3": 3.0}),
            ],
            "organization:2": [(timestamp - rollup, {"project:5": 1.5}), (timestamp, {})],
            "organization:3": [(timestamp - rollup, {}), (timestamp, {})],
        }

        assert self.db.get_frequency_series(
            model,
            {"organization:1": ("project:1", "project:2", "project:3", "project:4"), "organization:2": ("project:5",)},
            now - timedelta(hours=1),
            now,
            rollup=rollup,
        ) == {
            "organization:1": [
                (timestamp - rollup, {"project:1": 1.0, "project:2": 2.0, "project:3": 3.0, "project:4": 4.0}),
                (timestamp, {"project:1": 1.0, "project:2": 2.0, "project:3": 3.0, "project:4": 0.0}),
            ],
            "organization:2": [(timestamp - rollup, {"project:5": 1.5}), (timestamp, {"project:5": 0.0})],
        }

        # Members that were never recorded for a key come back as 0.0.
        assert self.db.get_frequency_totals(
            model,
            {
                "organization:1": ("project:1", "project:2", "project:3", "project:4", "project:5"),
                "organization:2": ("project:1",),
            },
            now - timedelta(hours=1),
            now,
            rollup=rollup,
        ) == {
            "organization:1": {
                "project:1": 1.0 + 1.0,
                "project:2": 2.0 + 2.0,
                "project:3": 3.0 + 3.0,
                "project:4": 4.0,
                "project:5": 0.0,
            },
            "organization:2": {"project:1": 0.0},
        }
示例#4
0
class RedisTSDBTest(TestCase):
    """Tests for RedisTSDB counters, distinct counts and frequency sketches.

    ``setUp`` configures the database with three ``hosts`` entries and
    ``tearDown`` calls ``flushdb()`` through ``self.db.cluster.all()``.
    """

    def setUp(self):
        """Build the RedisTSDB under test (hosts 0-2 use ``db`` 6-8)."""
        self.db = RedisTSDB(
            rollups=(
                # time in seconds, samples to keep
                (10, 30),  # 5 minutes at 10 seconds
                (ONE_MINUTE, 120),  # 2 hours at 1 minute
                (ONE_HOUR, 24),  # 1 day at 1 hour
                (ONE_DAY, 30),  # 30 days at 1 day
            ),
            vnodes=64,
            enable_frequency_sketches=True,
            # ``range`` (not ``xrange``) to match the tests below and to run
            # on both Python 2 and 3.
            hosts={
                i - 6: {'db': i} for i in range(6, 9)
            },
        )

    def tearDown(self):
        """Call ``flushdb()`` on the client yielded by ``cluster.all()``."""
        with self.db.cluster.all() as client:
            client.flushdb()

    def test_make_counter_key(self):
        """``make_counter_key`` yields the expected key for int and str keys."""
        result = self.db.make_counter_key(TSDBModel.project, 1368889980, 1)
        assert result == 'ts:1:1368889980:1'

        # NOTE(review): the trailing ``33`` is presumably the vnode the
        # string key maps to (vnodes=64) -- confirm against RedisTSDB.
        result = self.db.make_counter_key(TSDBModel.project, 1368889980, 'foo')
        assert result == 'ts:1:1368889980:33'

    def test_get_model_key(self):
        """Integer keys pass through unchanged; string keys become a hex digest."""
        result = self.db.get_model_key(1)
        assert result == 1

        result = self.db.get_model_key('foo')
        assert result == 'bf4e529197e56a48ae2737505b9736e4'

    def test_simple(self):
        """Counters can be incremented, read back, merged and deleted."""
        # Start four hours back so all four hourly samples lie in the past.
        now = datetime.utcnow().replace(tzinfo=pytz.UTC) - timedelta(hours=4)
        dts = [now + timedelta(hours=i) for i in range(4)]

        def timestamp(d):
            # Truncate to the start of the hour the datetime falls in.
            t = int(to_timestamp(d))
            return t - (t % 3600)

        self.db.incr(TSDBModel.project, 1, dts[0])
        self.db.incr(TSDBModel.project, 1, dts[1], count=3)
        self.db.incr(TSDBModel.project, 1, dts[2])
        self.db.incr_multi([
            (TSDBModel.project, 1),
            (TSDBModel.project, 2),
        ], dts[3], count=4)

        results = self.db.get_range(TSDBModel.project, [1], dts[0], dts[-1])
        assert results == {
            1: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 1),
                (timestamp(dts[3]), 4),
            ],
        }
        results = self.db.get_range(TSDBModel.project, [2], dts[0], dts[-1])
        assert results == {
            2: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 4),
            ],
        }

        results = self.db.get_sums(TSDBModel.project, [1, 2], dts[0], dts[-1])
        assert results == {
            1: 9,
            2: 4,
        }

        # Merging key 2 into key 1 moves its counts: key 1 gains them and
        # key 2 reads back as zero.
        self.db.merge(TSDBModel.project, 1, [2], now)

        results = self.db.get_range(TSDBModel.project, [1], dts[0], dts[-1])
        assert results == {
            1: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 1),
                (timestamp(dts[3]), 8),
            ],
        }
        results = self.db.get_range(TSDBModel.project, [2], dts[0], dts[-1])
        assert results == {
            2: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ],
        }

        results = self.db.get_sums(TSDBModel.project, [1, 2], dts[0], dts[-1])
        assert results == {
            1: 13,
            2: 0,
        }

        self.db.delete([TSDBModel.project], [1, 2], dts[0], dts[-1])

        results = self.db.get_sums(TSDBModel.project, [1, 2], dts[0], dts[-1])
        assert results == {
            1: 0,
            2: 0,
        }

    def test_count_distinct(self):
        """Distinct counts can be recorded, queried, merged and deleted."""
        # Start four hours back so all four hourly samples lie in the past.
        now = datetime.utcnow().replace(tzinfo=pytz.UTC) - timedelta(hours=4)
        dts = [now + timedelta(hours=i) for i in range(4)]

        model = TSDBModel.users_affected_by_group

        def timestamp(d):
            # Truncate to the start of the hour the datetime falls in.
            t = int(to_timestamp(d))
            return t - (t % 3600)

        self.db.record(
            model,
            1,
            ('foo', 'bar'),
            dts[0],
        )

        self.db.record(
            model,
            1,
            ('baz',),
            dts[1],
        )

        self.db.record_multi((
            (
                model,
                1,
                ('foo', 'bar', 'baz'),
            ),
            (
                model,
                2,
                ('bar',),
            ),
        ), dts[2])

        self.db.record(
            model,
            2,
            ('foo',),
            dts[3],
        )

        assert self.db.get_distinct_counts_series(model, [1], dts[0], dts[-1], rollup=3600) == {
            1: [
                (timestamp(dts[0]), 2),
                (timestamp(dts[1]), 1),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 0),
            ],
        }

        assert self.db.get_distinct_counts_series(model, [2], dts[0], dts[-1], rollup=3600) == {
            2: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 1),
                (timestamp(dts[3]), 1),
            ],
        }

        results = self.db.get_distinct_counts_totals(model, [1, 2], dts[0], dts[-1], rollup=3600)
        assert results == {
            1: 3,
            2: 2,
        }

        # Keys 1 and 2 share values, so their union is 3 rather than 3 + 2.
        assert self.db.get_distinct_counts_union(model, [], dts[0], dts[-1], rollup=3600) == 0
        assert self.db.get_distinct_counts_union(model, [1, 2], dts[0], dts[-1], rollup=3600) == 3

        # After merging key 2 into key 1, key 2 reads back empty and key 1
        # picks up the value recorded for key 2 in the last bucket.
        self.db.merge_distinct_counts(model, 1, [2], dts[0])

        assert self.db.get_distinct_counts_series(model, [1], dts[0], dts[-1], rollup=3600) == {
            1: [
                (timestamp(dts[0]), 2),
                (timestamp(dts[1]), 1),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 1),
            ],
        }

        assert self.db.get_distinct_counts_series(model, [2], dts[0], dts[-1], rollup=3600) == {
            2: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ],
        }

        results = self.db.get_distinct_counts_totals(model, [1, 2], dts[0], dts[-1], rollup=3600)
        assert results == {
            1: 3,
            2: 0,
        }

        assert self.db.get_distinct_counts_union(model, [], dts[0], dts[-1], rollup=3600) == 0
        assert self.db.get_distinct_counts_union(model, [1], dts[0], dts[-1], rollup=3600) == 3
        assert self.db.get_distinct_counts_union(model, [1, 2], dts[0], dts[-1], rollup=3600) == 3
        assert self.db.get_distinct_counts_union(model, [2], dts[0], dts[-1], rollup=3600) == 0

        self.db.delete_distinct_counts([model], [1, 2], dts[0], dts[-1])

        results = self.db.get_distinct_counts_totals(model, [1, 2], dts[0], dts[-1])
        assert results == {
            1: 0,
            2: 0,
        }

    def test_frequency_tables(self):
        """Frequencies in two hourly buckets are ranked, summed and deleted.

        Data is recorded for ``now`` and for one hour earlier, read back
        through the ``get_most_frequent*`` and ``get_frequency_*`` methods,
        and finally removed with ``delete_frequencies``.
        """
        now = datetime.utcnow().replace(tzinfo=pytz.UTC)
        model = TSDBModel.frequent_projects_by_organization

        rollup = 3600

        self.db.record_frequency_multi(
            (
                (model, {
                    'organization:1': {
                        "project:1": 1,
                        "project:2": 2,
                        "project:3": 3,
                    },
                }),
            ),
            now
        )

        self.db.record_frequency_multi(
            (
                (model, {
                    'organization:1': {
                        "project:1": 1,
                        "project:2": 2,
                        "project:3": 3,
                        "project:4": 4,
                    },
                    "organization:2": {
                        "project:5": 1.5,
                    },
                }),
            ),
            now - timedelta(hours=1),
        )

        # Only the bucket containing ``now`` is queried here.
        assert self.db.get_most_frequent(
            model,
            ('organization:1', 'organization:2'),
            now,
            rollup=rollup,
        ) == {
            'organization:1': [
                ('project:3', 3.0),
                ('project:2', 2.0),
                ('project:1', 1.0),
            ],
            'organization:2': [],
        }

        assert self.db.get_most_frequent(
            model,
            ('organization:1', 'organization:2'),
            now,
            limit=1,
            rollup=rollup,
        ) == {
            'organization:1': [
                ('project:3', 3.0),
            ],
            'organization:2': [],
        }

        # Both buckets: scores from the two recordings are added together.
        assert self.db.get_most_frequent(
            model,
            ('organization:1', 'organization:2'),
            now - timedelta(hours=1),
            now,
            rollup=rollup,
        ) == {
            'organization:1': [
                ('project:3', 3.0 + 3.0),
                ('project:2', 2.0 + 2.0),
                ('project:4', 4.0),
                ('project:1', 1.0 + 1.0),
            ],
            'organization:2': [
                ('project:5', 1.5),
            ],
        }

        # Start of the hourly bucket that contains ``now``.
        timestamp = int(to_timestamp(now) // rollup) * rollup

        # ``organization:3`` was never recorded and yields empty buckets.
        assert self.db.get_most_frequent_series(
            model,
            (
                'organization:1',
                'organization:2',
                'organization:3',
            ),
            now - timedelta(hours=1),
            now,
            rollup=rollup,
        ) == {
            'organization:1': [
                (timestamp - rollup, {
                    'project:1': 1.0,
                    'project:2': 2.0,
                    'project:3': 3.0,
                    'project:4': 4.0,
                }),
                (timestamp, {
                    'project:1': 1.0,
                    'project:2': 2.0,
                    'project:3': 3.0,
                }),
            ],
            'organization:2': [
                (timestamp - rollup, {
                    'project:5': 1.5,
                }),
                (timestamp, {}),
            ],
            'organization:3': [
                (timestamp - rollup, {}),
                (timestamp, {}),
            ],
        }

        assert self.db.get_frequency_series(
            model,
            {
                'organization:1': ("project:1", "project:2", "project:3", "project:4"),
                'organization:2': ("project:5",),
            },
            now - timedelta(hours=1),
            now,
            rollup=rollup,
        ) == {
            'organization:1': [
                (timestamp - rollup, {
                    "project:1": 1.0,
                    "project:2": 2.0,
                    "project:3": 3.0,
                    "project:4": 4.0,
                }),
                (timestamp, {
                    "project:1": 1.0,
                    "project:2": 2.0,
                    "project:3": 3.0,
                    "project:4": 0.0,
                }),
            ],
            'organization:2': [
                (timestamp - rollup, {
                    "project:5": 1.5,
                }),
                (timestamp, {
                    "project:5": 0.0,
                }),
            ],
        }

        # Members that were never recorded for a key come back as 0.0.
        assert self.db.get_frequency_totals(
            model,
            {
                'organization:1': ("project:1", "project:2", "project:3", "project:4", "project:5"),
                'organization:2': ("project:1",),
            },
            now - timedelta(hours=1),
            now,
            rollup=rollup,
        ) == {
            'organization:1': {
                "project:1": 1.0 + 1.0,
                "project:2": 2.0 + 2.0,
                "project:3": 3.0 + 3.0,
                "project:4": 4.0,
                "project:5": 0.0,
            },
            'organization:2': {
                "project:1": 0.0,
            },
        }

        self.db.delete_frequencies(
            [model],
            ['organization:1', 'organization:2'],
            now - timedelta(hours=1),
            now,
        )

        assert self.db.get_most_frequent(
            model,
            ('organization:1', 'organization:2'),
            now,
        ) == {
            'organization:1': [],
            'organization:2': [],
        }

    def test_frequency_table_import_export_no_estimators(self):
        """Import between two sketches that both lack an ``:e`` key.

        Neither sketch has an ``:e`` key after its increments; once the
        export of ``2`` is imported into ``1``, both keys of ``1`` exist.
        """
        client = self.db.cluster.get_local_client_for_key('key')

        # NOTE(review): presumably sketch dimensions followed by the index
        # capacity (10 here, 5 in the sibling tests) -- confirm against the
        # CountMinScript source.
        parameters = [64, 5, 10]

        CountMinScript(
            ['1:i', '1:e'],
            ['INCR'] + parameters + [
                1, 'foo',
                2, 'bar',
                3, 'baz',
            ],
            client=client,
        )

        CountMinScript(
            ['2:i', '2:e'],
            ['INCR'] + parameters + [
                1, 'alpha',
                2, 'beta',
                3, 'gamma',
                4, 'delta',
                5, 'epsilon',
                6, 'zeta',
                7, 'eta',
                8, 'theta',
                9, 'iota',
            ],
            client=client,
        )

        assert client.exists('1:i')
        assert not client.exists('1:e')
        assert client.exists('2:i')
        assert not client.exists('2:e')

        exports = CountMinScript(
            ['2:i', '2:e'],
            ['EXPORT'] + parameters,
            client=client,
        )

        assert len(exports) == 1

        CountMinScript(
            ['1:i', '1:e'],
            ['IMPORT'] + parameters + [exports[0]],
            client=client,
        )

        assert client.exists('1:i')
        assert client.exists('1:e')

    def test_frequency_table_import_export_both_estimators(self):
        """Import between two sketches that both have an ``:e`` key.

        After the import, ``RANKED`` on the destination returns the five
        highest-scored members drawn from both sketches.
        """
        client = self.db.cluster.get_local_client_for_key('key')

        # NOTE(review): see the parameter note in the no_estimators test.
        parameters = [64, 5, 5]

        CountMinScript(
            ['1:i', '1:e'],
            ['INCR'] + parameters + [
                1, 'foo',
                2, 'bar',
                3, 'baz',
                4, 'wilco',
                5, 'tango',
                6, 'foxtrot',
            ],
            client=client,
        )

        CountMinScript(
            ['2:i', '2:e'],
            ['INCR'] + parameters + [
                1, 'alpha',
                2, 'beta',
                3, 'gamma',
                4, 'delta',
                5, 'epsilon',
                6, 'zeta',
                7, 'eta',
                8, 'theta',
                9, 'iota',
            ],
            client=client,
        )

        assert client.exists('1:i')
        assert client.exists('1:e')
        assert client.exists('2:i')
        assert client.exists('2:e')

        exports = CountMinScript(
            ['2:i', '2:e'],
            ['EXPORT'] + parameters,
            client=client,
        )

        assert len(exports) == 1

        CountMinScript(
            ['1:i', '1:e'],
            ['IMPORT'] + parameters + [exports[0]],
            client=client,
        )

        assert client.exists('1:i')
        assert client.exists('1:e')

        assert CountMinScript(
            ['1:i', '1:e'],
            ['RANKED'] + parameters,
            client=client,
        ) == [
            ['iota', '9'],
            ['theta', '8'],
            ['eta', '7'],
            ['zeta', '6'],
            ['foxtrot', '6'],
        ]

    def test_frequency_table_import_export_source_estimators(self):
        """Import a sketch that has an ``:e`` key into one that lacks it.

        Only the source (``2``) has an ``:e`` key before the import; after
        it the destination has both keys and ``RANKED`` returns the five
        highest-scored members drawn from both sketches.
        """
        client = self.db.cluster.get_local_client_for_key('key')

        # NOTE(review): see the parameter note in the no_estimators test.
        parameters = [64, 5, 5]

        CountMinScript(
            ['1:i', '1:e'],
            ['INCR'] + parameters + [
                5, 'foo',
                7, 'bar',
                9, 'baz',
            ],
            client=client,
        )

        CountMinScript(
            ['2:i', '2:e'],
            ['INCR'] + parameters + [
                1, 'alpha',
                2, 'beta',
                3, 'gamma',
                4, 'delta',
                5, 'epsilon',
                6, 'zeta',
                7, 'eta',
                8, 'theta',
                9, 'iota',
            ],
            client=client,
        )

        assert client.exists('1:i')
        assert not client.exists('1:e')
        assert client.exists('2:i')
        assert client.exists('2:e')

        exports = CountMinScript(
            ['2:i', '2:e'],
            ['EXPORT'] + parameters,
            client=client,
        )

        assert len(exports) == 1

        CountMinScript(
            ['1:i', '1:e'],
            ['IMPORT'] + parameters + [exports[0]],
            client=client,
        )

        assert client.exists('1:i')
        assert client.exists('1:e')

        assert CountMinScript(
            ['1:i', '1:e'],
            ['RANKED'] + parameters,
            client=client,
        ) == [
            ['iota', '9'],
            ['baz', '9'],
            ['theta', '8'],
            ['eta', '7'],
            ['bar', '7'],
        ]

    def test_frequency_table_import_export_destination_estimators(self):
        """Import a sketch that lacks an ``:e`` key into one that has it.

        Only the destination (``1``) has an ``:e`` key before the import;
        afterwards it still has both keys and ``RANKED`` returns the five
        highest-scored members drawn from both sketches.
        """
        client = self.db.cluster.get_local_client_for_key('key')

        # NOTE(review): see the parameter note in the no_estimators test.
        parameters = [64, 5, 5]

        CountMinScript(
            ['1:i', '1:e'],
            ['INCR'] + parameters + [
                1, 'alpha',
                2, 'beta',
                3, 'gamma',
                4, 'delta',
                5, 'epsilon',
                6, 'zeta',
                7, 'eta',
                8, 'theta',
                9, 'iota',
            ],
            client=client,
        )

        CountMinScript(
            ['2:i', '2:e'],
            ['INCR'] + parameters + [
                5, 'foo',
                7, 'bar',
                9, 'baz',
            ],
            client=client,
        )

        assert client.exists('1:i')
        assert client.exists('1:e')
        assert client.exists('2:i')
        assert not client.exists('2:e')

        exports = CountMinScript(
            ['2:i', '2:e'],
            ['EXPORT'] + parameters,
            client=client,
        )

        assert len(exports) == 1

        CountMinScript(
            ['1:i', '1:e'],
            ['IMPORT'] + parameters + [exports[0]],
            client=client,
        )

        assert client.exists('1:i')
        assert client.exists('1:e')

        assert CountMinScript(
            ['1:i', '1:e'],
            ['RANKED'] + parameters,
            client=client,
        ) == [
            ['iota', '9'],
            ['baz', '9'],
            ['theta', '8'],
            ['eta', '7'],
            ['bar', '7'],
        ]
示例#5
0
class RedisTSDBTest(TestCase):
    def setUp(self):
        """Build the RedisTSDB under test, backed by Redis databases 6-8."""
        # Host ids 0, 1 and 2 point at Redis database numbers 6, 7 and 8.
        hosts = {}
        for db_number in range(6, 9):
            hosts[db_number - 6] = {'db': db_number}

        # Each entry is (bucket width in seconds, number of samples to keep).
        rollups = (
            (10, 30),  # 5 minutes at 10 seconds
            (ONE_MINUTE, 120),  # 2 hours at 1 minute
            (ONE_HOUR, 24),  # 1 day at 1 hour
            (ONE_DAY, 30),  # 30 days at 1 day
        )

        self.db = RedisTSDB(
            rollups=rollups,
            vnodes=64,
            enable_frequency_sketches=True,
            hosts=hosts,
        )

    def tearDown(self):
        """Flush every Redis database of the cluster after each test."""
        fanout = self.db.cluster.all()
        with fanout as every_host:
            every_host.flushdb()

    def test_make_counter_key(self):
        """Check the ``(redis key, hash field)`` pair from ``make_counter_key``.

        Covers an integer and a string model key, each with and without an
        environment id.
        """
        make_key = self.db.make_counter_key
        model = TSDBModel.project
        when = to_datetime(1368889980)

        # Integer model key, no environment: the field is the key itself.
        assert make_key(model, 1, when, 1, None) == ('ts:1:1368889980:1', 1)

        # String model key: the field is whatever get_model_key() returns,
        # and the last segment of the redis key changes as well.
        assert make_key(model, 1, when, 'foo', None) == (
            'ts:1:1368889980:46',
            self.db.get_model_key('foo'),
        )

        # With an environment id the field gains an "?e=<id>" suffix.
        assert make_key(model, 1, when, 1, 1) == ('ts:1:1368889980:1', '1?e=1')

        assert make_key(model, 1, when, 'foo', 1) == (
            'ts:1:1368889980:46',
            self.db.get_model_key('foo') + '?e=1',
        )

    def test_get_model_key(self):
        """Integers pass through unchanged; strings map to a fixed hex digest."""
        get_key = self.db.get_model_key

        assert get_key(1) == 1
        assert get_key('foo') == 'bf4e529197e56a48ae2737505b9736e4'

    def test_simple(self):
        """Exercise counter increments, range/sum queries, merge and delete.

        Counters are incremented for project keys 1 and 2 over four
        consecutive hours, some of them scoped to environment 1 or 2.  The
        assertions expect unscoped queries to include the environment-scoped
        increments, ``environment_id=0`` to match nothing, ``merge`` to move
        key 2's counts onto key 1, and ``delete`` to zero everything.
        """
        # Start four hours back so that all four hourly points are in the past.
        now = datetime.utcnow().replace(tzinfo=pytz.UTC) - timedelta(hours=4)
        dts = [now + timedelta(hours=offset) for offset in range(4)]
        start, end = dts[0], dts[-1]
        project = TSDBModel.project

        def hourly(*counts):
            # Pair the hour-aligned epoch of each entry in ``dts`` with the
            # count expected in that bucket.
            points = []
            for moment, count in zip(dts, counts):
                epoch = int(to_timestamp(moment))
                points.append((epoch - epoch % 3600, count))
            return points

        self.db.incr(project, 1, start)
        self.db.incr(project, 1, dts[1], count=2)
        self.db.incr(project, 1, dts[1], environment_id=1)
        self.db.incr(project, 1, dts[2])
        # Last hour: both keys get 3 in environment 1, then 1 in environment 2.
        for environment, amount in ((1, 3), (2, 1)):
            self.db.incr_multi(
                [(project, 1), (project, 2)],
                end,
                count=amount,
                environment_id=environment,
            )

        assert self.db.get_range(project, [1], start, end) == {
            1: hourly(1, 3, 1, 4),
        }
        assert self.db.get_range(project, [2], start, end) == {
            2: hourly(0, 0, 0, 4),
        }
        assert self.db.get_range(project, [1, 2], start, end, environment_id=1) == {
            1: hourly(0, 1, 0, 3),
            2: hourly(0, 0, 0, 3),
        }

        assert self.db.get_sums(project, [1, 2], start, end) == {1: 9, 2: 4}
        assert self.db.get_sums(project, [1, 2], start, end, environment_id=1) == {
            1: 4,
            2: 3,
        }
        assert self.db.get_sums(project, [1, 2], start, end, environment_id=0) == {
            1: 0,
            2: 0,
        }

        # Fold key 2 into key 1 for the listed environments.
        self.db.merge(project, 1, [2], now, environment_ids=[0, 1, 2])

        assert self.db.get_range(project, [1], start, end) == {
            1: hourly(1, 3, 1, 8),
        }
        assert self.db.get_range(project, [2], start, end) == {
            2: hourly(0, 0, 0, 0),
        }
        assert self.db.get_range(project, [1, 2], start, end, environment_id=1) == {
            1: hourly(0, 1, 0, 6),
            2: hourly(0, 0, 0, 0),
        }
        assert self.db.get_sums(project, [1, 2], start, end) == {1: 13, 2: 0}

        self.db.delete([project], [1, 2], start, end, environment_ids=[0, 1, 2])

        assert self.db.get_sums(project, [1, 2], start, end) == {1: 0, 2: 0}
        assert self.db.get_sums(project, [1, 2], start, end, environment_id=1) == {
            1: 0,
            2: 0,
        }

    def test_count_distinct(self):
        """Exercise distinct-count recording, queries, merge and delete.

        Values are recorded for keys 1 and 2 over four consecutive hours,
        some scoped to environment 1.  The assertions cover the per-hour
        series, per-key totals and the union across keys, then expect
        ``merge_distinct_counts`` to move key 2's values onto key 1 and
        ``delete_distinct_counts`` to empty both keys.
        """
        # Start four hours back so that all four hourly points are in the past.
        now = datetime.utcnow().replace(tzinfo=pytz.UTC) - timedelta(hours=4)
        dts = [now + timedelta(hours=offset) for offset in range(4)]
        start, end = dts[0], dts[-1]
        model = TSDBModel.users_affected_by_group

        def hourly(*counts):
            # Pair the hour-aligned epoch of each entry in ``dts`` with the
            # distinct count expected in that bucket.
            points = []
            for moment, count in zip(dts, counts):
                epoch = int(to_timestamp(moment))
                points.append((epoch - epoch % 3600, count))
            return points

        # Thin wrappers over the three query methods at the hourly rollup.
        def series(keys, **scope):
            return self.db.get_distinct_counts_series(
                model, keys, start, end, rollup=3600, **scope)

        def totals(keys, **scope):
            return self.db.get_distinct_counts_totals(
                model, keys, start, end, rollup=3600, **scope)

        def union(keys, **scope):
            return self.db.get_distinct_counts_union(
                model, keys, start, end, rollup=3600, **scope)

        self.db.record(model, 1, ('foo', 'bar'), start)
        self.db.record(model, 1, ('baz', ), dts[1], environment_id=1)
        self.db.record_multi(
            (
                (model, 1, ('foo', 'bar')),
                (model, 2, ('bar', )),
            ),
            dts[2],
        )
        self.db.record(model, 1, ('baz', ), dts[2], environment_id=1)
        self.db.record(model, 2, ('foo', ), end)

        assert series([1]) == {1: hourly(2, 1, 3, 0)}
        assert series([2]) == {2: hourly(0, 0, 1, 1)}
        assert series([1, 2], environment_id=1) == {
            1: hourly(0, 1, 1, 0),
            2: hourly(0, 0, 0, 0),
        }

        assert totals([1, 2]) == {1: 3, 2: 2}
        assert totals([1, 2], environment_id=1) == {1: 1, 2: 0}
        assert totals([1, 2], environment_id=0) == {1: 0, 2: 0}

        assert union([]) == 0
        assert union([1, 2]) == 3
        assert union([1, 2], environment_id=1) == 1
        assert union([1, 2], environment_id=0) == 0

        # Fold key 2 into key 1 for the listed environments.
        self.db.merge_distinct_counts(model, 1, [2], start, environment_ids=[0, 1])

        assert series([1]) == {1: hourly(2, 1, 3, 1)}
        assert series([2]) == {2: hourly(0, 0, 0, 0)}
        assert series([1, 2], environment_id=1) == {
            1: hourly(0, 1, 1, 0),
            2: hourly(0, 0, 0, 0),
        }

        assert totals([1, 2]) == {1: 3, 2: 0}

        assert union([]) == 0
        assert union([1]) == 3
        assert union([1, 2]) == 3
        assert union([2]) == 0

        self.db.delete_distinct_counts([model], [1, 2], start, end, environment_ids=[0, 1])

        # These two final queries do not pass an explicit rollup.
        assert self.db.get_distinct_counts_totals(model, [1, 2], start, end) == {
            1: 0,
            2: 0,
        }
        assert self.db.get_distinct_counts_totals(
            model, [1, 2], start, end, environment_id=1) == {
            1: 0,
            2: 0,
        }

    def test_frequency_tables(self):
        """Exercise frequency-table recording, queries, merge and delete.

        Project frequencies are recorded for two organizations in the
        current hour and in the previous hour; the previous hour is
        recorded both without an environment and with ``environment_id=1``.
        The assertions expect unscoped queries to include the
        environment-scoped records, ``environment_id=0`` to match nothing,
        ``merge_frequencies`` to move organization 2 onto organization 1
        and ``delete_frequencies`` to empty both.
        """
        now = datetime.utcnow().replace(tzinfo=pytz.UTC)
        hour_ago = now - timedelta(hours=1)
        model = TSDBModel.frequent_projects_by_organization
        rollup = 3600

        org_1, org_2, org_3 = 'organization:1', 'organization:2', 'organization:3'
        both_orgs = (org_1, org_2)
        p1, p2, p3, p4, p5 = 'project:1', 'project:2', 'project:3', 'project:4', 'project:5'
        every_project = (p1, p2, p3, p4, p5)

        # None of the registered frequency tables actually support
        # environments, so pretend that this one does.  A new set is assigned
        # to the instance (no in-place ``|=``) so the original set object is
        # left untouched.
        self.db.models_with_environment_support = (
            self.db.models_with_environment_support | {model}
        )

        def most_frequent(*times, **options):
            return self.db.get_most_frequent(model, both_orgs, *times, **options)

        def series_request():
            # New request mapping for every call.
            return {org_1: (p1, p2, p3, p4), org_2: (p5, )}

        def totals_request():
            # New request mapping for every call.
            return {org_1: every_project, org_2: every_project}

        def no_hits():
            return {project: 0.0 for project in every_project}

        # Current hour, no environment.
        self.db.record_frequency_multi(
            ((model, {org_1: {p1: 1, p2: 2, p3: 3}}), ),
            now,
        )

        # Previous hour, no environment.
        self.db.record_frequency_multi(
            ((model, {
                org_1: {p1: 1, p2: 1, p3: 1, p4: 1},
                org_2: {p5: 1},
            }), ),
            hour_ago,
        )

        # Previous hour, environment 1.
        self.db.record_frequency_multi(
            ((model, {
                org_1: {p2: 1, p3: 2, p4: 3},
                org_2: {p5: 0.5},
            }), ),
            hour_ago,
            environment_id=1,
        )

        assert most_frequent(now, rollup=rollup) == {
            org_1: [(p3, 3.0), (p2, 2.0), (p1, 1.0)],
            org_2: [],
        }

        assert most_frequent(hour_ago, now, rollup=rollup, environment_id=1) == {
            org_1: [(p4, 3.0), (p3, 2.0), (p2, 1.0)],
            org_2: [(p5, 0.5)],
        }

        assert most_frequent(now, limit=1, rollup=rollup) == {
            org_1: [(p3, 3.0)],
            org_2: [],
        }

        assert most_frequent(hour_ago, now, rollup=rollup) == {
            org_1: [(p3, 6.0), (p2, 4.0), (p4, 4.0), (p1, 2.0)],
            org_2: [(p5, 1.5)],
        }

        assert most_frequent(hour_ago, now, rollup=rollup, environment_id=0) == {
            org_1: [],
            org_2: [],
        }

        # Bucket timestamps of the current and of the previous hour.
        this_hour = int(to_timestamp(now) // rollup) * rollup
        last_hour = this_hour - rollup

        assert self.db.get_most_frequent_series(
            model,
            (org_1, org_2, org_3, ),
            hour_ago,
            now,
            rollup=rollup,
        ) == {
            org_1: [
                (last_hour, {p1: 1.0, p2: 2.0, p3: 3.0, p4: 4.0}),
                (this_hour, {p1: 1.0, p2: 2.0, p3: 3.0}),
            ],
            org_2: [
                (last_hour, {p5: 1.5}),
                (this_hour, {}),
            ],
            org_3: [
                (last_hour, {}),
                (this_hour, {}),
            ],
        }

        assert self.db.get_frequency_series(
            model,
            series_request(),
            hour_ago,
            now,
            rollup=rollup,
        ) == {
            org_1: [
                (last_hour, {p1: 1.0, p2: 2.0, p3: 3.0, p4: 4.0}),
                (this_hour, {p1: 1.0, p2: 2.0, p3: 3.0, p4: 0.0}),
            ],
            org_2: [
                (last_hour, {p5: 1.5}),
                (this_hour, {p5: 0.0}),
            ],
        }

        assert self.db.get_frequency_series(
            model,
            series_request(),
            hour_ago,
            now,
            rollup=rollup,
            environment_id=1,
        ) == {
            org_1: [
                (last_hour, {p1: 0.0, p2: 1.0, p3: 2.0, p4: 3.0}),
                (this_hour, {p1: 0.0, p2: 0.0, p3: 0.0, p4: 0.0}),
            ],
            org_2: [
                (last_hour, {p5: 0.5}),
                (this_hour, {p5: 0.0}),
            ],
        }

        assert self.db.get_frequency_totals(
            model,
            totals_request(),
            hour_ago,
            now,
            rollup=rollup,
        ) == {
            org_1: {p1: 2.0, p2: 4.0, p3: 6.0, p4: 4.0, p5: 0.0},
            org_2: {p1: 0.0, p2: 0.0, p3: 0.0, p4: 0.0, p5: 1.5},
        }

        # Fold organization 2 into organization 1 for the listed environments.
        self.db.merge_frequencies(
            model,
            org_1,
            [org_2],
            now,
            environment_ids=[0, 1],
        )

        assert self.db.get_frequency_totals(
            model,
            totals_request(),
            hour_ago,
            now,
            rollup=rollup,
        ) == {
            org_1: {p1: 2.0, p2: 4.0, p3: 6.0, p4: 4.0, p5: 1.5},
            org_2: no_hits(),
        }

        assert self.db.get_frequency_totals(
            model,
            totals_request(),
            hour_ago,
            now,
            rollup=rollup,
            environment_id=1,
        ) == {
            org_1: {p1: 0.0, p2: 1.0, p3: 2.0, p4: 3.0, p5: 0.5},
            org_2: no_hits(),
        }

        self.db.delete_frequencies(
            [model],
            [org_1, org_2],
            hour_ago,
            now,
            environment_ids=[0, 1],
        )

        assert most_frequent(now) == {org_1: [], org_2: []}
        assert most_frequent(now, environment_id=1) == {org_1: [], org_2: []}

    def test_frequency_table_import_export_no_estimators(self):
        """Export/import between two sketches that both lack an ``:e`` key.

        Key pair ``1`` is fed three items and key pair ``2`` nine; neither
        is expected to have an ``:e`` key beforehand.  After exporting pair
        ``2`` and importing it into pair ``1``, pair ``1`` must have both
        its ``:i`` and ``:e`` keys.
        """
        client = self.db.cluster.get_local_client_for_key('key')

        # Sent right after the command name on every invocation.
        # NOTE(review): presumably sketch dimensions plus an index capacity;
        # confirm against the script itself.
        parameters = [64, 5, 10]

        def flat(pairs):
            # [(1, 'foo'), (2, 'bar')] -> [1, 'foo', 2, 'bar']
            return [item for pair in pairs for item in pair]

        destination_items = [(1, 'foo'), (2, 'bar'), (3, 'baz')]
        source_items = [
            (1, 'alpha'), (2, 'beta'), (3, 'gamma'),
            (4, 'delta'), (5, 'epsilon'), (6, 'zeta'),
            (7, 'eta'), (8, 'theta'), (9, 'iota'),
        ]

        CountMinScript(
            ['1:i', '1:e'],
            ['INCR'] + parameters + flat(destination_items),
            client=client,
        )
        CountMinScript(
            ['2:i', '2:e'],
            ['INCR'] + parameters + flat(source_items),
            client=client,
        )

        # Checked in the order 1:i, 1:e, 2:i, 2:e.
        for prefix in ('1', '2'):
            assert client.exists(prefix + ':i')
            assert not client.exists(prefix + ':e')

        exports = CountMinScript(['2:i', '2:e'], ['EXPORT'] + parameters, client=client)
        assert len(exports) == 1

        import_arguments = ['IMPORT'] + parameters + [exports[0]]
        CountMinScript(['1:i', '1:e'], import_arguments, client=client)

        assert client.exists('1:i')
        assert client.exists('1:e')

    def test_frequency_table_import_export_both_estimators(self):
        """Export/import between two sketches that both have an ``:e`` key.

        Key pair ``1`` is fed six items and key pair ``2`` nine; both are
        expected to have their ``:i`` and ``:e`` keys.  After exporting pair
        ``2`` and importing it into pair ``1``, ``RANKED`` on pair ``1``
        must list the five highest-scoring items drawn from both inputs.
        """
        client = self.db.cluster.get_local_client_for_key('key')

        # Sent right after the command name on every invocation.
        # NOTE(review): presumably sketch dimensions plus an index capacity;
        # confirm against the script itself.
        parameters = [64, 5, 5]

        def run(keys, command, *extra):
            # A new argument list is assembled for every call.
            return CountMinScript(
                keys,
                [command] + parameters + list(extra),
                client=client,
            )

        run(
            ['1:i', '1:e'], 'INCR',
            1, 'foo',
            2, 'bar',
            3, 'baz',
            4, 'wilco',
            5, 'tango',
            6, 'foxtrot',
        )
        run(
            ['2:i', '2:e'], 'INCR',
            1, 'alpha',
            2, 'beta',
            3, 'gamma',
            4, 'delta',
            5, 'epsilon',
            6, 'zeta',
            7, 'eta',
            8, 'theta',
            9, 'iota',
        )

        for present in ('1:i', '1:e', '2:i', '2:e'):
            assert client.exists(present)

        exports = run(['2:i', '2:e'], 'EXPORT')
        assert len(exports) == 1

        run(['1:i', '1:e'], 'IMPORT', exports[0])

        assert client.exists('1:i')
        assert client.exists('1:e')

        top_five = run(['1:i', '1:e'], 'RANKED')
        assert top_five == [
            ['iota', '9'],
            ['theta', '8'],
            ['eta', '7'],
            ['zeta', '6'],
            ['foxtrot', '6'],
        ]

    def test_frequency_table_import_export_source_estimators(self):
        """Import a sketch that has an ``:e`` key into one that lacks it.

        Key pair ``1`` is fed three items and must only have its ``:i``
        key; key pair ``2`` is fed nine items and must have both ``:i`` and
        ``:e``.  After exporting pair ``2`` and importing it into pair
        ``1``, pair ``1`` must have both keys and ``RANKED`` must list the
        five highest-scoring items drawn from both inputs.
        """
        client = self.db.cluster.get_local_client_for_key('key')

        # Sent right after the command name on every invocation.
        # NOTE(review): presumably sketch dimensions plus an index capacity;
        # confirm against the script itself.
        parameters = [64, 5, 5]

        destination_increments = [5, 'foo', 7, 'bar', 9, 'baz']
        source_increments = [
            1, 'alpha', 2, 'beta', 3, 'gamma',
            4, 'delta', 5, 'epsilon', 6, 'zeta',
            7, 'eta', 8, 'theta', 9, 'iota',
        ]

        CountMinScript(
            ['1:i', '1:e'],
            ['INCR'] + parameters + destination_increments,
            client=client,
        )
        CountMinScript(
            ['2:i', '2:e'],
            ['INCR'] + parameters + source_increments,
            client=client,
        )

        # (key, expected existence), checked in this order.
        expectations = (
            ('1:i', True),
            ('1:e', False),
            ('2:i', True),
            ('2:e', True),
        )
        for key, should_exist in expectations:
            if should_exist:
                assert client.exists(key)
            else:
                assert not client.exists(key)

        exports = CountMinScript(['2:i', '2:e'], ['EXPORT'] + parameters, client=client)
        assert len(exports) == 1

        CountMinScript(
            ['1:i', '1:e'],
            ['IMPORT'] + parameters + [exports[0]],
            client=client,
        )

        assert client.exists('1:i')
        assert client.exists('1:e')

        ranked = CountMinScript(['1:i', '1:e'], ['RANKED'] + parameters, client=client)
        assert ranked == [
            ['iota', '9'],
            ['baz', '9'],
            ['theta', '8'],
            ['eta', '7'],
            ['bar', '7'],
        ]

    def test_frequency_table_import_export_destination_estimators(self):
        """Import a sketch that lacks an ``:e`` key into one that has it.

        Key pair ``1`` is fed nine items and must have both its ``:i`` and
        ``:e`` keys; key pair ``2`` is fed three items and must only have
        ``:i``.  After exporting pair ``2`` and importing it into pair
        ``1``, ``RANKED`` on pair ``1`` must list the five highest-scoring
        items drawn from both inputs.
        """
        client = self.db.cluster.get_local_client_for_key('key')

        # Sent right after the command name on every invocation.
        # NOTE(review): presumably sketch dimensions plus an index capacity;
        # confirm against the script itself.
        parameters = [64, 5, 5]

        # The nine names get the scores 1..9 in this order.
        names = (
            'alpha', 'beta', 'gamma',
            'delta', 'epsilon', 'zeta',
            'eta', 'theta', 'iota',
        )
        destination_increments = []
        for score, name in enumerate(names, 1):
            destination_increments += [score, name]

        CountMinScript(
            ['1:i', '1:e'],
            ['INCR'] + parameters + destination_increments,
            client=client,
        )
        CountMinScript(
            ['2:i', '2:e'],
            ['INCR'] + parameters + [5, 'foo', 7, 'bar', 9, 'baz'],
            client=client,
        )

        for present in ('1:i', '1:e', '2:i'):
            assert client.exists(present)
        assert not client.exists('2:e')

        exports = CountMinScript(['2:i', '2:e'], ['EXPORT'] + parameters, client=client)
        assert len(exports) == 1

        import_arguments = ['IMPORT'] + parameters + [exports[0]]
        CountMinScript(['1:i', '1:e'], import_arguments, client=client)

        for present in ('1:i', '1:e'):
            assert client.exists(present)

        ranked = CountMinScript(['1:i', '1:e'], ['RANKED'] + parameters, client=client)
        assert ranked == [
            ['iota', '9'],
            ['baz', '9'],
            ['theta', '8'],
            ['eta', '7'],
            ['bar', '7'],
        ]
示例#6
0
class RedisTSDBTest(TestCase):
    """Tests for RedisTSDB counters, distinct counts and frequency tables."""

    def setUp(self):
        """Create the TSDB under test and flush the databases it talks to."""
        self.db = RedisTSDB(hosts={
            0: {'db': 9}
        }, rollups=(
            # time in seconds, samples to keep
            (10, 30),  # 5 minutes at 10 seconds
            (ONE_MINUTE, 120),  # 2 hours at 1 minute
            (ONE_HOUR, 24),  # 1 days at 1 hour
            (ONE_DAY, 30),  # 30 days at 1 day
        ), vnodes=64)

        # Flush before the test body runs so that leftovers from an earlier
        # run cannot influence the assertions.
        with self.db.cluster.all() as client:
            client.flushdb()

    @staticmethod
    def _hour_floor(moment):
        """Round ``to_timestamp(moment)`` down to a multiple of 3600.

        The expected series in the tests below are keyed by this value.
        """
        seconds = int(to_timestamp(moment))
        return seconds - (seconds % 3600)

    def test_make_counter_key(self):
        """Check the exact key strings built for an integer and a string key."""
        result = self.db.make_counter_key(TSDBModel.project, 1368889980, 1)
        assert result == 'ts:1:1368889980:1'

        result = self.db.make_counter_key(TSDBModel.project, 1368889980, 'foo')
        assert result == 'ts:1:1368889980:33'

    def test_get_model_key(self):
        """Integers pass through unchanged; ``'foo'`` maps to a fixed hex string."""
        result = self.db.get_model_key(1)
        assert result == 1

        result = self.db.get_model_key('foo')
        assert result == 'bf4e529197e56a48ae2737505b9736e4'

    def test_simple(self):
        """Counts written by incr/incr_multi come back as a series and as sums."""
        now = datetime.utcnow().replace(tzinfo=pytz.UTC)
        # Four points one hour apart. ``range`` (not the Python 2-only
        # ``xrange``) keeps this runnable on Python 3 as well.
        dts = [now + timedelta(hours=i) for i in range(4)]
        start, end = dts[0], dts[-1]
        timestamp = self._hour_floor

        self.db.incr(TSDBModel.project, 1, dts[0])
        self.db.incr(TSDBModel.project, 1, dts[1], count=3)
        self.db.incr(TSDBModel.project, 1, dts[2])
        self.db.incr_multi([
            (TSDBModel.project, 1),
            (TSDBModel.project, 2),
        ], dts[3], count=4)

        results = self.db.get_range(TSDBModel.project, [1], start, end)
        assert results == {
            1: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 1),
                (timestamp(dts[3]), 4),
            ],
        }

        # Key 2 was only touched by the incr_multi call at the last point.
        results = self.db.get_range(TSDBModel.project, [2], start, end)
        assert results == {
            2: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 4),
            ],
        }

        results = self.db.get_sums(TSDBModel.project, [1, 2], start, end)
        assert results == {
            1: 9,
            2: 4,
        }

    def test_count_distinct(self):
        """Distinct members recorded per key are counted per bucket and in total."""
        now = datetime.utcnow().replace(tzinfo=pytz.UTC)
        # ``range`` instead of the Python 2-only ``xrange``.
        dts = [now + timedelta(hours=i) for i in range(4)]
        start, end = dts[0], dts[-1]
        timestamp = self._hour_floor

        model = TSDBModel.users_affected_by_group

        self.db.record(
            model,
            1,
            ('foo', 'bar'),
            dts[0],
        )

        self.db.record(
            model,
            1,
            ('baz',),
            dts[1],
        )

        self.db.record_multi((
            (
                model,
                1,
                ('foo', 'bar', 'baz'),
            ),
            (
                model,
                2,
                ('bar',),
            ),
        ), dts[2])

        self.db.record(
            model,
            2,
            ('foo',),
            dts[3],
        )

        assert self.db.get_distinct_counts_series(model, [1], start, end) == {
            1: [
                (timestamp(dts[0]), 2),
                (timestamp(dts[1]), 1),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 0),
            ],
        }

        assert self.db.get_distinct_counts_series(model, [2], start, end) == {
            2: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 1),
                (timestamp(dts[3]), 1),
            ],
        }

        # Totals count each member once per key across the whole interval.
        results = self.db.get_distinct_counts_totals(model, [1, 2], start, end)
        assert results == {
            1: 3,
            2: 2,
        }

    def test_frequency_tables(self):
        """Frequencies recorded now and one hour ago are ranked, listed and summed."""
        now = datetime.utcnow().replace(tzinfo=pytz.UTC)
        model = TSDBModel.frequent_projects_by_organization

        self.db.record_frequency_multi(
            (
                (model, {
                    'organization:1': {
                        "project:1": 1,
                        "project:2": 2,
                        "project:3": 3,
                    },
                }),
            ),
            now
        )

        self.db.record_frequency_multi(
            (
                (model, {
                    'organization:1': {
                        "project:1": 1,
                        "project:2": 2,
                        "project:3": 3,
                        "project:4": 4,
                    },
                    "organization:2": {
                        "project:5": 1.5,
                    },
                }),
            ),
            now - timedelta(hours=1),
        )

        # Querying from ``now`` only sees the first recording.
        assert self.db.get_most_frequent(
            model,
            ('organization:1', 'organization:2'),
            now,
        ) == {
            'organization:1': [
                ('project:3', 3.0),
                ('project:2', 2.0),
                ('project:1', 1.0),
            ],
            'organization:2': [],
        }

        assert self.db.get_most_frequent(
            model,
            ('organization:1', 'organization:2'),
            now,
            limit=1,
        ) == {
            'organization:1': [
                ('project:3', 3.0),
            ],
            'organization:2': [],
        }

        # Widening the interval by one hour merges both recordings.
        assert self.db.get_most_frequent(
            model,
            ('organization:1', 'organization:2'),
            now - timedelta(hours=1),
            now,
        ) == {
            'organization:1': [
                ('project:3', 3.0 + 3.0),
                ('project:2', 2.0 + 2.0),
                ('project:4', 4.0),
                ('project:1', 1.0 + 1.0),
            ],
            'organization:2': [
                ('project:5', 1.5),
            ],
        }

        rollup = 3600
        timestamp = int(to_timestamp(now) // rollup) * rollup
        assert self.db.get_frequency_series(
            model,
            {
                'organization:1': (
                    "project:1",
                    "project:2",
                    "project:3",
                    "project:4",
                ),
                'organization:2': ("project:5",),
            },
            now - timedelta(hours=1),
            now,
            rollup=rollup,
        ) == {
            'organization:1': [
                (timestamp - rollup, {
                    "project:1": 1.0,
                    "project:2": 2.0,
                    "project:3": 3.0,
                    "project:4": 4.0,
                }),
                (timestamp, {
                    "project:1": 1.0,
                    "project:2": 2.0,
                    "project:3": 3.0,
                    "project:4": 0.0,
                }),
            ],
            'organization:2': [
                (timestamp - rollup, {
                    "project:5": 1.5,
                }),
                (timestamp, {
                    "project:5": 0.0,
                }),
            ],
        }

        # Items that were never recorded for a key are reported as 0.0.
        assert self.db.get_frequency_totals(
            model,
            {
                'organization:1': (
                    "project:1",
                    "project:2",
                    "project:3",
                    "project:4",
                    "project:5",
                ),
                'organization:2': ("project:1",),
            },
            now - timedelta(hours=1),
            now,
        ) == {
            'organization:1': {
                "project:1": 1.0 + 1.0,
                "project:2": 2.0 + 2.0,
                "project:3": 3.0 + 3.0,
                "project:4": 4.0,
                "project:5": 0.0,
            },
            'organization:2': {
                "project:1": 0.0,
            },
        }
示例#7
0
class RedisTSDBTest(TestCase):
    """Tests for RedisTSDB key construction, counters and distinct counts."""

    def setUp(self):
        """Create the TSDB under test and flush the databases it talks to."""
        self.db = RedisTSDB(hosts={
            0: {'db': 9}
        }, rollups=(
            # time in seconds, samples to keep
            (10, 30),  # 5 minutes at 10 seconds
            (ONE_MINUTE, 120),  # 2 hours at 1 minute
            (ONE_HOUR, 24),  # 1 days at 1 hour
            (ONE_DAY, 30),  # 30 days at 1 day
        ), vnodes=64)

        # Flush before the test body runs so that leftovers from an earlier
        # run cannot influence the assertions.
        with self.db.cluster.all() as client:
            client.flushdb()

    @staticmethod
    def _hour_floor(moment):
        """Round ``to_timestamp(moment)`` down to a multiple of 3600.

        The expected series in the tests below are keyed by this value.
        """
        seconds = int(to_timestamp(moment))
        return seconds - (seconds % 3600)

    def test_make_key(self):
        """Check the exact key strings built for an integer and a string key."""
        result = self.db.make_key(TSDBModel.project, 1368889980, 1)
        assert result == 'ts:1:1368889980:1'

        result = self.db.make_key(TSDBModel.project, 1368889980, 'foo')
        assert result == 'ts:1:1368889980:33'

    def test_get_model_key(self):
        """Integers pass through unchanged; ``'foo'`` maps to a fixed hex string."""
        result = self.db.get_model_key(1)
        assert result == 1

        result = self.db.get_model_key('foo')
        assert result == 'bf4e529197e56a48ae2737505b9736e4'

    def test_simple(self):
        """Counts written by incr/incr_multi come back as a series and as sums."""
        now = datetime.utcnow().replace(tzinfo=pytz.UTC)
        # Four points one hour apart. ``range`` (not the Python 2-only
        # ``xrange``) keeps this runnable on Python 3 as well.
        dts = [now + timedelta(hours=i) for i in range(4)]
        start, end = dts[0], dts[-1]
        timestamp = self._hour_floor

        self.db.incr(TSDBModel.project, 1, dts[0])
        self.db.incr(TSDBModel.project, 1, dts[1], count=3)
        self.db.incr(TSDBModel.project, 1, dts[2])
        self.db.incr_multi([
            (TSDBModel.project, 1),
            (TSDBModel.project, 2),
        ], dts[3], count=4)

        results = self.db.get_range(TSDBModel.project, [1], start, end)
        assert results == {
            1: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 1),
                (timestamp(dts[3]), 4),
            ],
        }

        # Key 2 was only touched by the incr_multi call at the last point.
        results = self.db.get_range(TSDBModel.project, [2], start, end)
        assert results == {
            2: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 4),
            ],
        }

        results = self.db.get_sums(TSDBModel.project, [1, 2], start, end)
        assert results == {
            1: 9,
            2: 4,
        }

    def test_count_distinct(self):
        """Distinct members recorded per key are counted per bucket and in total."""
        now = datetime.utcnow().replace(tzinfo=pytz.UTC)
        # ``range`` instead of the Python 2-only ``xrange``.
        dts = [now + timedelta(hours=i) for i in range(4)]
        start, end = dts[0], dts[-1]
        timestamp = self._hour_floor

        model = TSDBModel.users_affected_by_group

        self.db.record(
            model,
            1,
            ('foo', 'bar'),
            dts[0],
        )

        self.db.record(
            model,
            1,
            ('baz',),
            dts[1],
        )

        self.db.record_multi((
            (
                model,
                1,
                ('foo', 'bar', 'baz'),
            ),
            (
                model,
                2,
                ('bar',),
            ),
        ), dts[2])

        self.db.record(
            model,
            2,
            ('foo',),
            dts[3],
        )

        assert self.db.get_distinct_counts_series(model, [1], start, end) == {
            1: [
                (timestamp(dts[0]), 2),
                (timestamp(dts[1]), 1),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 0),
            ],
        }

        assert self.db.get_distinct_counts_series(model, [2], start, end) == {
            2: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 1),
                (timestamp(dts[3]), 1),
            ],
        }

        # Totals count each member once per key across the whole interval.
        results = self.db.get_distinct_counts_totals(model, [1, 2], start, end)
        assert results == {
            1: 3,
            2: 2,
        }
示例#8
0
class RedisTSDBTest(TestCase):
    """Tests for RedisTSDB key construction, counters and distinct counts."""

    def setUp(self):
        """Create the TSDB under test and flush the databases it talks to."""
        self.db = RedisTSDB(
            hosts={0: {
                'db': 9
            }},
            rollups=(
                # time in seconds, samples to keep
                (10, 30),  # 5 minutes at 10 seconds
                (ONE_MINUTE, 120),  # 2 hours at 1 minute
                (ONE_HOUR, 24),  # 1 days at 1 hour
                (ONE_DAY, 30),  # 30 days at 1 day
            ),
            vnodes=64)

        # Flush before the test body runs so that leftovers from an earlier
        # run cannot influence the assertions.
        with self.db.cluster.all() as client:
            client.flushdb()

    @staticmethod
    def _hour_floor(moment):
        """Round ``to_timestamp(moment)`` down to a multiple of 3600.

        The expected series in the tests below are keyed by this value.
        """
        seconds = int(to_timestamp(moment))
        return seconds - (seconds % 3600)

    def test_make_key(self):
        """Check the exact key strings built for an integer and a string key."""
        result = self.db.make_key(TSDBModel.project, 1368889980, 1)
        assert result == 'ts:1:1368889980:1'

        result = self.db.make_key(TSDBModel.project, 1368889980, 'foo')
        assert result == 'ts:1:1368889980:33'

    def test_get_model_key(self):
        """Integers pass through unchanged; ``'foo'`` maps to a fixed hex string."""
        result = self.db.get_model_key(1)
        assert result == 1

        result = self.db.get_model_key('foo')
        assert result == 'bf4e529197e56a48ae2737505b9736e4'

    def test_simple(self):
        """Counts written by incr/incr_multi come back as a series and as sums."""
        now = datetime.utcnow().replace(tzinfo=pytz.UTC)
        # Four points one hour apart. ``range`` (not the Python 2-only
        # ``xrange``) keeps this runnable on Python 3 as well.
        dts = [now + timedelta(hours=i) for i in range(4)]
        start, end = dts[0], dts[-1]
        timestamp = self._hour_floor

        self.db.incr(TSDBModel.project, 1, dts[0])
        self.db.incr(TSDBModel.project, 1, dts[1], count=3)
        self.db.incr(TSDBModel.project, 1, dts[2])
        self.db.incr_multi(
            [
                (TSDBModel.project, 1),
                (TSDBModel.project, 2),
            ],
            dts[3],
            count=4,
        )

        results = self.db.get_range(TSDBModel.project, [1], start, end)
        assert results == {
            1: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 1),
                (timestamp(dts[3]), 4),
            ],
        }

        # Key 2 was only touched by the incr_multi call at the last point.
        results = self.db.get_range(TSDBModel.project, [2], start, end)
        assert results == {
            2: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 4),
            ],
        }

        results = self.db.get_sums(TSDBModel.project, [1, 2], start, end)
        assert results == {
            1: 9,
            2: 4,
        }

    def test_count_distinct(self):
        """Distinct members recorded per key are counted per bucket and in total."""
        now = datetime.utcnow().replace(tzinfo=pytz.UTC)
        # ``range`` instead of the Python 2-only ``xrange``.
        dts = [now + timedelta(hours=i) for i in range(4)]
        start, end = dts[0], dts[-1]
        timestamp = self._hour_floor

        model = TSDBModel.users_affected_by_group

        self.db.record(
            model,
            1,
            ('foo', 'bar'),
            dts[0],
        )

        self.db.record(
            model,
            1,
            ('baz', ),
            dts[1],
        )

        self.db.record_multi((
            (
                model,
                1,
                ('foo', 'bar', 'baz'),
            ),
            (
                model,
                2,
                ('bar', ),
            ),
        ), dts[2])

        self.db.record(
            model,
            2,
            ('foo', ),
            dts[3],
        )

        series = self.db.get_distinct_counts_series(model, [1], start, end)
        assert series == {
            1: [
                (timestamp(dts[0]), 2),
                (timestamp(dts[1]), 1),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 0),
            ],
        }

        series = self.db.get_distinct_counts_series(model, [2], start, end)
        assert series == {
            2: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 1),
                (timestamp(dts[3]), 1),
            ],
        }

        # Totals count each member once per key across the whole interval.
        results = self.db.get_distinct_counts_totals(model, [1, 2], start, end)
        assert results == {
            1: 3,
            2: 2,
        }