Example #1
def test_single_no_filter(pipeline, clean_db):
  """
  Test grouping query with single result
  Verifies count is correct
  """

  pipeline.create_cv('cv0', 'SELECT count(*) FROM stream')
  pipeline.create_cv_trigger('t0', 'cv0', 'true', 'pipeline_test_alert_new_row')
  time.sleep(1)

  rows = [(n % 3,) for n in range(1000)]
  pipeline.insert('stream', ('x',), rows)
  time.sleep(1)

  lines = pipeline.read_trigger_output()
  old_val = 0
  val = None

  for l in lines:
    assert(len(l) == 1)
    v = int(l[0])

    # Each trigger row should carry a strictly larger count than the last.
    assert v - old_val > 0
    old_val = val = v

  assert(val == 1000)
Example #2
def test_null_groups(pipeline, clean_db):
    """
    Verify that null group columns are considered equal
    """
    pipeline.create_stream('stream', x='int', y='int', z='int')
    q = """
    SELECT x::integer, y::integer, z::integer, COUNT(*) FROM stream
    GROUP BY x, y, z;
    """
    desc = ('x', 'y', 'z')
    pipeline.create_cv('test_null_groups', q)
    pipeline.create_table('test_null_groups_t', x='integer', y='integer', z='integer')

    rows = []
    for n in range(10000):
        vals = [random.randint(0, 10) for _ in range(3)]
        # Null out each column with probability 0.1; the original
        # `a and b or c` idiom silently mapped the value 0 to None.
        vals = [v if random.random() > 0.1 else None for v in vals]
        rows.append(tuple(vals))

    pipeline.insert('stream', desc, rows)
    pipeline.insert('test_null_groups_t', desc, rows)

    table_q = """
    SELECT x, y, z, COUNT(*) FROM test_null_groups_t
    GROUP BY x, y, z ORDER BY x, y, z;
    """
    expected = list(pipeline.execute(table_q))
    result = list(pipeline.execute('SELECT x, y, z, count FROM test_null_groups ORDER BY x, y, z'))

    for r, e in zip(result, expected):
        assert r == e
Example #3
def test_online_add_column(pipeline, clean_db):
    """
    Verify that we can add columns to a stream while not affecting running CQs
    """
    pipeline.create_stream("stream0", c0="integer")

    pipeline.create_cv("cv0", "SELECT c0 FROM stream0")
    pipeline.insert("stream0", ("c0",), [(n,) for n in range(0, 1000)])
    result = list(pipeline.execute("SELECT * FROM cv0"))

    assert len(result) == 1000

    for row in result:
        for col in row:
            assert col is not None

    pipeline.execute("ALTER STREAM stream0 ADD c1 integer")

    pipeline.create_cv("cv1", "SELECT c0, c1 FROM stream0")
    pipeline.insert("stream0", ("c0", "c1"), [(n, n) for n in range(1000, 2000)])
    result = list(pipeline.execute("SELECT * FROM cv1 WHERE c1 >= 1000"))

    assert len(result) == 1000

    for row in result:
        for col in row:
            assert col is not None

    pipeline.execute("ALTER STREAM stream0 ADD c2 integer")
    pipeline.create_cv("cv2", "SELECT c0, c1, c2 FROM stream0")
    pipeline.insert("stream0", ("c0", "c1", "c2"), [(n, n, n) for n in range(2000, 3000)])
    result = list(pipeline.execute("SELECT * FROM cv2 WHERE c2 >= 2000"))

    assert len(result) == 1000

    for row in result:
        for col in row:
            assert col is not None

    pipeline.execute("ALTER STREAM stream0 ADD c3 integer")
    pipeline.create_cv("cv3", "SELECT c0, c1, c2, c3 FROM stream0")
    pipeline.insert("stream0", ("c0", "c1", "c2", "c3"), [(n, n, n, n) for n in range(3000, 4000)])
    result = list(pipeline.execute("SELECT * FROM cv3 WHERE c3 >= 3000"))

    assert len(result) == 1000

    for row in result:
        for col in row:
            assert col is not None

    pipeline.execute("ALTER STREAM stream0 ADD c4 integer")
    pipeline.create_cv("cv4", "SELECT c0, c1, c2, c3, c4 FROM stream0")
    pipeline.insert("stream0", ("c0", "c1", "c2", "c3", "c4"), [(n, n, n, n, n) for n in range(4000, 5000)])
    result = list(pipeline.execute("SELECT * FROM cv4 WHERE c4 >= 4000"))

    assert len(result) == 1000

    for row in result:
        for col in row:
            assert col is not None
Example #4
def test_single_continuous_view(pipeline, clean_db):
  """
  Verify that specific continuous views can be dropped and restored
  """
  pipeline.create_stream('stream0', x='int')
  pipeline.create_cv('test_single0', 'SELECT COUNT(*) FROM stream0')
  pipeline.create_cv('test_single1', 'SELECT COUNT(*) FROM stream0')
  pipeline.insert('stream0', ('x',), [(x,) for x in range(10)])

  result = pipeline.execute('SELECT count FROM test_single0').first()
  assert result['count'] == 10

  result = pipeline.execute('SELECT count FROM test_single1').first()
  assert result['count'] == 10

  _dump(pipeline, 'test_single.sql', tables=['test_single0', 'stream0', 'test_single0_mrel'])

  pipeline.drop_all()
  _restore(pipeline, 'test_single.sql')

  result = pipeline.execute('SELECT count FROM test_single0').first()
  assert result['count'] == 10

  # We didn't dump this one
  result = list(pipeline.execute('SELECT * FROM pg_class WHERE relname LIKE \'%%test_single1%%\''))
  assert not result
Example #5
def test_hll_agg_hashing(pipeline, clean_db):
    """
    Verify that hll_agg correctly hashes different input types
    """
    pipeline.create_stream('test_hll_stream', x='int', y='text', z='float8')
    q = """
    SELECT hll_agg(x::integer) AS i,
    hll_agg(y::text) AS t,
    hll_agg(z::float8) AS f FROM test_hll_stream
    """
    desc = ('x', 'y', 'z')
    pipeline.create_cv('test_hll_hashing', q)

    rows = []
    for n in range(10000):
        rows.append((n, '%d' % n, float(n)))
        rows.append((n, '%05d' % n, float(n)))

    pipeline.insert('test_hll_stream', desc, rows)

    cvq = """
    SELECT hll_cardinality(i),
    hll_cardinality(t), hll_cardinality(f) FROM test_hll_hashing
    """
    result = list(pipeline.execute(cvq))

    assert len(result) == 1

    result = result[0]

    assert result[0] == 9976
    assert result[1] == 19951
    assert result[2] == 10062
Example #6
def test_stats_aggs(pipeline, clean_db):
    """
    Verify that combines work on stats aggs
    """
    q = """
    SELECT x::integer %% 10 AS k,
    regr_sxx(x, y::float8), stddev(x) FROM stream GROUP BY k;
    """
    desc = ('x', 'y')
    pipeline.create_cv('test_stats_aggs', q)
    pipeline.create_table('test_stats_aggs_t', x='integer', y='float8')

    rows = []
    for n in range(10000):
        row = (random.randint(0, 1000), random.random())
        rows.append(row)

    pipeline.insert('stream', desc, rows)
    pipeline.insert('test_stats_aggs_t', desc, rows)

    tq = """
    SELECT regr_sxx(x, y::float8), stddev(x) FROM test_stats_aggs_t
    """
    table_result = list(pipeline.execute(tq))

    cq = """
    SELECT combine(regr_sxx), combine(stddev) FROM test_stats_aggs
    """
    cv_result = list(pipeline.execute(cq))

    assert len(table_result) == len(cv_result)

    for tr, cr in zip(table_result, cv_result):
        assert abs(tr[0] - cr[0]) < 0.00001
        assert abs(tr[1] - cr[1]) < 0.00001
Example #7
def test_user_low_and_high_card(pipeline, clean_db):
    """
    Verify that Bloom filters with low and high cardinalities are correctly
    unioned
    """
    q = """
    SELECT k::integer, bloom_agg(x::integer) FROM test_bloom_stream GROUP BY k
    """
    desc = ('k', 'x')
    pipeline.create_cv('test_bloom_agg', q)

    # Low cardinalities
    rows = []
    for n in range(1000):
        rows.append((0, random.choice((-1, -2))))
        rows.append((1, random.choice((-3, -4))))

    # High cardinalities
    for n in range(10000):
        rows.append((2, n))
        rows.append((3, n))

    pipeline.insert('test_bloom_stream', desc, rows)

    result = pipeline.execute('SELECT bloom_cardinality(combine(bloom_agg)) '
                              'FROM test_bloom_agg WHERE k in (0, 1)').first()
    assert result[0] == 4

    result = pipeline.execute('SELECT bloom_cardinality(combine(bloom_agg)) '
                              'FROM test_bloom_agg WHERE k in (2, 3)').first()
    assert result[0] == 8879

    result = pipeline.execute('SELECT bloom_cardinality(combine(bloom_agg)) '
                              'FROM test_bloom_agg').first()
    assert result[0] == 8881
Example #8
def test_cmsketch_agg(pipeline, clean_db):
    """
    Test cmsketch_agg, cmsketch_merge_agg, cmsketch_cdf, cmsketch_quantile
    """
    pipeline.create_stream('test_cmsketch_stream', k='int', x='int')

    q = """
    SELECT k::integer, cmsketch_agg(x::int) AS c FROM test_cmsketch_stream
    GROUP BY k
    """
    desc = ('k', 'x')
    pipeline.create_cv('test_cmsketch_agg', q)

    rows = []
    for n in range(1000):
        rows.append((0, n % 20))
        rows.append((1, n % 50))

    pipeline.insert('test_cmsketch_stream', desc, rows)

    result = list(pipeline.execute(
      'SELECT cmsketch_frequency(c, 10) AS x, cmsketch_frequency(c, 40) AS y, '
      'cmsketch_frequency(c, 60) FROM test_cmsketch_agg ORDER BY k').fetchall())
    assert len(result) == 2
    assert tuple(result[0]) == (50, 0, 0)
    assert tuple(result[1]) == (20, 20, 0)

    result = list(pipeline.execute(
      'SELECT cmsketch_frequency(combine(c), 10) AS x, '
      'cmsketch_frequency(combine(c), 40) AS y, cmsketch_frequency(combine(c), 60) '
      'FROM test_cmsketch_agg').fetchall())
    assert len(result) == 1
    assert tuple(result[0]) == (70, 20, 0)
Example #9
def test_stream_stats(pipeline, clean_db):
    """
    Verify that stream-level statistics collection works
    """
    # create a few streams
    for n in range(8):
        sname = 's%d' % n
        pipeline.create_stream(sname, x='int')
        cvname = 'cv%d' % n
        pipeline.create_cv(cvname, 'SELECT count(*) FROM %s' % sname)

    for n in range(8):
        sname = 's%d' % n
        x = n + 1
        values = [(v, ) for v in range(1000 * x)]
        pipeline.insert(sname, ('x', ), values)

    time.sleep(2)

    for n in range(8):
        sname = 's%d' % n
        row = pipeline.execute(
            "SELECT stream, input_rows, input_batches, input_bytes FROM pipelinedb.stream_stats WHERE stream = '%s'"
            % sname)[0]
        x = n + 1
        assert row['input_rows'] == 1000 * x
Example #10
def test_hll_distinct(pipeline, clean_db):
    """
    Verify that combines work on HLL COUNT DISTINCT queries
    """
    q = """
    SELECT x::integer %% 10 AS k, COUNT(DISTINCT x) AS count FROM stream GROUP BY k
    """
    desc = ('x', 'y')
    pipeline.create_cv('test_hll_distinct', q)
    pipeline.create_table('test_hll_distinct_t', x='integer', y='float8')

    rows = []
    for n in range(10000):
        row = (random.randint(0, 1000), random.random())
        rows.append(row)

    pipeline.insert('stream', desc, rows)
    pipeline.insert('test_hll_distinct_t', desc, rows)

    # Note that the CQ will use the HLL variant of COUNT DISTINCT,
    # so use hll_count_distinct on the table too
    tq = """
    SELECT hll_count_distinct(x) FROM test_hll_distinct_t
    """
    table_result = list(pipeline.execute(tq))

    cq = """
    SELECT combine(count) FROM test_hll_distinct
    """
    cv_result = list(pipeline.execute(cq))

    assert len(table_result) == len(cv_result)

    for tr, cr in zip(table_result, cv_result):
        assert tr == cr
Example #11
def test_stats_aggs(pipeline, clean_db):
    """
    Verify that combines work on stats aggs
    """
    q = """
    SELECT x::integer %% 10 AS k,
    regr_sxx(x, y::float8), stddev(x) FROM stream GROUP BY k;
    """
    desc = ('x', 'y')
    pipeline.create_cv('test_stats_aggs', q)
    pipeline.create_table('test_stats_aggs_t', x='integer', y='float8')

    rows = []
    for n in range(10000):
        row = (random.randint(0, 1000), random.random())
        rows.append(row)

    pipeline.insert('stream', desc, rows)
    pipeline.insert('test_stats_aggs_t', desc, rows)

    tq = """
    SELECT regr_sxx(x, y::float8), stddev(x) FROM test_stats_aggs_t
    """
    table_result = list(pipeline.execute(tq))

    cq = """
    SELECT combine(regr_sxx), combine(stddev) FROM test_stats_aggs
    """
    cv_result = list(pipeline.execute(cq))

    assert len(table_result) == len(cv_result)

    for tr, cr in zip(table_result, cv_result):
        assert abs(tr[0] - cr[0]) < 0.00001
        assert abs(tr[1] - cr[1]) < 0.00001
Example #12
def test_avg_no_filter(pipeline, clean_db):
  """
  Test grouping query with avg, when clause passes everything
  Verifies averages are correct
  """
  pipeline.create_cv('cv0', 'SELECT x::integer,avg(y::real) FROM stream group by x')
  pipeline.create_cv_trigger('t0', 'cv0', 'true', 'pipeline_test_alert_new_row')
  time.sleep(1)

  rows = [(n % 3,n) for n in range(1000)]
  pipeline.insert('stream', ('x','y'), rows)
  time.sleep(1)

  lines = pipeline.read_trigger_output()
  d = {}

  for l in lines:
    assert(len(l) == 2)

    k = int(l[0])
    v = float(l[1])

    assert 0 <= k <= 2

    # Keep only the latest average reported for each group.
    d[k] = v

  assert(d[0] == 499.5)
  assert(d[1] == 499)
  assert(d[2] == 500)
Example #13
def test_append_no_filter(pipeline, clean_db):
  """
  Test append type query with no filter
  Verifies that every inserted row triggers, and keys are valid
  Verifies we receive the correct number of each group
  """

  pipeline.create_cv('cv0', 'SELECT x::int FROM stream')
  pipeline.create_cv_trigger('t0', 'cv0', 'true', 'pipeline_test_alert_new_row')
  time.sleep(1)

  rows = [(n % 3,) for n in range(1000)]
  pipeline.insert('stream', ('x',), rows)
  time.sleep(2)

  lines = pipeline.read_trigger_output()
  assert(len(lines) == 1000)

  d = {}

  for l in lines:
    assert(len(l) == 1)
    k = int(l[0])

    assert 0 <= k <= 2

    if k not in d:
      d[k] = 0

    d[k] += 1

  assert(d[0] == 334)
  assert(d[1] == 333)
  assert(d[2] == 333)
Example #14
def test_single_with_threshold(pipeline, clean_db):
  """
  Test grouping query with single result and a filter when new.count > 100
  Verifies trigger rows meet the filter criteria, and final count is correct
  """

  pipeline.create_cv('cv0', 'SELECT count(*) FROM stream')
  pipeline.create_cv_trigger('t0', 'cv0', 'new.count > 100', 'pipeline_test_alert_new_row')
  time.sleep(1)

  rows = [(n % 3,) for n in range(1000)]
  pipeline.insert('stream', ('x',), rows)
  time.sleep(1)

  lines = pipeline.read_trigger_output()
  old_val = 0
  val = None

  for l in lines:
    assert(len(l) == 1)
    v = int(l[0])

    assert v - old_val > 0
    assert v > 100
    old_val = val = v

  assert(val == 1000)
Example #15
def test_bloom_intersection(pipeline, clean_db):
    """
  Verify that bloom_intersection works
  """
    pipeline.create_stream("test_bloom_stream", x="int", k="int")

    q = """
  SELECT k::int, bloom_agg(x::integer) FROM test_bloom_stream GROUP BY k
  """

    desc = ("k", "x")
    pipeline.create_cv("test_bloom_intersection", q)

    rows = []
    for i in range(10000):
        rows.append((0, 2 * i))
        rows.append((1, i))

    pipeline.insert("test_bloom_stream", desc, rows)

    cvq = """
  SELECT bloom_cardinality(bloom_intersection_agg(bloom_agg))
  FROM test_bloom_intersection
  """

    result = list(pipeline.execute(cvq))

    assert len(result) == 1

    result = result[0]

    assert result[0] == 5530
Example #16
def test_filter_clause(pipeline, clean_db):
    """
    Verify that FILTER clauses work on aggregates and sliding window aggregates
    """
    pipeline.create_stream("test_filter_stream", x="int")
    q = """
    SELECT SUM(x::int) FILTER (WHERE mod(x, 2) = 0) AS sum2, SUM(x::int) FILTER (WHERE mod(x, 3) = 0) AS sum3 FROM test_filter_stream
    """
    sw = """
    WHERE arrival_timestamp > clock_timestamp() - interval '30 second'
    """
    pipeline.create_cv("test_filter", q)
    pipeline.create_cv("test_filter_sw", "%s %s" % (q, sw))

    desc = ("x",)
    rows = []
    for n in range(1000):
        rows.append((n,))

    pipeline.insert("test_filter_stream", desc, rows)

    sum2 = sum(filter(lambda x: x % 2 == 0, map(lambda x: x[0], rows)))
    sum3 = sum(filter(lambda x: x % 3 == 0, map(lambda x: x[0], rows)))

    result1 = pipeline.execute("SELECT * FROM test_filter").first()
    result2 = pipeline.execute("SELECT * FROM test_filter_sw").first()

    assert result1["sum2"] == result2["sum2"] == sum2
    assert result1["sum3"] == result2["sum3"] == sum3
Example #17
def test_combine_in_view(pipeline, clean_db):
    """
    Verify that combines in views on top of continuous views work
    """
    q = """
    SELECT x::integer, avg(y::integer) FROM stream GROUP BY x
    """
    desc = ('x', 'y')
    pipeline.create_cv('test_combine_view', q)
    pipeline.execute(
        'CREATE VIEW v AS SELECT combine(avg) FROM test_combine_view')

    rows = []
    for n in range(10000):
        rows.append((random.randint(1, 256), random.randint(1, 1024)))

    pipeline.insert('stream', desc, rows)

    view = list(pipeline.execute('SELECT * FROM v'))

    assert len(view) == 1

    expected = sum(r[1] for r in rows) / float(len(rows))

    assert abs(float(view[0][0]) - expected) < 0.00001

    pipeline.execute('DROP VIEW v')
Example #18
def test_null_groups(pipeline, clean_db):
    """
    Verify that null group columns are considered equal
    """
    q = """
    SELECT x::integer, y::integer, z::integer, COUNT(*) FROM stream
    GROUP BY x, y, z;
    """
    desc = ('x', 'y', 'z')
    pipeline.create_cv('test_null_groups', q)
    pipeline.create_table('test_null_groups_t',
                          x='integer',
                          y='integer',
                          z='integer')

    rows = []
    for n in range(10000):
        vals = [random.randint(0, 10) for _ in range(3)]
        # Null out each column with probability 0.1; the original
        # `... and None or n` expression always yielded n because None is
        # falsy, so no NULL groups were ever produced.
        vals = [None if random.random() < 0.1 else v for v in vals]
        rows.append(tuple(vals))

    pipeline.insert('stream', desc, rows)
    pipeline.insert('test_null_groups_t', desc, rows)

    table_q = """
    SELECT x, y, z, COUNT(*) FROM test_null_groups_t
    GROUP BY x, y, z ORDER BY x, y, z;
    """
    expected = list(pipeline.execute(table_q))
    result = list(
        pipeline.execute(
            'SELECT x, y, z, count FROM test_null_groups ORDER BY x, y, z'))

    for r, e in zip(result, expected):
        assert r == e
Example #19
def _test_agg(pipeline, agg, check_fn=None):
    name = agg[:agg.find('(')]
    q = 'SELECT g::integer, %s OVER (PARTITION BY g ORDER BY ts::timestamp) FROM %s'
    cv_name = 'test_%s' % name
    table_name = 'test_%s_t' % name
    desc = ('ts', 'g', 'x', 'y', 'z')

    pipeline.create_cv(cv_name, q % (agg, 'stream'))
    pipeline.create_table(table_name, ts='timestamp', x='integer', y='integer', z='integer', g='integer')

    rows = []
    for n in range(1000):
        ts = str(datetime.utcnow() + timedelta(seconds=n))
        row = (ts, n % 10, random.randint(1, 256), random.randint(1, 256), random.randint(1, 256))
        rows.append(row)

    pipeline.insert('stream', desc, rows)
    pipeline.insert(table_name, desc, rows)

    if check_fn:
        return check_fn(pipeline)

    expected = list(pipeline.execute(q % (agg, table_name) + ' ORDER BY g'))
    result = list(pipeline.execute('SELECT * FROM %s ORDER BY g' % cv_name))

    assert len(expected) == len(result)

    for e, r in zip(expected, result):
        assert e == r

    pipeline.drop_cv(cv_name)
    pipeline.drop_table(table_name)
Example #20
def test_sliding_windows(pipeline, clean_db):
    """
  Verify that sliding window queries are properly dumped and restored
  """
    pipeline.execute("CREATE CONTINUOUS VIEW sw_v WITH (max_age = '20 seconds') AS SELECT count(*) FROM stream")
    pipeline.insert("stream", ("x",), [(x,) for x in range(10)])

    result = pipeline.execute("SELECT count FROM sw_v").first()
    assert result["count"] == 10

    _dump(pipeline, "test_sw.sql")

    pipeline.drop_all_views()
    _restore(pipeline, "test_sw.sql")

    result = pipeline.execute("SELECT count FROM sw_v").first()
    assert result["count"] == 10

    # We should still drop back to 0 within 20 seconds
    result = pipeline.execute("SELECT count FROM sw_v").first()
    while result["count"] > 0:
        time.sleep(1)
        result = pipeline.execute("SELECT count FROM sw_v").first()

    result = pipeline.execute("SELECT count FROM sw_v").first()
    assert result["count"] == 0
Example #21
def test_bloom_intersection(pipeline, clean_db):
    """
  Verify that bloom_intersection works
  """
    q = """
  SELECT k::int, bloom_agg(x::integer) FROM test_bloom_stream GROUP BY k
  """

    desc = ('k', 'x')
    pipeline.create_cv('test_bloom_intersection', q)

    rows = []
    for i in range(10000):
        rows.append((0, 2 * i))
        rows.append((1, i))

    pipeline.insert('test_bloom_stream', desc, rows)

    cvq = """
  SELECT bloom_cardinality(bloom_intersection_agg(bloom_agg))
  FROM test_bloom_intersection
  """

    result = list(pipeline.execute(cvq))

    assert len(result) == 1

    result = result[0]

    assert result[0] == 5530
Example #22
def test_single_continuous_view(pipeline, clean_db):
    """
  Verify that specific continuous views can be dropped and restored
  """
    pipeline.create_cv("test_single0", "SELECT COUNT(*) FROM stream")
    pipeline.create_cv("test_single1", "SELECT COUNT(*) FROM stream")
    pipeline.insert("stream", ("x",), [(x,) for x in range(10)])

    result = pipeline.execute("SELECT count FROM test_single0").first()
    assert result["count"] == 10

    result = pipeline.execute("SELECT count FROM test_single1").first()
    assert result["count"] == 10

    _dump(pipeline, "test_single.sql", cv_name="test_single0")

    pipeline.drop_all_views()
    _restore(pipeline, "test_single.sql")

    result = pipeline.execute("SELECT count FROM test_single0").first()
    assert result["count"] == 10

    # We didn't dump this one
    result = list(pipeline.execute("SELECT * FROM pg_class WHERE relname LIKE '%%test_single1%%'"))
    assert not result
Example #23
def insert():
    while not stop:
        values = [(random.randint(0, 20), random.randint(0, 1000000))
                  for _ in range(1000)]
        pipeline.insert('test_vacuum_stream', ('x', 'y'), values)
        time.sleep(0.01)
Example #24
def test_user_low_and_high_card(pipeline, clean_db):
    """
    Verify that Bloom filters with low and high cardinalities are correctly
    unioned
    """
    q = """
    SELECT k::integer, bloom_agg(x::integer) FROM test_bloom_stream GROUP BY k
    """
    desc = ('k', 'x')
    pipeline.create_cv('test_bloom_agg', q)

    # Low cardinalities
    rows = []
    for n in range(1000):
        rows.append((0, random.choice((-1, -2))))
        rows.append((1, random.choice((-3, -4))))

    # High cardinalities
    for n in range(10000):
        rows.append((2, n))
        rows.append((3, n))

    pipeline.insert('test_bloom_stream', desc, rows)

    result = pipeline.execute('SELECT bloom_cardinality(combine(bloom_agg)) '
                              'FROM test_bloom_agg WHERE k in (0, 1)').first()
    assert result[0] == 4

    result = pipeline.execute('SELECT bloom_cardinality(combine(bloom_agg)) '
                              'FROM test_bloom_agg WHERE k in (2, 3)').first()
    assert result[0] == 8879

    result = pipeline.execute('SELECT bloom_cardinality(combine(bloom_agg)) '
                              'FROM test_bloom_agg').first()
    assert result[0] == 8881
Example #25
def test_hll_agg_hashing(pipeline, clean_db):
    """
    Verify that hll_agg correctly hashes different input types
    """
    pipeline.create_stream('test_hll_stream', x='int', y='text', z='float8')
    q = """
    SELECT hll_agg(x::integer) AS i,
    hll_agg(y::text) AS t,
    hll_agg(z::float8) AS f FROM test_hll_stream
    """
    desc = ('x', 'y', 'z')
    pipeline.create_cv('test_hll_hashing', q)

    rows = []
    for n in range(10000):
        rows.append((n, '%d' % n, float(n)))
        rows.append((n, '%05d' % n, float(n)))

    pipeline.insert('test_hll_stream', desc, rows)

    cvq = """
    SELECT hll_cardinality(i),
    hll_cardinality(t), hll_cardinality(f) FROM test_hll_hashing
    """
    result = list(pipeline.execute(cvq))

    assert len(result) == 1

    result = result[0]

    assert result[0] == 9976
    assert result[1] == 19951
    assert result[2] == 10062
Example #26
def test_bloom_agg_hashing(pipeline, clean_db):
    """
    Verify that bloom_agg correctly hashes different input types
    """
    q = """
    SELECT bloom_agg(x::integer) AS i,
    bloom_agg(y::text) AS t,
    bloom_agg(z::float8) AS f FROM test_bloom_stream
    """
    desc = ('x', 'y', 'z')
    pipeline.create_cv('test_bloom_hashing', q)

    rows = []
    for n in range(10000):
        rows.append((n, '%d' % n, float(n)))
        rows.append((n, '%05d' % n, float(n)))

    pipeline.insert('test_bloom_stream', desc, rows)

    cvq = """
    SELECT bloom_cardinality(i),
    bloom_cardinality(t), bloom_cardinality(f) FROM test_bloom_hashing
    """
    result = list(pipeline.execute(cvq))

    assert len(result) == 1

    result = result[0]

    assert result[0] == 8879
    assert result[1] == 15614
    assert result[2] == 8855
Example #27
def test_combine_in_view(pipeline, clean_db):
    """
    Verify that combines in views on top of continuous views work
    """
    q = """
    SELECT x::integer, avg(y::integer) FROM stream0 GROUP BY x
    """
    desc = ('x', 'y')
    pipeline.create_stream('stream0', x='int', y='float8')
    pipeline.create_cv('test_combine_view', q)
    pipeline.execute('CREATE VIEW v AS SELECT combine(avg) FROM test_combine_view')

    rows = []
    for n in range(10000):
        rows.append((random.randint(1, 256), random.randint(1, 1024)))

    pipeline.insert('stream0', desc, rows)

    view = list(pipeline.execute('SELECT * FROM v'))

    assert len(view) == 1

    expected = sum(r[1] for r in rows) / float(len(rows))

    assert abs(float(view[0][0]) - expected) < 0.00001

    pipeline.execute('DROP VIEW v')
Example #28
def assert_result_changes(func, args):
    """
    Verifies that the result of the given function changes with time
    """
    pipeline.create_stream('stream', x='int', y='text', z='int')
    name = 'assert_%s_decreases' % func
    pipeline.create_cv(name,
                       "SELECT %s(%s) FROM stream WHERE arrival_timestamp > clock_timestamp() - interval '2 seconds'" % (func, args))

    rows = [(n, str(n), n + 1) for n in range(1000)]
    pipeline.insert('stream', ('x', 'y', 'z'), rows)

    current = 1

    results = []
    while current:
        row = pipeline.execute('SELECT * FROM %s' % name).first()
        current = row[func]
        if current is None:
            break
        results.append(current)

    # Verify that we actually read something
    assert results

    pipeline.drop_cv(name)
Example #29
def test_sliding_windows(pipeline, clean_db):
    """
  Verify that sliding window queries are properly dumped and restored
  """
    pipeline.create_stream('stream0', x='int')
    pipeline.execute(
        'CREATE CONTINUOUS VIEW sw_v WITH (sw = \'20 seconds\') AS SELECT count(*) FROM stream0'
    )
    pipeline.insert('stream0', ('x', ), [(x, ) for x in range(10)])

    result = pipeline.execute('SELECT count FROM sw_v').first()
    assert result['count'] == 10

    _dump(pipeline, 'test_sw.sql')

    pipeline.drop_all()
    _restore(pipeline, 'test_sw.sql')

    result = pipeline.execute('SELECT count FROM sw_v').first()
    assert result['count'] == 10

    # We should still drop back to 0 within 20 seconds
    result = pipeline.execute('SELECT count FROM sw_v').first()
    while result['count'] > 0:
        time.sleep(1)
        result = pipeline.execute('SELECT count FROM sw_v').first()

    result = pipeline.execute('SELECT count FROM sw_v').first()
    assert result['count'] == 0
Example #30
def test_group_filter(pipeline, clean_db):
    """
  Test grouping query with when clause new.x = 1
  Verifies count is correct and other data is filtered out
  """

    pipeline.create_cv('cv0',
                       'SELECT x::integer,count(*) FROM stream group by x')
    pipeline.create_cv_trigger('t0', 'cv0', 'new.x = 1',
                               'pipeline_test_alert_new_row')
    time.sleep(1)

    rows = [(n % 3, ) for n in range(1000)]
    pipeline.insert('stream', ('x', ), rows)
    time.sleep(1)

    lines = pipeline.read_trigger_output()
    old_val = 0
    val = None

    for l in lines:
        assert (len(l) == 2)

        k = int(l[0])
        v = int(l[1])

        assert (k == 1)
        assert v - old_val > 0
        old_val = val = v

    assert (val == 333)
Example #31
def test_bloom_agg_hashing(pipeline, clean_db):
    """
  Verify that bloom_agg correctly hashes different input types
  """
    pipeline.create_stream("test_bloom_stream", x="int", y="text", z="float8")

    q = """
  SELECT bloom_agg(x::integer) AS i,
  bloom_agg(y::text) AS t,
  bloom_agg(z::float8) AS f FROM test_bloom_stream
  """
    desc = ("x", "y", "z")
    pipeline.create_cv("test_bloom_hashing", q)

    rows = []
    for n in range(10000):
        rows.append((n, "%d" % n, float(n)))
        rows.append((n, "%05d" % n, float(n)))

    pipeline.insert("test_bloom_stream", desc, rows)

    cvq = """
  SELECT bloom_cardinality(i),
  bloom_cardinality(t), bloom_cardinality(f) FROM test_bloom_hashing
  """
    result = list(pipeline.execute(cvq))

    assert len(result) == 1

    result = result[0]

    assert result[0] == 8879
    assert result[1] == 15614
    assert result[2] == 8855
Example #32
def test_single_no_filter(pipeline, clean_db):
    """
  Test grouping query with single result
  Verifies count is correct
  """

    pipeline.create_cv('cv0', 'SELECT count(*) FROM stream')
    pipeline.create_cv_trigger('t0', 'cv0', 'true',
                               'pipeline_test_alert_new_row')
    time.sleep(1)

    rows = [(n % 3, ) for n in range(1000)]
    pipeline.insert('stream', ('x', ), rows)
    time.sleep(1)

    lines = pipeline.read_trigger_output()
    old_val = 0
    val = None

    for l in lines:
        assert (len(l) == 1)
        v = int(l[0])

        assert v - old_val > 0
        old_val = val = v

    assert (val == 1000)
Example #33
def test_hll_distinct(pipeline, clean_db):
    """
    Verify that combines work on HLL COUNT DISTINCT queries
    """
    q = """
    SELECT x::integer %% 10 AS k, COUNT(DISTINCT x) AS count FROM stream GROUP BY k
    """
    desc = ('x', 'y')
    pipeline.create_cv('test_hll_distinct', q)
    pipeline.create_table('test_hll_distinct_t', x='integer', y='float8')

    rows = []
    for n in range(10000):
        row = (random.randint(0, 1000), random.random())
        rows.append(row)

    pipeline.insert('stream', desc, rows)
    pipeline.insert('test_hll_distinct_t', desc, rows)

    # Note that the CQ will use the HLL variant of COUNT DISTINCT,
    # so use hll_count_distinct on the table too
    tq = """
    SELECT hll_count_distinct(x) FROM test_hll_distinct_t
    """
    table_result = list(pipeline.execute(tq))

    cq = """
    SELECT combine(count) FROM test_hll_distinct
    """
    cv_result = list(pipeline.execute(cq))

    assert len(table_result) == len(cv_result)

    for tr, cr in zip(table_result, cv_result):
        assert tr == cr
Example #34
def test_single_with_threshold(pipeline, clean_db):
    """
  Test grouping query with single result and a filter when new.count > 100
  Verifies trigger rows meet the filter criteria, and final count is correct
  """

    pipeline.create_cv('cv0', 'SELECT count(*) FROM stream')
    pipeline.create_cv_trigger('t0', 'cv0', 'new.count > 100',
                               'pipeline_test_alert_new_row')
    time.sleep(1)

    rows = [(n % 3, ) for n in range(1000)]
    pipeline.insert('stream', ('x', ), rows)
    time.sleep(1)

    lines = pipeline.read_trigger_output()
    old_val = 0
    val = None

    for l in lines:
        assert (len(l) == 1)
        v = int(l[0])

        assert v - old_val > 0
        assert v > 100
        old_val = val = v

    assert (val == 1000)
Example #35
def test_simple_aggs(pipeline, clean_db):
    """
    Verify that combines work properly on simple aggs
    """
    q = """
    SELECT x::integer %% 10 AS k,
    avg(x), sum(y::float8), count(*) FROM stream GROUP BY k;
    """
    desc = ('x', 'y')
    pipeline.create_cv('test_simple_aggs', q)
    pipeline.create_table('test_simple_aggs_t', x='integer', y='float8')

    rows = []
    for n in range(10000):
        row = (random.randint(0, 1000), random.random())
        rows.append(row)

    pipeline.insert('stream', desc, rows)
    pipeline.insert('test_simple_aggs_t', desc, rows)

    table_result = list(
        pipeline.execute(
            'SELECT avg(x), sum(y::float8), count(*) FROM test_simple_aggs_t'))
    cv_result = list(
        pipeline.execute(
            'SELECT combine(avg), combine(sum), combine(count) FROM test_simple_aggs'
        ))

    assert len(table_result) == len(cv_result)

    for tr, cr in zip(table_result, cv_result):
        assert abs(tr[0] - cr[0]) < 0.00001
        assert abs(tr[1] - cr[1]) < 0.00001
        assert abs(tr[2] - cr[2]) < 0.00001
Example #36
def test_append_no_filter(pipeline, clean_db):
    """
  Test append type query with no filter
  Verifies that every inserted row triggers, and keys are valid
  Verifies we receive the correct number of each group
  """

    pipeline.create_cv('cv0', 'SELECT x::int FROM stream')
    pipeline.create_cv_trigger('t0', 'cv0', 'true',
                               'pipeline_test_alert_new_row')
    time.sleep(1)

    rows = [(n % 3, ) for n in range(1000)]
    pipeline.insert('stream', ('x', ), rows)
    time.sleep(2)

    lines = pipeline.read_trigger_output()
    assert (len(lines) == 1000)

    d = {}

    for l in lines:
        assert (len(l) == 1)
        k = int(l[0])

        assert 0 <= k <= 2

        if k not in d:
            d[k] = 0

        d[k] += 1

    assert (d[0] == 334)
    assert (d[1] == 333)
    assert (d[2] == 333)
Example #37
def test_filter_clause(pipeline, clean_db):
    """
    Verify that FILTER clauses work on aggregates and sliding window aggregates
    """
    pipeline.create_stream('test_filter_stream', x='int')
    q = """
    SELECT SUM(x::int) FILTER (WHERE mod(x, 2) = 0) AS sum2, SUM(x::int) FILTER (WHERE mod(x, 3) = 0) AS sum3 FROM test_filter_stream
    """
    sw = """
    WHERE arrival_timestamp > clock_timestamp() - interval '30 second'
    """
    pipeline.create_cv('test_filter', q)
    pipeline.create_cv('test_filter_sw', '%s %s' % (q, sw))

    desc = ('x', )
    rows = []
    for n in range(1000):
        rows.append((n, ))

    pipeline.insert('test_filter_stream', desc, rows)

    sum2 = sum(filter(lambda x: x % 2 == 0, map(lambda x: x[0], rows)))
    sum3 = sum(filter(lambda x: x % 3 == 0, map(lambda x: x[0], rows)))

    result1 = pipeline.execute('SELECT * FROM test_filter').first()
    result2 = pipeline.execute('SELECT * FROM test_filter_sw').first()

    assert result1['sum2'] == result2['sum2'] == sum2
    assert result1['sum3'] == result2['sum3'] == sum3
Example #38
def test_avg_no_filter(pipeline, clean_db):
    """
  Test grouping query with avg, when clause passes everything
  Verifies averages are correct
  """
    pipeline.create_cv(
        'cv0', 'SELECT x::integer,avg(y::real) FROM stream group by x')
    pipeline.create_cv_trigger('t0', 'cv0', 'true',
                               'pipeline_test_alert_new_row')
    time.sleep(1)

    rows = [(n % 3, n) for n in range(1000)]
    pipeline.insert('stream', ('x', 'y'), rows)
    time.sleep(1)

    lines = pipeline.read_trigger_output()
    d = {}

    for l in lines:
        assert (len(l) == 2)

        k = int(l[0])
        v = float(l[1])

        assert 0 <= k <= 2

        # Keep only the latest average reported for each group.
        d[k] = v

    assert (d[0] == 499.5)
    assert (d[1] == 499)
    assert (d[2] == 500)
Example #39
def test_bloom_contains(pipeline, clean_db):
    """
  Verify that bloom_contains works
  """
    q = """
  SELECT bloom_agg(x::integer) FROM test_bloom_stream
  """

    desc = ('x',)
    pipeline.create_cv('test_bloom_contains', q)

    rows = []
    for i in range(10000):
        rows.append((2 * i, ))

    pipeline.insert('test_bloom_stream', desc, rows)

    cvq = """
  SELECT bloom_contains(bloom_agg, 0), bloom_contains(bloom_agg, 5000),
  bloom_contains(bloom_agg, 1), bloom_contains(bloom_agg, 5001)
  FROM test_bloom_contains
  """

    result = list(pipeline.execute(cvq))

    assert len(result) == 1
    result = result[0]
    assert result[0] is True
    assert result[1] is True
    assert result[2] is False
    assert result[3] is False
Example #40
def assert_result_changes(func, args):
    """
    Verifies that the result of the given function changes with time
    """
    name = 'assert_%s_decreases' % func
    pipeline.create_cv(
        name,
        "SELECT %s(%s) FROM stream WHERE arrival_timestamp > clock_timestamp() - interval '2 seconds'"
        % (func, args))

    rows = [(n, str(n), n + 1) for n in range(1000)]
    pipeline.insert('stream', ('x', 'y', 'z'), rows)

    current = 1

    results = []
    while current:
        row = pipeline.execute('SELECT * FROM %s' % name).first()
        current = row[func]
        if current is None:
            break
        results.append(current)

    # Verify that we actually read something
    assert results

    pipeline.drop_cv(name)
Example #41
def test_bloom_agg_hashing(pipeline, clean_db):
    """
    Verify that bloom_agg correctly hashes different input types
    """
    q = """
    SELECT bloom_agg(x::integer) AS i,
    bloom_agg(y::text) AS t,
    bloom_agg(z::float8) AS f FROM test_bloom_stream
    """
    desc = ('x', 'y', 'z')
    pipeline.create_cv('test_bloom_hashing', q)

    rows = []
    for n in range(10000):
        rows.append((n, '%d' % n, float(n)))
        rows.append((n, '%05d' % n, float(n)))

    pipeline.insert('test_bloom_stream', desc, rows)

    cvq = """
    SELECT bloom_cardinality(i),
    bloom_cardinality(t), bloom_cardinality(f) FROM test_bloom_hashing
    """
    result = list(pipeline.execute(cvq))

    assert len(result) == 1

    result = result[0]

    assert result[0] == 8879
    assert result[1] == 15614
    assert result[2] == 8855
Example #42
def test_fss_agg(pipeline, clean_db):
    q = """
  SELECT k::text, fss_agg(x::int, 5) FROM test_fss_stream
  GROUP BY k
  """
    desc = ('k', 'x')
    pipeline.create_cv('test_fss_agg', q)

    items = list(range(14))
    random.shuffle(items)
    a_items = items
    b_items = list(reversed(items))

    values = [('a', i) for i in get_geometric_dist(a_items)]
    values.extend(('b', i) for i in get_geometric_dist(b_items))
    random.shuffle(values)

    pipeline.insert('test_fss_stream', desc, values)
    result = list(
        pipeline.execute(
            'SELECT k, fss_topk_values(fss_agg) FROM test_fss_agg ORDER BY k'))
    topk = map(int, result[0][1].rstrip('}').lstrip('{').split(','))
    assert sorted(topk) == sorted(a_items[-5:])
    topk = map(int, result[1][1].rstrip('}').lstrip('{').split(','))
    assert sorted(topk) == sorted(b_items[-5:])
Example #43
def test_sliding_windows(pipeline, clean_db):
  """
  Verify that sliding window queries are properly dumped and restored
  """
  pipeline.create_stream('stream0', x='int')
  pipeline.execute('CREATE CONTINUOUS VIEW sw_v WITH (sw = \'20 seconds\') AS SELECT count(*) FROM stream0')
  pipeline.insert('stream0', ('x',), [(x,) for x in range(10)])

  result = pipeline.execute('SELECT count FROM sw_v').first()
  assert result['count'] == 10

  _dump(pipeline, 'test_sw.sql')

  pipeline.drop_all()
  _restore(pipeline, 'test_sw.sql')

  result = pipeline.execute('SELECT count FROM sw_v').first()
  assert result['count'] == 10

  # We should still drop back to 0 within 20 seconds
  result = pipeline.execute('SELECT count FROM sw_v').first()
  while result['count'] > 0:
    time.sleep(1)
    result = pipeline.execute('SELECT count FROM sw_v').first()

  result = pipeline.execute('SELECT count FROM sw_v').first()
  assert result['count'] == 0
Example #44
def assert_result_changes(func, args):
    """
    Verifies that the result of the given function changes with time
    """
    name = "assert_%s_decreases" % func
    pipeline.create_cv(
        name,
        "SELECT %s(%s) FROM stream WHERE arrival_timestamp > clock_timestamp() - interval '2 seconds'" % (func, args),
    )

    rows = [(n, str(n), n + 1) for n in range(1000)]
    pipeline.insert("stream", ("x", "y", "z"), rows)

    current = 1

    results = []
    while current:
        row = pipeline.execute("SELECT * FROM %s" % name).first()
        current = row[func]
        if current is None:
            break
        results.append(current)

    # Verify that we actually read something
    assert results

    pipeline.drop_cv(name)
Example #45
def test_indexed(pipeline, clean_db):
    """
    Verify that stream-table joins involving indexed tables work
    """
    pipeline.create_stream('stream', x='int', y='int')
    q = """
    SELECT stream.x::integer, count(*) FROM stream
    JOIN test_indexed_t t ON stream.x = t.x GROUP BY stream.x
    """
    pipeline.create_table('test_indexed_t', x='integer', y='integer')
    pipeline.execute('CREATE INDEX idx ON test_indexed_t(x)')

    t = _generate_rows(2, 1000)
    s = _generate_rows(2, 1000)

    pipeline.insert('test_indexed_t', ('x', 'y'), t)
    time.sleep(0.1)

    pipeline.create_cv('test_indexed', q)
    pipeline.insert('stream', ('x', 'y'), s)

    expected = _join(s, t, [0])
    result = pipeline.execute('SELECT sum(count) FROM test_indexed').first()

    assert result['sum'] == len(expected)
Example #46
def test_postmaster_worker_recovery(pipeline, clean_db):
  """
  Verify that the Postmaster only restarts crashed worker processes and does
  not attempt to start them when the continuous query scheduler should do so.
  """
  result = pipeline.execute('SELECT COUNT(*) FROM pipeline_proc_stats WHERE type = \'worker\'').first()
  expected_workers = result['count']

  result = pipeline.execute('SELECT COUNT(*) FROM pipeline_proc_stats WHERE type = \'combiner\'').first()
  expected_combiners = result['count']

  q = 'SELECT COUNT(*) FROM stream'
  pipeline.create_cv('test_pm_recovery', q)
  pipeline.insert('stream', ['x'], [(1, ), (1, )])

  def backend():
    try:
      # Just keep a long-running backend connection open
      client = pipeline.engine.connect()
      client.execute('SELECT pg_sleep(10000)')
    except Exception:
      pass

  t = threading.Thread(target=backend)
  t.start()

  attempts = 0
  result = None
  backend_pid = 0

  while not result and attempts < 10:
    result = pipeline.execute("""SELECT pid, query FROM pg_stat_activity WHERE lower(query) LIKE '%%pg_sleep%%'""").first()
    time.sleep(1)
    attempts += 1

  assert result

  backend_pid = result['pid']
  os.kill(backend_pid, signal.SIGKILL)

  attempts = 0
  pipeline.conn = None

  while attempts < 15:
    try:
      pipeline.conn = pipeline.engine.connect()
      break
    except Exception:
      time.sleep(1)
    attempts += 1

  assert pipeline.conn

  # Now verify that we have the correct number of CQ worker procs
  result = pipeline.execute('SELECT COUNT(*) FROM pipeline_proc_stats WHERE type = \'worker\'').first()
  assert result['count'] == expected_workers

  result = pipeline.execute('SELECT COUNT(*) FROM pipeline_proc_stats WHERE type = \'combiner\'').first()
  assert result['count'] == expected_combiners
Example #47
def test_user_low_and_high_card(pipeline, clean_db):
    """
    Verify that HLLs with low and high cardinalities are correctly combined
    """
    pipeline.create_stream('test_hll_stream', x='int', k='integer')
    q = """
    SELECT k::integer, hll_agg(x::integer) FROM test_hll_stream GROUP BY k
    """
    desc = ('k', 'x')
    pipeline.create_cv('test_hll_agg', q)

    # Low cardinalities
    rows = []
    for n in range(1000):
        rows.append((0, random.choice((-1, -2))))
        rows.append((1, random.choice((-3, -4))))

    # High cardinalities
    for n in range(10000):
        rows.append((2, n))
        rows.append((3, n))

    pipeline.insert('test_hll_stream', desc, rows)

    result = pipeline.execute('SELECT hll_cardinality(combine(hll_agg)) '
                              'FROM test_hll_agg WHERE k in (0, 1)').first()
    assert result[0] == 4

    result = pipeline.execute('SELECT hll_cardinality(combine(hll_agg)) '
                              'FROM test_hll_agg WHERE k in (2, 3)').first()
    assert result[0] == 9976

    result = pipeline.execute('SELECT hll_cardinality(combine(hll_agg)) '
                              'FROM test_hll_agg').first()
    assert result[0] == 9983
Example #48
def test_simple_crash(pipeline, clean_db):
  """
  Test simple worker and combiner crashes.
  """
  q = 'SELECT COUNT(*) FROM stream'
  pipeline.create_cv('test_simple_crash', q)

  pipeline.insert('stream', ['x'], [(1, ), (1, )])

  result = pipeline.execute('SELECT * FROM test_simple_crash').first()
  assert result['count'] == 2

  # We can potentially lose one batch for a worker or combiner crash.
  # In our case each batch adds a count 2 and since we're adding 3 batches
  # we should either see an increment from the previous count of 4 or 6.
  pipeline.insert('stream', ['x'], [(1, ), (1, )])

  assert kill_worker()

  pipeline.insert('stream', ['x'], [(1, ), (1, )])

  result = pipeline.execute('SELECT * FROM test_simple_crash').first()
  assert result['count'] == 6

  pipeline.insert('stream', ['x'], [(1, ), (1, )])

  assert kill_combiner()

  pipeline.insert('stream', ['x'], [(1, ), (1, )])

  result = pipeline.execute('SELECT * FROM test_simple_crash').first()
  assert result['count'] == 10
Example #49
def test_user_low_and_high_card(pipeline, clean_db):
    """
    Verify that HLLs with low and high cardinalities are correctly combined
    """
    pipeline.create_stream('test_hll_stream', x='int', k='integer')
    q = """
    SELECT k::integer, hll_agg(x::integer) FROM test_hll_stream GROUP BY k
    """
    desc = ('k', 'x')
    pipeline.create_cv('test_hll_agg', q)

    # Low cardinalities
    rows = []
    for n in range(1000):
        rows.append((0, random.choice((-1, -2))))
        rows.append((1, random.choice((-3, -4))))

    # High cardinalities
    for n in range(10000):
        rows.append((2, n))
        rows.append((3, n))

    pipeline.insert('test_hll_stream', desc, rows)

    result = pipeline.execute('SELECT hll_cardinality(combine(hll_agg)) '
                              'FROM test_hll_agg WHERE k in (0, 1)').first()
    assert result[0] == 4

    result = pipeline.execute('SELECT hll_cardinality(combine(hll_agg)) '
                              'FROM test_hll_agg WHERE k in (2, 3)').first()
    assert result[0] == 9976

    result = pipeline.execute('SELECT hll_cardinality(combine(hll_agg)) '
                              'FROM test_hll_agg').first()
    assert result[0] == 9983
Example #50
def test_sliding_windows(pipeline, clean_db):
  """
  Verify that sliding window queries are properly dumped and restored
  """
  pipeline.create_stream('stream0', x='int')
  pipeline.execute('CREATE VIEW sw_v WITH (sw = \'20 seconds\') AS SELECT count(*) FROM stream0')
  pipeline.insert('stream0', ('x',), [(x,) for x in range(10)])

  result = pipeline.execute('SELECT count FROM sw_v')[0]
  assert result['count'] == 10

  _dump(pipeline, 'test_sw.sql')

  pipeline.drop_all()
  _restore(pipeline, 'test_sw.sql')

  result = pipeline.execute('SELECT count FROM sw_v')[0]
  assert result['count'] == 10

  # We should still drop back to 0 within 20 seconds
  result = pipeline.execute('SELECT count FROM sw_v')[0]
  while result['count'] > 0:
    time.sleep(1)
    result = pipeline.execute('SELECT count FROM sw_v')[0]

  result = pipeline.execute('SELECT count FROM sw_v')[0]
  # Disabled until #157 (currently combine doesn't return 0 on NULL input for this aggregate)
  # assert result == 0
  assert result['count'] is None
Example #51
def test_indexed(pipeline, clean_db):
  """
  Verify that stream-table joins involving indexed tables work
  """
  pipeline.create_stream('stream0', x='int', y='int')
  q = """
  SELECT stream0.x::integer, count(*) FROM stream0
  JOIN test_indexed_t t ON stream0.x = t.x GROUP BY stream0.x
  """
  pipeline.create_table('test_indexed_t', x='integer', y='integer')
  pipeline.execute('CREATE INDEX idx ON test_indexed_t(x)')

  t = _generate_rows(2, 1000)
  s = _generate_rows(2, 1000)

  pipeline.insert('test_indexed_t', ('x', 'y'), t)
  time.sleep(0.1)

  pipeline.create_cv('test_indexed', q)
  pipeline.insert('stream0', ('x', 'y'), s)

  expected = _join(s, t, [0])
  result = pipeline.execute('SELECT sum(count) FROM test_indexed')[0]

  assert result['sum'] == len(expected)
Example #52
def test_cmsketch_agg(pipeline, clean_db):
    """
    Test cmsketch_agg, cmsketch_merge_agg, cmsketch_cdf, cmsketch_quantile
    """
    q = """
    SELECT k::integer, cmsketch_agg(x::int) AS c FROM test_cmsketch_stream
    GROUP BY k
    """
    desc = ('k', 'x')
    pipeline.create_cv('test_cmsketch_agg', q)

    rows = []
    for n in range(1000):
        rows.append((0, n % 20))
        rows.append((1, n % 50))

    pipeline.insert('test_cmsketch_stream', desc, rows)

    result = list(
        pipeline.execute(
            'SELECT cmsketch_frequency(c, 10) AS x, cmsketch_frequency(c, 40) AS y, '
            'cmsketch_frequency(c, 60) FROM test_cmsketch_agg ORDER BY k').
        fetchall())
    assert len(result) == 2
    assert tuple(result[0]) == (50, 0, 0)
    assert tuple(result[1]) == (20, 20, 0)

    result = list(
        pipeline.execute(
            'SELECT cmsketch_frequency(combine(c), 10) AS x, '
            'cmsketch_frequency(combine(c), 40) AS y, cmsketch_frequency(combine(c), 60) '
            'FROM test_cmsketch_agg').fetchall())
    assert len(result) == 1
    assert tuple(result[0]) == (70, 20, 0)
Example #53
def insert():
    while not stop:
        values = [(random.randint(0, 20), random.randint(0, 1000000))
                  for _ in range(1000)]
        pipeline.insert('test_vacuum_stream', ('x', 'y'), values)
        time.sleep(0.01)
Example #54
def test_restart_recovery(pipeline, clean_db):
    pipeline.create_stream('stream0', x='int')
    q = 'SELECT COUNT(*) FROM stream0'
    pipeline.create_cv('test_restart_recovery', q)

    pipeline.insert('stream0', ['x'], [(1, ), (1, )])

    result = pipeline.execute('SELECT * FROM test_restart_recovery').first()
    assert result['count'] == 2

    # Need to sleep here, otherwise on restart the materialization table is
    # empty. Not sure why.
    time.sleep(0.1)

    # Restart.
    pipeline.stop()
    pipeline.run()

    result = pipeline.execute('SELECT * FROM test_restart_recovery').first()
    assert result['count'] == 2

    pipeline.insert('stream0', ['x'], [(1, ), (1, )])

    result = pipeline.execute('SELECT * FROM test_restart_recovery').first()
    assert result['count'] == 4
Example #55
def test_simple_aggs(pipeline, clean_db):
    """
    Verify that combines work properly on simple aggs
    """
    q = """
    SELECT x::integer %% 10 AS k,
    avg(x), sum(y::float8), count(*) FROM stream0 GROUP BY k;
    """
    desc = ('x', 'y')
    pipeline.create_stream('stream0', x='int', y='float8')
    pipeline.create_cv('test_simple_aggs', q)
    pipeline.create_table('test_simple_aggs_t', x='integer', y='float8')

    rows = []
    for n in range(10000):
        row = (random.randint(0, 1000), random.random())
        rows.append(row)

    pipeline.insert('stream0', desc, rows)
    pipeline.insert('test_simple_aggs_t', desc, rows)

    table_result = list(pipeline.execute('SELECT avg(x), sum(y::float8), count(*) FROM test_simple_aggs_t'))
    cv_result = list(pipeline.execute('SELECT combine(avg), combine(sum), combine(count) FROM test_simple_aggs'))

    assert len(table_result) == len(cv_result)

    for tr, cr in zip(table_result, cv_result):
        assert abs(tr[0] - cr[0]) < 0.00001
        assert abs(tr[1] - cr[1]) < 0.00001
        assert abs(tr[2] - cr[2]) < 0.00001
Example #56
def test_simple_crash(pipeline, clean_db):
    """
  Test simple worker and combiner crashes.
  """
    pipeline.create_stream('stream0', x='int')
    q = 'SELECT COUNT(*) FROM stream0'
    pipeline.create_cv('test_simple_crash', q)

    pipeline.insert('stream0', ['x'], [(1, ), (1, )])

    result = pipeline.execute('SELECT * FROM test_simple_crash').first()
    assert result['count'] == 2

    # This batch can potentially get lost.
    pipeline.insert('stream0', ['x'], [(1, ), (1, )])

    assert kill_worker()

    pipeline.insert('stream0', ['x'], [(1, ), (1, )])

    result = pipeline.execute('SELECT * FROM test_simple_crash').first()
    assert result['count'] in [4, 6]

    # This batch can potentially get lost.
    pipeline.insert('stream0', ['x'], [(1, ), (1, )])

    assert kill_combiner()

    pipeline.insert('stream0', ['x'], [(1, ), (1, )])

    result = pipeline.execute('SELECT * FROM test_simple_crash').first()
    assert result['count'] in [6, 8, 10]

    # To ensure that all remaining events in ZMQ queues have been consumed
    time.sleep(2)
Example #57
def test_single_continuous_view(pipeline, clean_db):
    """
  Verify that specific continuous views can be dropped and restored
  """
    pipeline.create_stream('stream0', x='int')
    pipeline.create_cv('test_single0', 'SELECT COUNT(*) FROM stream0')
    pipeline.create_cv('test_single1', 'SELECT COUNT(*) FROM stream0')
    pipeline.insert('stream0', ('x', ), [(x, ) for x in range(10)])

    result = pipeline.execute('SELECT count FROM test_single0').first()
    assert result['count'] == 10

    result = pipeline.execute('SELECT count FROM test_single1').first()
    assert result['count'] == 10

    _dump(pipeline,
          'test_single.sql',
          tables=['test_single0', 'stream0', 'test_single0_mrel'])

    pipeline.drop_all()
    _restore(pipeline, 'test_single.sql')

    result = pipeline.execute('SELECT count FROM test_single0').first()
    assert result['count'] == 10

    # We didn't dump this one
    result = list(
        pipeline.execute(
            'SELECT * FROM pg_class WHERE relname LIKE \'%%test_single1%%\''))
    assert not result
Example #58
def test_bloom_contains(pipeline, clean_db):
    """
  Verify that bloom_contains works
  """
    pipeline.create_stream("test_bloom_stream", x="int")

    q = """
  SELECT bloom_agg(x::integer) FROM test_bloom_stream
  """

    desc = "x"
    pipeline.create_cv("test_bloom_contains", q)

    rows = []
    for i in range(10000):
        rows.append((2 * i,))

    pipeline.insert("test_bloom_stream", desc, rows)

    cvq = """
  SELECT bloom_contains(bloom_agg, 0), bloom_contains(bloom_agg, 5000),
  bloom_contains(bloom_agg, 1), bloom_contains(bloom_agg, 5001)
  FROM test_bloom_contains
  """

    result = list(pipeline.execute(cvq))

    assert len(result) == 1
    result = result[0]
    assert result[0] is True
    assert result[1] is True
    assert result[2] is False
    assert result[3] is False
Example #59
def test_simple_crash(pipeline, clean_db):
    """
  Test simple worker and combiner crashes.
  """
    q = 'SELECT COUNT(*) FROM stream'
    pipeline.create_cv('test_simple_crash', q)

    pipeline.insert('stream', ['x'], [(1, ), (1, )])

    result = pipeline.execute('SELECT * FROM test_simple_crash').first()
    assert result['count'] == 2

    # We can potentially lose one batch for a worker or combiner crash.
    # In our case each batch adds a count 2 and since we're adding 3 batches
    # we should either see an increment from the previous count of 4 or 6.
    pipeline.insert('stream', ['x'], [(1, ), (1, )])

    assert kill_worker()

    pipeline.insert('stream', ['x'], [(1, ), (1, )])

    result = pipeline.execute('SELECT * FROM test_simple_crash').first()
    assert result['count'] == 6

    pipeline.insert('stream', ['x'], [(1, ), (1, )])

    assert kill_combiner()

    pipeline.insert('stream', ['x'], [(1, ), (1, )])

    result = pipeline.execute('SELECT * FROM test_simple_crash').first()
    assert result['count'] == 10
Example #60
def test_postmaster_worker_recovery(pipeline, clean_db):
  """
  Verify that the Postmaster only restarts crashed worker processes and does
  not attempt to start them when the continuous query scheduler should do so.
  """
  result = pipeline.execute('SELECT COUNT(*) FROM pipeline_proc_stats WHERE type = \'worker\'').first()
  expected_workers = result['count']

  result = pipeline.execute('SELECT COUNT(*) FROM pipeline_proc_stats WHERE type = \'combiner\'').first()
  expected_combiners = result['count']

  q = 'SELECT COUNT(*) FROM stream'
  pipeline.create_cv('test_pm_recovery', q)
  pipeline.insert('stream', ['x'], [(1, ), (1, )])

  def backend():
    try:
      # Just keep a long-running backend connection open
      client = pipeline.engine.connect()
      client.execute('SELECT pg_sleep(10000)')
    except Exception:
      pass

  t = threading.Thread(target=backend)
  t.start()

  attempts = 0
  result = None
  backend_pid = 0

  while not result and attempts < 10:
    result = pipeline.execute("""SELECT pid, query FROM pg_stat_activity WHERE lower(query) LIKE '%%pg_sleep%%'""").first()
    time.sleep(1)
    attempts += 1

  assert result

  backend_pid = result['pid']
  os.kill(backend_pid, signal.SIGKILL)

  attempts = 0
  pipeline.conn = None

  while attempts < 10:
    try:
      pipeline.conn = pipeline.engine.connect()
      break
    except Exception:
      time.sleep(1)
    attempts += 1

  assert pipeline.conn

  # Now verify that we have the correct number of CQ worker procs
  result = pipeline.execute('SELECT COUNT(*) FROM pipeline_proc_stats WHERE type = \'worker\'').first()
  assert result['count'] == expected_workers

  result = pipeline.execute('SELECT COUNT(*) FROM pipeline_proc_stats WHERE type = \'combiner\'').first()
  assert result['count'] == expected_combiners