Example #1
def assert_result_changes(func, args):
    """
    Verifies that the result of the given function changes with time
    """
    name = 'assert_%s_decreases' % func
    pipeline.create_cv(
        name,
        "SELECT %s(%s) FROM stream WHERE arrival_timestamp > clock_timestamp() - interval '2 seconds'"
        % (func, args))

    rows = [(n, str(n), n + 1) for n in range(1000)]
    pipeline.insert('stream', ('x', 'y', 'z'), rows)

    current = 1

    results = []
    while current:
        row = pipeline.execute('SELECT * FROM %s' % name).first()
        current = row[func]
        if current is None:
            break
        results.append(current)

    # Verify that we actually read something
    assert results

    pipeline.drop_cv(name)
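
A minimal sketch of how this helper is typically driven, assuming pipeline and clean_db are the conftest-provided fixtures these examples rely on, that the helper can see the same pipeline object, and that the stream is created up front (the later variants do this explicitly); the aggregate passed in is illustrative:

def test_count_decays(pipeline, clean_db):
    # Hypothetical driver; the columns mirror the rows inserted above.
    pipeline.create_stream('stream', x='int', y='text', z='int')

    # count(*) over the 2-second window should shrink to nothing as rows
    # fall out of the window, so the helper observes a changing result.
    assert_result_changes('count', '*')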
Example #2
def _test_agg(pipeline, agg, check_fn=None):
    name = agg[:agg.find('(')]
    q = 'SELECT g::integer, %s OVER (PARTITION BY g ORDER BY ts::timestamp) FROM %s'
    cv_name = 'test_%s' % name
    table_name = 'test_%s_t' % name
    desc = ('ts', 'g', 'x', 'y', 'z')

    pipeline.create_cv(cv_name, q % (agg, 'stream'))
    pipeline.create_table(table_name, ts='timestamp', x='integer', y='integer', z='integer', g='integer')

    rows = []
    for i, n in enumerate(range(1000)):
        ts = str(datetime.utcnow() + timedelta(seconds=i))
        row = ts, n % 10, random.randint(1, 256), random.randint(1, 256), random.randint(1, 256)
        rows.append(row)

    pipeline.insert('stream', desc, rows)
    pipeline.insert(table_name, desc, rows)

    if check_fn:
        return check_fn(pipeline)

    expected = list(pipeline.execute(q % (agg, table_name) + ' ORDER BY g'))
    result = list(pipeline.execute('SELECT * FROM %s ORDER BY g' % cv_name))

    assert len(expected) == len(result)

    for e, r in zip(expected, result):
        assert e == r

    pipeline.drop_cv(cv_name)
    pipeline.drop_table(table_name)
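
A hypothetical caller for this helper, following the pattern of PipelineDB's window tests; it assumes the usual module imports (from datetime import datetime, timedelta; import random) and creates the stream with the columns listed in desc:

def test_sum(pipeline, clean_db):
    # Illustrative aggregate; any windowed aggregate expression fits here.
    pipeline.create_stream('stream', ts='timestamp', g='int',
                           x='int', y='int', z='int')
    _test_agg(pipeline, 'sum(x::integer)')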
Example #3
def assert_result_changes(func, args):
    """
    Verifies that the result of the given function changes with time
    """
    name = "assert_%s_decreases" % func
    pipeline.create_cv(
        name,
        "SELECT %s(%s) FROM stream WHERE arrival_timestamp > clock_timestamp() - interval '2 seconds'" % (func, args),
    )

    rows = [(n, str(n), n + 1) for n in range(1000)]
    pipeline.insert("stream", ("x", "y", "z"), rows)

    current = 1

    results = []
    while current:
        row = pipeline.execute("SELECT * FROM %s" % name).first()
        current = row[func]
        if current is None:
            break
        results.append(current)

    # Verify that we actually read something
    assert results

    pipeline.drop_cv(name)
Example #4
def assert_result_changes(func, args):
    """
    Verifies that the result of the given function changes with time
    """
    pipeline.create_stream('stream', x='int', y='text', z='int')
    name = 'assert_%s_decreases' % func
    pipeline.create_cv(name,
                       "SELECT %s(%s) FROM stream WHERE arrival_timestamp > clock_timestamp() - interval '2 seconds'" % (func, args))

    rows = [(n, str(n), n + 1) for n in range(1000)]
    pipeline.insert('stream', ('x', 'y', 'z'), rows)

    current = 1

    results = []
    while current:
        row = pipeline.execute('SELECT * FROM %s' % name).first()
        current = row[func]
        if current is None:
            break
        results.append(current)

    # Verify that we actually read something
    assert results

    pipeline.drop_cv(name)
Example #5
def test_output_tree(pipeline, clean_db):
  """
  Create a relatively complex tree of continuous views
  and transforms chained together by their output streams,
  and verify that all output correctly propagates to the leaves.
  """
  pipeline.create_cv('level0_0', 'SELECT x::integer, count(*) FROM root GROUP BY x')

  pipeline.create_cv('level1_0', 'SELECT (new).x, (new).count FROM level0_0_osrel')
  pipeline.create_cv('level1_1', 'SELECT (new).x, (new).count FROM level0_0_osrel')

  pipeline.create_cv('level2_0', 'SELECT (new).x, (new).count FROM level1_0_osrel')
  pipeline.create_cv('level2_1', 'SELECT (new).x, (new).count FROM level1_0_osrel')
  pipeline.create_cv('level2_2', 'SELECT (new).x, (new).count FROM level1_1_osrel')
  pipeline.create_cv('level2_3', 'SELECT (new).x, (new).count FROM level1_1_osrel')

  pipeline.create_cv('level3_0', 'SELECT (new).x, (new).count FROM level2_0_osrel')
  pipeline.create_cv('level3_1', 'SELECT (new).x, (new).count FROM level2_0_osrel')
  pipeline.create_cv('level3_2', 'SELECT (new).x, (new).count FROM level2_1_osrel')
  pipeline.create_cv('level3_3', 'SELECT (new).x, (new).count FROM level2_1_osrel')
  pipeline.create_cv('level3_4', 'SELECT (new).x, (new).count FROM level2_2_osrel')
  pipeline.create_cv('level3_5', 'SELECT (new).x, (new).count FROM level2_2_osrel')
  pipeline.create_cv('level3_6', 'SELECT (new).x, (new).count FROM level2_3_osrel')
  pipeline.create_cv('level3_7', 'SELECT (new).x, (new).count FROM level2_3_osrel')

  pipeline.insert('root', ('x',), [(x % 100,) for x in range(10000)])
  time.sleep(5)

  names = [r[0] for r in pipeline.execute('SELECT name FROM pipeline_views() ORDER BY name DESC')]
  assert len(names) == 15

  # Verify all values propagated to each node in the tree
  for name in names:
    rows = pipeline.execute('SELECT x, max(count) FROM %s GROUP BY x' % name)
    for row in rows:
      x, count = row
      assert count == 100

  pipeline.insert('root', ('x',), [(x % 100,) for x in range(10000)])
  time.sleep(5)

  # Verify all values propagated to each node in the tree again
  for name in names:
    rows = pipeline.execute('SELECT x, max(count) FROM %s GROUP BY x' % name)
    for row in rows:
      x, count = row
      assert count == 200

  # Drop these in reverse dependency order to prevent deadlocks
  for name in names:
    pipeline.drop_cv(name)
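
For reference, the _osrel relations read above are each view's output stream: every event carries old and new composite records, and (new).x is ordinary Postgres composite-field access. The smallest instance of the chaining pattern, using hypothetical view names:

# parent aggregates the raw stream; child consumes parent's delta events.
pipeline.create_cv('parent', 'SELECT x::integer, count(*) FROM root GROUP BY x')
pipeline.create_cv('child', 'SELECT (new).x, (new).count FROM parent_osrel')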
Example #6
def assert_result_changes(func, args):
    """
    Verifies that the result of the given function changes with time
    """
    pipeline.create_stream('stream0', x='int', y='text', z='int')
    name = 'assert_%s_decreases' % func
    pipeline.create_cv(
        name,
        "SELECT %s(%s) FROM stream0 WHERE arrival_timestamp > clock_timestamp() - interval '2 seconds'"
        % (func, args))

    # We also create a wide sliding window to verify that user combines work
    # on SW CVs and produce the same output as they would on a non-SW CV
    sw_name = name + '_sw_agg'
    pipeline.create_cv(
        sw_name,
        "SELECT x %% 10 AS g, %s(%s) FROM stream0 WHERE arrival_timestamp > clock_timestamp() - interval '2 days' GROUP BY g"
        % (func, args))
    verify_name = name + '_sw_agg_verify'
    pipeline.create_cv(
        verify_name,
        "SELECT x %% 10 AS g, %s(%s) FROM stream0 GROUP BY g" % (func, args))

    rows = [(n, str(n), n + 1) for n in range(1000)]
    pipeline.insert('stream0', ('x', 'y', 'z'), rows)
    current = 1

    results = []
    while current:
        row = pipeline.execute('SELECT * FROM %s' % name)[0]
        current = row[func]
        if current is None:
            break
        results.append(current)

    # Verify that we actually read something
    assert results

    # Verify user combines on SW CVs work and produce the expected output
    sw_row = pipeline.execute('SELECT combine(%s) FROM %s' %
                              (func, sw_name))[0]
    expected_row = pipeline.execute('SELECT combine(%s) FROM %s' %
                                    (func, verify_name))[0]
    if isinstance(sw_row['combine'], list):
        sw_row['combine'] = sorted(sw_row['combine'])
        expected_row['combine'] = sorted(expected_row['combine'])
    assert sw_row['combine'] == expected_row['combine']

    pipeline.drop_cv(name)
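
The tail of this example exercises user combines: combine() merges a continuous view's partial aggregate states, so combining the sliding-window CV's groups should equal combining the groups of the unwindowed verification CV. The check in isolation, with hypothetical view names and avg standing in for the aggregate column:

# combine() folds the per-group partial states back into a single
# aggregate value, which should match across the SW and non-SW CVs.
sw = pipeline.execute('SELECT combine(avg) FROM sw_cv')[0]
full = pipeline.execute('SELECT combine(avg) FROM verify_cv')[0]
assert sw['combine'] == full['combine']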
Example #7
def test_concurrent_sw_ticking(pipeline, clean_db):
    """
  Verify that several concurrent sliding-window queries, each
  with a different window, tick correctly at different intervals.
  """
    pipeline.create_stream('stream0', x='int')
    output_names = []
    for n in range(10):
        name = 'sw%d' % n
        pipeline.create_cv(
            name,
            'SELECT x::integer, count(*) FROM stream0 GROUP BY x',
            sw='%d seconds' % (n + 10))
        output_name = name + '_output'

        q = """
    SELECT arrival_timestamp,
    CASE WHEN (old).x IS NULL THEN (new).x ELSE (old).x END AS x, old, new FROM %s_osrel
    """ % name
        pipeline.create_cv(output_name, q)
        output_names.append(output_name)

    names = [
        r[0] for r in pipeline.execute(
            'SELECT name FROM pipeline_views() ORDER BY name DESC')
    ]
    assert len(names) == 2 * 10

    pipeline.insert('stream0', ('x', ), [(x % 100, ) for x in range(10000)])
    time.sleep(25)

    for name in output_names:
        rows = list(pipeline.execute('SELECT COUNT(DISTINCT x) FROM %s' %
                                     name))
        assert rows[0][0] == 100

        for x in range(100):
            # In window
            assert pipeline.execute(
                'SELECT * FROM %s WHERE old IS NULL AND new IS NOT NULL AND x = %d'
                % (name, x))
            # Out of window
            assert pipeline.execute(
                'SELECT * FROM %s WHERE old IS NOT NULL AND new IS NULL AND x = %d'
                % (name, x))

    # Drop these in reverse dependency order to prevent deadlocks
    for name in names:
        pipeline.drop_cv(name)
Example #8
def test_concurrent_sw_ticking(pipeline, clean_db):
  """
  Verify that several concurrent sliding-window queries, each
  with a different window, tick correctly at different intervals.
  """
  pipeline.create_stream('stream0', x='int')
  output_names = []
  for n in range(10):
    name = 'sw%d' % n
    pipeline.create_cv(name, 'SELECT x::integer, count(*) FROM stream0 GROUP BY x', sw='%d seconds' % (n + 10))
    output_name = name + '_output'

    q = """
    SELECT arrival_timestamp,
    CASE WHEN (old).x IS NULL THEN (new).x ELSE (old).x END AS x, old, new FROM %s_osrel
    """ % name
    pipeline.create_cv(output_name, q)
    output_names.append(output_name)

  names = [r[0] for r in pipeline.execute('SELECT name FROM pipeline_views() ORDER BY name DESC')]
  assert len(names) == 2 * 10

  pipeline.insert('stream0', ('x',), [(x % 100,) for x in range(10000)])
  time.sleep(25)

  for name in output_names:
    rows = list(pipeline.execute('SELECT COUNT(DISTINCT x) FROM %s' % name))
    assert rows[0][0] == 100

    for x in range(100):
      # In window
      assert pipeline.execute('SELECT * FROM %s WHERE old IS NULL AND new IS NOT NULL AND x = %d' % (name, x))
      # Out of window
      assert pipeline.execute('SELECT * FROM %s WHERE old IS NOT NULL AND new IS NULL AND x = %d' % (name, x))

  # Drop these in reverse dependency order to prevent deadlocks
  for name in names:
    pipeline.drop_cv(name)
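
The two assertions at the end read the output stream's old/new pair as a window transition: old IS NULL AND new IS NOT NULL marks a row entering the window, and old IS NOT NULL AND new IS NULL marks a row expiring from it. A sketch of a helper that names those two cases (hypothetical, not part of the original suite):

def saw_transition(pipeline, output_cv, x, entered):
    """True if x entered (entered=True) or expired from (entered=False)
    the sliding window, according to the CV over its output stream."""
    cond = ('old IS NULL AND new IS NOT NULL' if entered
            else 'old IS NOT NULL AND new IS NULL')
    rows = pipeline.execute(
        'SELECT * FROM %s WHERE %s AND x = %d' % (output_cv, cond, x))
    return bool(list(rows))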
Example #9
def _test_agg(pipeline, agg, check_fn=None):
    name = agg[:agg.find('(')]
    q = 'SELECT g::integer, %s OVER (PARTITION BY g ORDER BY ts::timestamp) FROM %s'
    cv_name = 'test_%s' % name
    table_name = 'test_%s_t' % name
    desc = ('ts', 'g', 'x', 'y', 'z')

    pipeline.create_cv(cv_name, q % (agg, 'stream'))
    pipeline.create_table(table_name,
                          ts='timestamp',
                          x='integer',
                          y='integer',
                          z='integer',
                          g='integer')

    rows = []
    for i, n in enumerate(range(1000)):
        ts = str(datetime.utcnow() + timedelta(seconds=i))
        row = ts, n % 10, random.randint(1, 256), random.randint(
            1, 256), random.randint(1, 256)
        rows.append(row)

    pipeline.insert('stream', desc, rows)
    pipeline.insert(table_name, desc, rows)

    if check_fn:
        return check_fn(pipeline)

    expected = list(pipeline.execute(q % (agg, table_name) + ' ORDER BY g'))
    result = list(pipeline.execute('SELECT * FROM %s ORDER BY g' % cv_name))

    assert len(expected) == len(result)

    for e, r in zip(expected, result):
        assert e == r

    pipeline.drop_cv(cv_name)
    pipeline.drop_table(table_name)
Example #10
def test_create_drop_continuous_view(pipeline, clean_db):
    """
    Basic sanity check
    """
    pipeline.create_cv('cv0', 'SELECT id::integer FROM stream')
    pipeline.create_cv('cv1', 'SELECT id::integer FROM stream')
    pipeline.create_cv('cv2', 'SELECT id::integer FROM stream')

    result = pipeline.execute('SELECT * FROM pipeline_query')
    names = [r['name'] for r in result]

    assert sorted(names) == ['cv0', 'cv1', 'cv2']

    pipeline.drop_cv('cv0')
    pipeline.drop_cv('cv1')
    pipeline.drop_cv('cv2')

    result = pipeline.execute('SELECT * FROM pipeline_query')
    names = [r['name'] for r in result]

    assert len(names) == 0
Example #12
def test_create_views(pipeline, clean_db):
    cvs = []
    pipeline.create_stream('stream0', x='int')
    q = 'SELECT count(*) FROM stream0'

    for i in range(1, MAX_CQS):
        cvs.append('cv_%d' % i)
        pipeline.create_cv(cvs[-1], q)

    try:
        pipeline.create_cv('cv_fail', q)
        assert False
    except Exception as e:
        assert 'maximum number of continuous queries exceeded' in str(e)

    ids = [
        r['id'] for r in pipeline.execute('SELECT id FROM pipeline_views()')
    ]

    assert len(set(ids)) == len(ids)
    assert set(ids) == set(range(1, MAX_CQS))

    num_remove = random.randint(128, 512)

    for _ in range(num_remove):
        pipeline.drop_cv(cvs.pop())

    for _ in range(num_remove):
        cvs.append('cv_%d' % (len(cvs) + 1))
        pipeline.create_cv(cvs[-1], q)
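
This example assumes a module preamble along these lines; MAX_CQS mirrors the server's continuous-query limit, and the value shown is illustrative:

import random

# Illustrative value: it must match the server's configured maximum number
# of continuous queries for the failure assertion above to trigger.
MAX_CQS = 1024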
Example #13
def test_output_tree(pipeline, clean_db):
    """
  Create a relatively complex tree of continuous views
  and transforms chained together by their output streams,
  and verify that all output correctly propagates to the leaves.
  """
    pipeline.create_stream('root', x='int')
    pipeline.create_cv('level0_0',
                       'SELECT x::integer, count(*) FROM root GROUP BY x')

    pipeline.create_cv('level1_0',
                       'SELECT (new).x, (new).count FROM level0_0_osrel')
    pipeline.create_cv('level1_1',
                       'SELECT (new).x, (new).count FROM level0_0_osrel')

    pipeline.create_cv('level2_0',
                       'SELECT (new).x, (new).count FROM level1_0_osrel')
    pipeline.create_cv('level2_1',
                       'SELECT (new).x, (new).count FROM level1_0_osrel')
    pipeline.create_cv('level2_2',
                       'SELECT (new).x, (new).count FROM level1_1_osrel')
    pipeline.create_cv('level2_3',
                       'SELECT (new).x, (new).count FROM level1_1_osrel')

    pipeline.create_cv('level3_0',
                       'SELECT (new).x, (new).count FROM level2_0_osrel')
    pipeline.create_cv('level3_1',
                       'SELECT (new).x, (new).count FROM level2_0_osrel')
    pipeline.create_cv('level3_2',
                       'SELECT (new).x, (new).count FROM level2_1_osrel')
    pipeline.create_cv('level3_3',
                       'SELECT (new).x, (new).count FROM level2_1_osrel')
    pipeline.create_cv('level3_4',
                       'SELECT (new).x, (new).count FROM level2_2_osrel')
    pipeline.create_cv('level3_5',
                       'SELECT (new).x, (new).count FROM level2_2_osrel')
    pipeline.create_cv('level3_6',
                       'SELECT (new).x, (new).count FROM level2_3_osrel')
    pipeline.create_cv('level3_7',
                       'SELECT (new).x, (new).count FROM level2_3_osrel')

    pipeline.insert('root', ('x', ), [(x % 100, ) for x in range(10000)])

    names = [
        r[0] for r in pipeline.execute(
            'SELECT name FROM pipeline_views() ORDER BY name DESC')
    ]
    assert len(names) == 15

    # Verify all values propagated to each node in the tree
    for name in names:
        rows = pipeline.execute('SELECT x, max(count) FROM %s GROUP BY x' %
                                name)
        for row in rows:
            x, count = row
            assert count == 100

    pipeline.insert('root', ('x', ), [(x % 100, ) for x in range(10000)])

    # Verify all values propagated to each node in the tree again
    for name in names:
        rows = pipeline.execute('SELECT x, max(count) FROM %s GROUP BY x' %
                                name)
        for row in rows:
            x, count = row
            assert count == 200

    # Drop these in reverse dependency order to prevent deadlocks
    for name in names:
        pipeline.drop_cv(name)
Example #14
def test_create_views(pipeline, clean_db):
    cvs = []
    pipeline.create_stream("stream0", x="int")
    q = "SELECT count(*) FROM stream0"

    for i in range(1, MAX_CQS):
        cvs.append("cv_%d" % i)
        pipeline.create_cv(cvs[-1], q)

    try:
        pipeline.create_cv("cv_fail", q)
        assert False
    except Exception as e:
        assert "maximum number of continuous queries exceeded" in str(e)

    ids = [r["id"] for r in pipeline.execute("SELECT id FROM pipeline_views()")]

    assert len(set(ids)) == len(ids)
    assert set(ids) == set(range(1, MAX_CQS))

    num_remove = random.randint(128, 512)

    for _ in range(num_remove):
        pipeline.drop_cv(cvs.pop())

    for _ in range(num_remove):
        cvs.append("cv_%d" % (len(cvs) + 1))
        pipeline.create_cv(cvs[-1], q)