示例#1
0
def _test_hs_agg(octopus, agg):
    values = [random.randint(-100, 100) for n in range(1000)]
    h = random.choice(values) + random.randint(-10, 10)

    octopus.create_stream('stream0', x='int')
    cq = 'SELECT %s(%d) WITHIN GROUP (ORDER BY x::integer) FROM stream0' % (agg, h)
    octopus.create_cv('test_%s' % agg, cq)

    octopus.insert('stream0', ('x',), [(v,) for v in values])

    result = octopus.execute('SELECT %s FROM test_%s' % (agg, agg)).first()

    rank, peers = _rank(h, values)
    dense_rank, _ = _dense_rank(h, values)

    return rank, dense_rank, peers, len(values), result[agg]
def test_join_with_aggs(octopus, clean_db):
    """
    Verify that joins involving aggregates referencing columns from
    multiple tables work
    """
    num_cols = 4
    join_cols = [1]
    q = """
    SELECT
    sum(s.col0::integer) AS s0,
    sum(a0.col0::integer) AS s1,
    sum(a1.col0::integer) AS s2
    FROM a1 JOIN a0 ON a1.col1 = a0.col1
    JOIN stream0 s ON s.col1::integer = a0.col1
    """
    a0_cols = dict([('col%d' % n, 'integer') for n in range(num_cols)])
    a1_cols = dict([('col%d' % n, 'integer') for n in range(num_cols)])

    octopus.create_table('a0', **a0_cols)
    octopus.create_table('a1', **a1_cols)

    a0 = _generate_rows(num_cols, 64)
    a1 = _generate_rows(num_cols, 64)
    s = _generate_rows(num_cols, 64)

    _insert(octopus, 'a0', a0, 0.1)
    _insert(octopus, 'a1', a1, 0.1)

    octopus.create_stream('stream0', **a0_cols)
    octopus.create_cv('test_agg_join', q)
    _insert(octopus, 'stream0', s)

    expected = _join(a1, _join(a0, s, join_cols), join_cols)
    result = octopus.execute('SELECT * FROM test_agg_join').first()

    # sum of col0 from stream
    s0_expected = sum([r[num_cols * 2] for r in expected])

    # sum of col0 from a0
    s1_expected = sum([r[num_cols * 1] for r in expected])

    # sum of col0 from a1
    s2_expected = sum([r[num_cols * 0] for r in expected])

    assert s0_expected == result['s0']
    assert s1_expected == result['s1']
    assert s2_expected == result['s2']
示例#3
0
def test_percentile_cont_agg(octopus, clean_db):
    range_top = 100000
    q = [0.0, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99, 1.0]

    batches = []
    min_seen = range_top
    max_seen = 0
    for _ in xrange(10):
        b = [(random.randint(0, range_top), ) for _ in xrange(5000)]
        min_seen = min(min_seen, min(b)[0])
        max_seen = max(max_seen, max(b)[0])
        batches.append(b)

    octopus.create_stream('test_stream', x='int')
    query = '''SELECT
  percentile_cont(ARRAY[%s])
  WITHIN GROUP (ORDER BY x::integer) FROM %s
  ''' % (', '.join(map(lambda f: str(f), q)), '%s')

    octopus.create_cv('test_cq_percentile_cont', query % 'test_stream')
    octopus.create_table('test_percentile_cont', x='integer')

    for b in batches:
        octopus.insert('test_stream', ('x', ), b)
        octopus.insert('test_percentile_cont', ('x', ), b)

    actual = octopus.execute(query % 'test_percentile_cont')
    result = octopus.execute('SELECT * FROM test_cq_percentile_cont')

    actual = actual.first()['percentile_cont']
    result = result.first()['percentile_cont']

    assert len(actual) == len(result)
    assert result == sorted(result)
    diff = [abs(actual[i] - result[i]) for i in xrange(len(actual))]

    # 0th and 100th percentile should be accurate.
    assert result[0] == min_seen
    assert result[-1] == max_seen

    # 1st and 99th percentile should be within 0.1%.
    assert diff[1] <= 0.001 * range_top
    assert diff[-2] <= 0.001 * range_top

    # All percentiles should be within 0.5%.
    assert all(x <= 0.005 * range_top for x in diff)
示例#4
0
def test_concurrent_crash(octopus, clean_db):
    """
  Test simple worker and combiner crashes.
  """
    octopus.create_stream('stream0', x='int')
    q = 'SELECT COUNT(*) FROM stream0'
    octopus.create_cv('test_concurrent_crash', q)
    batch_size = 25000

    desc = [0, 0, False]
    vals = [(1, )] * batch_size

    def insert():
        while True:
            octopus.insert('stream0', ['x'], vals)
            desc[1] += batch_size
            if desc[2]:
                break

    def kill():
        for _ in xrange(30):
            r = random.random()
            if r > 0.85:
                desc[0] += kill_combiner()
            if r < 0.15:
                desc[0] += kill_worker()
            time.sleep(0.1)

        desc[2] = True

    threads = [threading.Thread(target=insert), threading.Thread(target=kill)]
    map(lambda t: t.start(), threads)
    map(lambda t: t.join(), threads)

    num_killed = desc[0]
    num_inserted = desc[1]

    result = octopus.execute('SELECT count FROM test_concurrent_crash').first()

    assert num_killed > 0
    assert result['count'] <= num_inserted
    assert result['count'] >= num_inserted - (num_killed * batch_size)

    # To ensure that all remaining events in ZMQ queues have been consumed
    time.sleep(2)
示例#5
0
def test_cksketch_frequency(octopus, clean_db):
  octopus.create_stream('test_cmsketch_stream', k='int', x='int')

  q = """
  SELECT k::integer, cmsketch_agg(x::int) AS c FROM test_cmsketch_stream
  GROUP BY k
  """
  desc = ('k', 'x')
  octopus.create_cv('test_cmsketch_frequency', q)

  rows = [(n, None) for n in range(100)]
  octopus.insert('test_cmsketch_stream', desc, rows)

  result = list(octopus.execute(
    'SELECT cmsketch_frequency(c, null) AS x FROM test_cmsketch_frequency ORDER BY k').fetchall())
  assert len(result) == 100
  for row in result:
    assert row[0] == 0
示例#6
0
def test_static_streams(octopus, clean_db):
  """
  Verify that static stream definitions are dumped and restored
  """
  octopus.create_stream('static', x='int', y='float8')

  _dump(octopus, 'test_static.sql')

  octopus.drop_stream('static')
  _restore(octopus, 'test_static.sql')

  # Force the requirement of a static stream
  octopus.create_cv('static_cv', 'SELECT x, y FROM static')
  octopus.insert('static', ('x', 'y'), [(0, 1)])

  result = octopus.execute('SELECT x, y FROM static_cv').first()
  assert result['x'] == 0
  assert result['y'] == 1
示例#7
0
def test_simple_insert(octopus, clean_db):
    """
  Verify that we can insert some rows and count some stuff
  """
    octopus.create_stream('stream0', key='int')
    octopus.create_cv(
        'cv', 'SELECT key::integer, COUNT(*) FROM stream0 GROUP BY key')

    rows = [(n % 10, ) for n in range(1000)]

    octopus.insert('stream0', ('key', ), rows)

    result = list(octopus.execute('SELECT * FROM cv ORDER BY key'))

    assert len(result) == 10
    for i, row in enumerate(result):
        assert row['key'] == i
        assert row['count'] == 100
示例#8
0
def test_regression(octopus, clean_db):
    path = os.path.abspath(os.path.join(octopus.tmp_dir, 'test_copy.csv'))
    _generate_csv(
        path, [['2015-06-01 00:00:00', 'De', 'Adam_Babareka', '1', '37433']],
        desc=('day', 'project', 'title', 'count', 'size'))

    octopus.create_stream('copy_regression_stream',
                          count='int',
                          day='timestamp',
                          project='text',
                          title='text',
                          size='int')
    octopus.create_cv('test_copy_regression',
                      'SELECT sum(count) FROM copy_regression_stream')

    octopus.execute(
        "COPY copy_regression_stream (day, project, title, count, size) FROM '%s' CSV HEADER"
        % path)
示例#9
0
def test_concurrent_copy(octopus, clean_db):
    octopus.create_stream('stream0', x='int')
    octopus.create_cv('concurrent_copy0',
                      'SELECT x::int, count(*) FROM stream0 GROUP BY x')
    octopus.create_cv('concurrent_copy1', 'SELECT count(*) FROM stream0')

    tmp_file = os.path.join(tempfile.gettempdir(), 'tmp.copy')
    query = 'SELECT generate_series(1, 2000) AS x'
    octopus.execute("COPY (%s) TO '%s'" % (query, tmp_file))

    num_threads = 4
    stop = False
    inserted = [0] * num_threads

    def insert(i):
        conn = psycopg2.connect(
            'dbname=octopus user=%s host=localhost port=%s' %
            (getpass.getuser(), octopus.port))
        cur = conn.cursor()
        while not stop:
            cur.execute("COPY stream0 (x) FROM '%s'" % tmp_file)
            conn.commit()
            inserted[i] += 2000
        conn.close()

    threads = [
        threading.Thread(target=insert, args=(i, )) for i in range(num_threads)
    ]
    map(lambda t: t.start(), threads)

    time.sleep(60)

    stop = True
    map(lambda t: t.join(), threads)

    time.sleep(5)

    total = (octopus.execute(
        'SELECT sum(count) FROM concurrent_copy0').first()['sum'])
    assert total == sum(inserted)

    total = (
        octopus.execute('SELECT count FROM concurrent_copy1').first()['count'])
    assert total == sum(inserted)
示例#10
0
def test_stream_buffer(octopus, clean_db):
    """
  Run CQs which take different times to process an event in the stream buffer
  and each stream is read by a disjoint set of CQs.
  """
    octopus.create_stream('stream1', x='int', string='text')
    octopus.create_stream('stream2', x='int', string='text')

    octopus.create_cv('test_sbuf_1', 'SELECT x::int FROM stream1')
    octopus.create_cv('test_sbuf_2',
                      'SELECT x::int, pg_sleep(0.002) FROM stream1')
    octopus.create_cv('test_sbuf_3', 'SELECT x::int FROM stream2')
    octopus.create_cv('test_sbuf_4',
                      'SELECT x::int, pg_sleep(0.002) FROM stream2')

    num_per_batch = 5000
    num_batches = 6
    # We're sending this 2k string with each event just to consume
    # more space in the stream buffer. That way we get to test the
    # wrapping of the buffer as well.
    values = [(i, 'a' * 2048) for i in xrange(num_per_batch)]

    def insert(stream):
        for _ in xrange(num_batches - 1):
            octopus.insert(stream, ('x', 'string'), values)

    threads = [
        threading.Thread(target=insert, args=('stream1', )),
        threading.Thread(target=insert, args=('stream2', ))
    ]

    map(lambda t: t.start(), threads)
    map(lambda t: t.join(), threads)

    octopus.insert('stream1', ('x', 'string'), values)
    octopus.insert('stream2', ('x', 'string'), values)

    q = 'SELECT COUNT(*) FROM test_sbuf_%d'
    r1 = octopus.execute(q % 1).first()[0]
    r2 = octopus.execute(q % 2).first()[0]
    r3 = octopus.execute(q % 3).first()[0]
    r4 = octopus.execute(q % 4).first()[0]

    assert r1 == r2 == r3 == r4 == num_batches * num_per_batch
示例#11
0
def test_transforms(octopus, clean_db):
  """
  Verify that continuous transforms work properly on output streams
  """
  octopus.create_stream('stream0', x='int')
  octopus.create_cv('sw', 'SELECT x::integer, COUNT(*) FROM stream0 GROUP BY x',
                     sw='5 seconds')

  # Write a row to a stream each time a row goes out of window
  q = 'SELECT (old).x FROM sw_osrel WHERE old IS NOT NULL AND new IS NULL'
  octopus.create_stream('oow_stream', x='integer')
  octopus.create_ct('ct', q, "octopus_stream_insert('oow_stream')")
  octopus.create_cv('ct_recv', 'SELECT x FROM oow_stream')

  octopus.insert('stream0', ('x',), [(x % 100,) for x in range(10000)])
  time.sleep(7)

  rows = list(octopus.execute('SELECT * FROM ct_recv'))
  assert len(rows) == 100
示例#12
0
def test_schema_only(octopus, clean_db):
  """
  Verify that it is possible to only dump continuous view schemas and not data
  """
  octopus.create_stream('stream0', x='int')
  octopus.create_cv('test_schema', 'SELECT COUNT(*) FROM stream0')
  octopus.insert('stream0', ('x',), [(x,) for x in range(10)])

  result = octopus.execute('SELECT count FROM test_schema').first()
  assert result['count'] == 10

  _dump(octopus, 'test_schema.sql', schema_only=True)

  octopus.drop_all()
  _restore(octopus, 'test_schema.sql')

  # No data loaded
  result = list(octopus.execute('SELECT count FROM test_schema'))
  assert not result
示例#13
0
def test_distinct(octopus, clean_db):
    """
  Verify that streaming SELECT DISTINCT ON (...) works
  """
    octopus.create_stream('stream0', x='int', y='int', z='int')
    octopus.create_table('table0', x='int', y='int', z='int')
    q = 'SELECT DISTINCT ON (x::int, y::int - z::int) x::int, y::int FROM stream0'
    octopus.create_cv('test_distinct', q)

    uniques = defaultdict(set)
    values = []
    for _ in xrange(2000):
        x, y, z = random.randint(0, 20), random.randint(0, 20), random.randint(
            0, 20)
        values.append((x, y, z))
        uniques[(x, y - z)].add(y)

    octopus.insert('stream0', ['x', 'y', 'z'], values)
    octopus.insert('table0', ['x', 'y', 'z'], values)

    q = """
  SELECT DISTINCT ON (x::int, y::int - z::int) x::int, y::int FROM table0
  """
    expected = list(octopus.execute(q))
    expected = len(expected)

    assert expected < 2000

    result = octopus.execute('SELECT COUNT(*) FROM test_distinct').first()

    assert expected == result['count']

    # Check if the first row was selected for uniques
    result = octopus.execute('SELECT * FROM test_distinct')
    reverse_uniques = defaultdict(set)

    for (x, _), ys in uniques.iteritems():
        for y in ys:
            reverse_uniques[y].add(x)

    for row in result:
        assert row['x'] in reverse_uniques[row['y']]
示例#14
0
def test_deadlock_regress(octopus, clean_db):
  nitems = 2000000
  tmp_file = os.path.join(tempfile.gettempdir(), 'tmp.json')
  query = 'SELECT generate_series(1, %d) AS n' % nitems
  octopus.execute("COPY (%s) TO '%s'" % (query, tmp_file))

  octopus.create_stream('s1', n='int')
  octopus.create_stream('s2', n='int')
  octopus.create_ct('ct', 'SELECT n FROM s1 WHERE n IS NOT NULL',
                     "octopus_stream_insert('s2')")
  octopus.create_cv('cv', 'SELECT count(*) FROM s2')

  for copy in [True, False]:
    for nworkers in [1, 4]:
      for sync in ['receive', 'commit']:
        octopus.stop()
        octopus.run({
          'continuous_query_num_workers': nworkers,
          'stream_insert_level': 'sync_%s' % sync
          })

        octopus.execute("SELECT truncate_continuous_view('cv')")
        octopus.execute('COMMIT')

        if copy:
          octopus.execute("COPY s1 (n) FROM '%s'" % tmp_file)
        else:
          octopus.execute('INSERT INTO s1 (n) %s' % query)

        count = dict(octopus.execute('SELECT count FROM cv').first() or {})
        ntries = 5
        while count.get('count') != nitems and ntries > 0:
          assert sync == 'receive'
          time.sleep(1)
          count = dict(octopus.execute('SELECT count FROM cv').first() or {})
          ntries -= 1
        assert count and count['count'] == nitems

  os.remove(tmp_file)

  octopus.stop()
  octopus.run()
示例#15
0
def test_hll_count_distinct(octopus, clean_db):
    """
    Verify that streaming COUNT(DISTINCT) works
    """
    octopus.create_stream('stream0', x='int')
    q = 'SELECT COUNT(DISTINCT x::integer) FROM stream0'
    octopus.create_cv('test_count_distinct', q)

    desc = ('x', )
    values = [(random.randint(1, 1024), ) for n in range(1000)]

    octopus.insert('stream0', desc, values)

    expected = len(set(values))
    result = octopus.execute('SELECT count FROM test_count_distinct').first()

    # Error rate should be well below %2
    delta = abs(expected - result['count'])

    assert delta / float(expected) <= 0.02
示例#16
0
def test_object_aggs(octopus, clean_db):
    """
    Verify that combines work properly on object aggs
    """
    q = """
    SELECT x::integer %% 10 AS k,
    json_agg(x), json_object_agg(x, y::float8), string_agg(s::text, \' :: \')FROM stream0 GROUP BY k;
    """
    desc = ('x', 'y', 's')
    octopus.create_stream('stream0', x='int', y='float8', s='text')
    octopus.create_cv('test_object_aggs', q)
    octopus.create_table('test_object_aggs_t',
                         x='integer',
                         y='float8',
                         s='text')

    rows = []
    for n in range(10000):
        row = (random.randint(0, 1000), random.random(),
               str(n) * random.randint(1, 8))
        rows.append(row)

    octopus.insert('stream0', desc, rows)
    octopus.insert('test_object_aggs_t', desc, rows)

    tq = """
    SELECT json_agg(x), json_object_agg(x, y::float8), string_agg(s::text, \' :: \') FROM test_object_aggs_t
    """
    table_result = list(octopus.execute(tq))

    cq = """
    SELECT combine(json_agg), combine(json_object_agg), combine(string_agg) FROM test_object_aggs
    """
    cv_result = list(octopus.execute(cq))

    assert len(table_result) == len(cv_result)

    for tr, cr in zip(table_result, cv_result):
        assert sorted(tr[0]) == sorted(cr[0])
        assert sorted(tr[1]) == sorted(cr[1])
        assert sorted(tr[2]) == sorted(cr[2])
示例#17
0
def test_concurrent_inserts(octopus, clean_db):
    octopus.create_stream('stream0', x='int')
    octopus.create_cv('concurrent_inserts0',
                      'SELECT x::int, count(*) FROM stream0 GROUP BY x')
    octopus.create_cv('concurrent_inserts1', 'SELECT count(*) FROM stream0')

    num_threads = 4
    stop = False
    inserted = [0] * num_threads

    def insert(i):
        conn = psycopg2.connect(
            'dbname=octopus user=%s host=localhost port=%s' %
            (getpass.getuser(), octopus.port))
        cur = conn.cursor()
        while not stop:
            cur.execute('INSERT INTO stream0 (x) '
                        'SELECT x % 100 FROM generate_series(1, 2000) AS x')
            conn.commit()
            inserted[i] += 2000
        conn.close()

    threads = [
        threading.Thread(target=insert, args=(i, )) for i in range(num_threads)
    ]
    map(lambda t: t.start(), threads)

    time.sleep(60)

    stop = True
    map(lambda t: t.join(), threads)

    time.sleep(5)

    total = (octopus.execute(
        'SELECT sum(count) FROM concurrent_inserts0').first()['sum'])
    assert total == sum(inserted)

    total = (octopus.execute('SELECT count FROM concurrent_inserts1').first()
             ['count'])
    assert total == sum(inserted)
示例#18
0
def test_multiple(octopus, clean_db):
    """
  Verify that multiple continuous views work together properly
  """
    octopus.create_stream('stream0', n='numeric', s='text', unused='int')
    octopus.create_cv('cv0',
                      'SELECT n::numeric FROM stream0 WHERE n > 10.00001')
    octopus.create_cv('cv1',
                      'SELECT s::text FROM stream0 WHERE s LIKE \'%%this%%\'')

    rows = [(float(n + 10), 'this', 100) for n in range(1000)]
    for n in range(10):
        rows.append((float(n), 'not a match', -n))

    octopus.insert('stream0', ('n', 's', 'unused'), rows)

    result = list(octopus.execute('SELECT * FROM cv0'))
    assert len(result) == 999

    result = list(octopus.execute('SELECT * FROM cv1'))
    assert len(result) == 1000
示例#19
0
def test_dump(octopus, clean_db):
  """
  Verify that we can dump and restore CVs using INSERT statements
  """
  octopus.create_stream('stream0', x='int')
  q = """
  SELECT x::integer %% 100 AS g, avg(x) + 1 AS avg, count(*), count(distinct x) AS distincts FROM stream0
  GROUP BY g
  """
  octopus.create_cv('test_dump', q)

  rows = [(x,) for x in range(1000)]
  octopus.insert('stream0', ('x',), rows)

  def _verify():
    result = octopus.execute('SELECT count(*) FROM test_dump').first()
    assert result['count'] == 100

    result = octopus.execute('SELECT sum(avg) FROM test_dump').first()
    assert result['sum'] == 50050

    result = octopus.execute('SELECT sum(distincts) FROM test_dump').first()
    assert result['sum'] == 1000

  _verify()
  _dump(octopus, 'test_dump.sql')

  octopus.drop_all()
  _restore(octopus, 'test_dump.sql')
  _verify()

  # Now verify that we can successfully add more data to the restored CV
  rows = [(x,) for x in range(2000)]
  octopus.insert('stream0', ('x',), rows)

  result = octopus.execute('SELECT sum(count) FROM test_dump').first()
  assert result['sum'] == 3000

  result = octopus.execute('SELECT sum(distincts) FROM test_dump').first()
  assert result['sum'] == 2000
示例#20
0
def test_hypothetical_set_aggs(octopus, clean_db):
    """
    Verify that combines work properly on HS aggs
    """
    q = """
    SELECT x::integer %% 10 AS k,
    rank(256) WITHIN GROUP (ORDER BY x),
    dense_rank(256) WITHIN GROUP (ORDER BY x)
    FROM stream0 GROUP BY k
    """
    desc = ('x', 'y')
    octopus.create_stream('stream0', x='int', y='float8')
    octopus.create_cv('test_hs_aggs', q)
    octopus.create_table('test_hs_aggs_t', x='integer', y='float8')

    rows = []
    for n in range(10000):
        row = (random.randint(0, 1000), random.random())
        rows.append(row)

    octopus.insert('stream0', desc, rows)
    octopus.insert('test_hs_aggs_t', desc, rows)

    # Note that the CQ will use the HLL variant of dense_rank,
    # so use hll_dense_rank on the table too
    tq = """
    SELECT rank(256) WITHIN GROUP (ORDER BY x), hll_dense_rank(256) WITHIN GROUP (ORDER BY x)
    FROM test_hs_aggs_t
    """
    table_result = list(octopus.execute(tq))

    cq = """
    SELECT combine(rank), combine(dense_rank) FROM test_hs_aggs
    """
    cv_result = list(octopus.execute(cq))

    assert len(table_result) == len(cv_result)

    for tr, cr in zip(table_result, cv_result):
        assert tr == cr
示例#21
0
def test_create_drop_continuous_view(octopus, clean_db):
    """
  Basic sanity check
  """
    octopus.create_stream('stream0', id='int')
    octopus.create_cv('cv0', 'SELECT id::integer FROM stream0')
    octopus.create_cv('cv1', 'SELECT id::integer FROM stream0')
    octopus.create_cv('cv2', 'SELECT id::integer FROM stream0')

    result = octopus.execute('SELECT * FROM octopus_views()')
    names = [r['name'] for r in result]

    assert sorted(names) == ['cv0', 'cv1', 'cv2']

    octopus.drop_cv('cv0')
    octopus.drop_cv('cv1')
    octopus.drop_cv('cv2')

    result = octopus.execute('SELECT * FROM octopus_views()')
    names = [r['name'] for r in result]

    assert len(names) == 0
示例#22
0
def test_combine(octopus, clean_db):
    """
  Verify that partial tuples are combined with on-disk tuples
  """
    octopus.create_stream('stream0', key='text', unused='int')
    octopus.create_cv('combine',
                      'SELECT key::text, COUNT(*) FROM stream0 GROUP BY key')

    rows = []
    for n in range(100):
        for m in range(100):
            key = '%d%d' % (n % 10, m)
            rows.append((key, 0))

    octopus.insert('stream0', ('key', 'unused'), rows)

    total = 0
    result = octopus.execute('SELECT * FROM combine')
    for row in result:
        total += row['count']

    assert total == 10000
示例#23
0
def test_colums_subset(octopus, clean_db):
    """
    Verify that copying data from a file into a stream works when the file's input
    columns are a subset of the stream0's columns
    """
    octopus.create_stream('stream0', x='int', y='float8', z='numeric', m='int')
    q = 'SELECT sum(x::integer) AS s0, sum(y::float8) AS s1, avg(z::numeric), max(m::integer) FROM stream0'
    octopus.create_cv('test_copy_subset', q)
    octopus.create_table('test_copy_subset_t',
                         x='integer',
                         y='float8',
                         z='numeric')

    path = os.path.abspath(os.path.join(octopus.tmp_dir, 'test_copy.csv'))

    rows = []
    for n in range(10000):
        row = random.randint(1, 1024), random.randint(1, 1024), random.random()
        rows.append(row)

    _generate_csv(path, rows, desc=('x', 'y', 'z'))

    octopus.execute(
        'COPY test_copy_subset_t (x, y, z) FROM \'%s\' HEADER CSV' % path)

    octopus.execute('COPY stream0 (x, y, z) FROM \'%s\' HEADER CSV' % path)

    expected = octopus.execute(
        'SELECT sum(x::integer) AS s0, sum(y::float8) AS s1, avg(z::numeric) FROM test_copy_subset_t'
    ).first()
    result = list(octopus.execute('SELECT s0, s1, avg FROM test_copy_subset'))

    assert len(result) == 1

    result = result[0]

    assert result[0] == expected[0]
    assert result[1] == expected[1]
    assert result[2] == expected[2]
示例#24
0
def test_concurrent_sw_ticking(octopus, clean_db):
  """
  Verify that several concurrent sliding-window queries each
  having different windows tick correctly at different intervals.
  """
  octopus.create_stream('stream0', x='int')
  output_names = []
  for n in range(10):
    name = 'sw%d' % n
    octopus.create_cv(name, 'SELECT x::integer, count(*) FROM stream0 GROUP BY x', sw='%d seconds' % (n + 10))
    output_name = name + '_output'

    q = """
    SELECT arrival_timestamp,
    CASE WHEN (old).x IS NULL THEN (new).x ELSE (old).x END AS x, old, new FROM %s_osrel
    """ % name
    octopus.create_cv(output_name, q)
    output_names.append(output_name)

  names = [r[0] for r in octopus.execute('SELECT name FROM octopus_views() ORDER BY name DESC')]
  assert len(names) == 2 * 10

  octopus.insert('stream0', ('x',), [(x % 100,) for x in range(10000)])
  time.sleep(25)

  for name in output_names:
    rows = list(octopus.execute('SELECT COUNT(DISTINCT x) FROM %s' % name))
    assert rows[0][0] == 100

    for x in range(100):
      # In window
      assert octopus.execute('SELECT * FROM %s WHERE old IS NULL AND new IS NOT NULL AND x = %d' % (name, x))
      # Out of window
      assert octopus.execute('SELECT * FROM %s WHERE old IS NOT NULL AND new IS NULL AND x = %d' % (name, x))

  # Drop these in reverse dependency order to prevent deadlocks
  for name in names:
    octopus.drop_cv(name)
示例#25
0
def test_user_low_and_high_card(octopus, clean_db):
    """
  Verify that Bloom filters's with low and high cardinalities are correcly
  unioned
  """
    octopus.create_stream('test_bloom_stream', x='int', k='int')

    q = """
  SELECT k::integer, bloom_agg(x::integer) FROM test_bloom_stream GROUP BY k
  """
    desc = ('k', 'x')
    octopus.create_cv('test_bloom_agg', q)

    # Low cardinalities
    rows = []
    for n in range(1000):
        rows.append((0, random.choice((-1, -2))))
        rows.append((1, random.choice((-3, -4))))

    # High cardinalities
    for n in range(10000):
        rows.append((2, n))
        rows.append((3, n))

    octopus.insert('test_bloom_stream', desc, rows)

    result = octopus.execute('SELECT bloom_cardinality(combine(bloom_agg)) '
                             'FROM test_bloom_agg WHERE k in (0, 1)').first()
    assert result[0] == 4

    result = octopus.execute('SELECT bloom_cardinality(combine(bloom_agg)) '
                             'FROM test_bloom_agg WHERE k in (2, 3)').first()
    assert result[0] == 8879

    result = octopus.execute('SELECT bloom_cardinality(combine(bloom_agg)) '
                             'FROM test_bloom_agg').first()
    assert result[0] == 8881
示例#26
0
def test_windowed_combine(octopus, clean_db):
    """
    Verify that windowed queries with combines work
    """
    q = """
    SELECT x::integer, avg(y::integer) FROM stream0 GROUP BY x
    """
    desc = ('x', 'y')
    octopus.create_stream('stream0', x='int', y='float8')
    octopus.create_cv('test_windowed_combine', q)
    octopus.create_table('test_windowed_combine_t', x='integer', y='integer')

    rows = []
    for n in range(10000):
        row = (n, n)
        rows.append(row)

    octopus.insert('stream0', desc, rows)
    octopus.insert('test_windowed_combine_t', desc, rows)

    table = """
    SELECT first_value(x) OVER w, avg(y) OVER w
    FROM test_windowed_combine_t
    WINDOW w AS (ORDER BY x ROWS BETWEEN CURRENT ROW AND 5 FOLLOWING)
    ORDER BY first_value
    """
    expected = list(octopus.execute(table))
    combine = """
    SELECT first_value(x) OVER w, avg(avg) OVER w
    FROM test_windowed_combine
    WINDOW w AS (ORDER BY x ROWS BETWEEN CURRENT ROW AND 5 FOLLOWING)
    ORDER BY first_value
    """
    actual = list(octopus.execute(combine))

    for e, a in zip(expected, actual):
        assert e == a
示例#27
0
def test_prepared_inserts(octopus, clean_db):
  """
  Verify that we can PREPARE and EXECUTE an INSERT statement
  """
  octopus.create_stream('stream0', x='int', y='float8')

  conn = psycopg2.connect('dbname=octopus user=%s host=localhost port=%s' % (getpass.getuser(), octopus.port))
  db = conn.cursor()
  db.execute('CREATE CONTINUOUS VIEW test_prepared0 AS SELECT x::integer, COUNT(*), sum(y::integer) FROM stream0 GROUP BY x')
  db.execute('CREATE CONTINUOUS VIEW test_prepared1 AS SELECT x::integer, COUNT(*), sum(y::float8) FROM stream0 GROUP BY x')
  conn.commit()

  db.execute('PREPARE ins AS INSERT INTO stream0 (x, y) VALUES ($1, $2)')

  for n in range(10000):
    row = (n % 100, random.random())
    db.execute('EXECUTE ins (%s, %s)' % row)

  time.sleep(0.1)

  conn.commit()

  result = list(octopus.execute('SELECT * FROM test_prepared0 ORDER BY x'))

  assert len(result) == 100

  for n in range(100):
    assert result[n]['count'] == 100

  result = list(octopus.execute('SELECT * FROM test_prepared1 ORDER BY x'))

  assert len(result) == 100

  for n in range(100):
    assert result[n]['count'] == 100

  conn.close()
示例#28
0
def test_stats_aggs(octopus, clean_db):
    """
    Verify that combines work on stats aggs
    """
    q = """
    SELECT x::integer %% 10 AS k,
    regr_sxx(x, y::float8), stddev(x) FROM stream0 GROUP BY k;
    """
    desc = ('x', 'y')
    octopus.create_stream('stream0', x='int', y='float8')
    octopus.create_cv('test_stats_aggs', q)
    octopus.create_table('test_stats_aggs_t', x='integer', y='float8')

    rows = []
    for n in range(10000):
        row = (random.randint(0, 1000), random.random())
        rows.append(row)

    octopus.insert('stream0', desc, rows)
    octopus.insert('test_stats_aggs_t', desc, rows)

    tq = """
    SELECT regr_sxx(x, y::float8), stddev(x) FROM test_stats_aggs_t
    """
    table_result = list(octopus.execute(tq))

    cq = """
    SELECT combine(regr_sxx), combine(stddev) FROM test_stats_aggs
    """
    cv_result = list(octopus.execute(cq))

    assert len(table_result) == len(cv_result)

    for tr, cr in zip(table_result, cv_result):
        assert abs(tr[0] - cr[0]) < 0.00001
        assert abs(tr[1] - cr[1]) < 0.00001
示例#29
0
def test_hll_distinct(octopus, clean_db):
    """
    Verify that combines work on HLL COUNT DISTINCT queries
    """
    q = """
    SELECT x::integer %% 10 AS k, COUNT(DISTINCT x) AS count FROM stream0 GROUP BY k
    """
    desc = ('x', 'y')
    octopus.create_stream('stream0', x='int', y='float8')
    octopus.create_cv('test_hll_distinct', q)
    octopus.create_table('test_hll_distinct_t', x='integer', y='float8')

    rows = []
    for n in range(10000):
        row = (random.randint(0, 1000), random.random())
        rows.append(row)

    octopus.insert('stream0', desc, rows)
    octopus.insert('test_hll_distinct_t', desc, rows)

    # Note that the CQ will use the HLL variant of COUNT DISTINCT,
    # so use hll_count_distinct on the table too
    tq = """
    SELECT hll_count_distinct(x) FROM test_hll_distinct_t
    """
    table_result = list(octopus.execute(tq))

    cq = """
    SELECT combine(count) FROM test_hll_distinct
    """
    cv_result = list(octopus.execute(cq))

    assert len(table_result) == len(cv_result)

    for tr, cr in zip(table_result, cv_result):
        assert tr == cr
示例#30
0
def test_null_groups(octopus, clean_db):
    """
    Verify that null group columns are considered equal
    """
    octopus.create_stream('s', x='int', y='int', z='int')
    q = """
    SELECT x::integer, y::integer, z::integer, COUNT(*) FROM s
    GROUP BY x, y, z;
    """
    desc = ('x', 'y', 'z')
    octopus.create_cv('test_null_groups', q)
    octopus.create_table('test_null_groups_t',
                         x='integer',
                         y='integer',
                         z='integer')

    rows = []
    for n in range(10000):
        vals = list(random.randint(0, 10) for n in range(3))
        vals = map(lambda n: random.random() > 0.1 and n or None, vals)
        rows.append(tuple(vals))

    octopus.insert('s', desc, rows)
    octopus.insert('test_null_groups_t', desc, rows)

    table_q = """
    SELECT x, y, z, COUNT(*) FROM test_null_groups_t
    GROUP BY x, y, z ORDER BY x, y, z;
    """
    expected = list(octopus.execute(table_q))
    result = list(
        octopus.execute(
            'SELECT x, y, z, count FROM test_null_groups ORDER BY x, y, z'))

    for r, e in zip(result, expected):
        assert r == e