示例#1
0
def smoke_loom():
    """Yield an in-memory bdb holding a small Loom-backed population.

    Table ``t`` is filled with every combination of four binary columns
    (a, b, c, d) and one two-valued string column (e).  Population ``p``
    is declared over it and generator ``m`` is created with the loom
    backend and given a single initialized model; no analysis is run.

    The calling test is skipped through ``pytest.skip`` when the Loom
    backend cannot be imported.
    """
    with tempdir('bayeslite-loom') as loom_store_path:
        with bayesdb_open(':memory:') as bdb:
            try:
                from bayeslite.backends.loom_backend import LoomBackend
            except ImportError:
                pytest.skip('Failed to import Loom.')
            loom = LoomBackend(loom_store_path=loom_store_path)
            bayesdb_register_backend(bdb, loom)
            bdb.sql_execute('CREATE TABLE t (a, b, c, d, e)')

            # XXX Insert synthetic data generator here.
            binary = range(2)
            combinations = itertools.product(
                binary, binary, binary, binary, ['x', 'y'])
            for row in combinations:
                # Each row is already the (a, b, c, d, e) tuple to bind.
                bdb.sql_execute(
                    '''
                    INSERT INTO t (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)
                ''', row)

            bdb.execute('''
                CREATE POPULATION p FOR t WITH SCHEMA (
                    SET STATTYPES OF a, b, c, d TO NUMERICAL;
                    SET STATTYPES OF e TO NOMINAL
                )
            ''')

            for statement in ('CREATE GENERATOR m FOR p using loom;',
                              'INITIALIZE 1 MODELS FOR m;'):
                bdb.execute(statement)

            yield bdb
def test_loom_guess_schema_nominal():
    """Test to make sure that LoomBackend handles the case where the user
    provides a nominal variable with more than 256 distinct values. In this
    case, Loom automatically specifies the unbounded_nominal type.

    The column is filled with 300 random 20-letter words drawn from the
    bdb's own PRNG; the test only checks that the create/initialize/
    analyze/drop sequence completes without raising.
    """
    with tempdir('bayeslite-loom') as loom_store_path:
        with bayesdb_open(':memory:') as bdb:
            bayesdb_register_backend(
                bdb, LoomBackend(loom_store_path=loom_store_path))
            bdb.sql_execute('create table t (v)')

            # Generate every word before inserting anything, one PRNG draw
            # per letter, so the PRNG is consumed in the same order as a
            # plain nested loop would consume it.
            alphabet = string.letters
            words = [
                ''.join(
                    alphabet[bdb._prng.weakrandom_uniform(len(alphabet))]
                    for _letter in xrange(20))
                for _word in xrange(300)
            ]
            for word in words:
                bdb.sql_execute(
                    '''
                    insert into t (v) values (?)
                ''', (word, ))

            bdb.execute('create population p for t (v nominal)')
            bdb.execute('create generator g for p using loom')
            bdb.execute('initialize 1 model for g')
            bdb.execute('analyze g for 50 iterations')
            bdb.execute('drop models from g')
            bdb.execute('drop generator g')
            bdb.execute('drop population p')
            bdb.execute('drop table t')
示例#3
0
def bdb_for_checking_cmi(backend, iterations, seed):
    """Yield a seeded in-memory bdb with an analyzed generator ``m``.

    Table ``t`` receives the rows produced by
    ``generate_v_structured_data(1000, bdb.np_prng)``; population ``p``
    treats columns a, b and c as nominal.  Generator ``m`` is created with
    the requested backend, given 10 models and analyzed for `iterations`
    iterations before the bdb is yielded.

    backend    -- 'loom' or 'cgpm'; anything else raises ValueError (after
                  the table and population have been created).
    iterations -- number of analysis iterations (formatted with %d).
    seed       -- passed through to bayesdb_open.

    With 'loom', the test is skipped via pytest.skip if the Loom backend
    cannot be imported.  With 'cgpm', multiprocessing is switched on for
    the duration of initialization and analysis and off again afterwards.
    """
    with tempdir('bayeslite-loom') as loom_store_path:
        with bayesdb_open(':memory:', seed=seed) as bdb:
            bdb.sql_execute('CREATE TABLE t (a, b, c)')
            observations = generate_v_structured_data(1000, bdb.np_prng)
            for observation in observations:
                bdb.sql_execute(
                    '''
                    INSERT INTO t (a, b, c) VALUES (?, ?, ?)
                ''', observation)

            bdb.execute('''
                CREATE POPULATION p FOR t WITH SCHEMA (
                    SET STATTYPES OF a, b, c TO NOMINAL;
                )
            ''')

            multiprocess_cgpm = False
            if backend == 'loom':
                try:
                    from bayeslite.backends.loom_backend import LoomBackend
                except ImportError:
                    pytest.skip('Failed to import Loom.')
                loom = LoomBackend(loom_store_path=loom_store_path)
                bayesdb_register_backend(bdb, loom)
                bdb.execute('CREATE GENERATOR m FOR p using loom')
            elif backend == 'cgpm':
                bdb.execute('CREATE GENERATOR m FOR p using cgpm')
                bdb.backends['cgpm'].set_multiprocess('on')
                multiprocess_cgpm = True
            else:
                raise ValueError('Backend %s unknown' % (backend, ))

            # XXX we may want to downscale this eventually.
            bdb.execute('INITIALIZE 10 MODELS FOR m;')
            analyze_statement = 'ANALYZE m FOR %d ITERATIONS;' % (iterations, )
            bdb.execute(analyze_statement)

            if multiprocess_cgpm:
                bdb.backends['cgpm'].set_multiprocess('off')
            yield bdb
示例#4
0
def get_backend_object(cfg):
    """Build the backend object selected by the configuration.

    cfg -- configuration object.  Its ``backend`` attribute names the
           backend; ``loom_path`` is read only when the backend is 'loom'.

    Returns ``CGPM_Backend({}, multiprocess=False)`` for 'cgpm' and
    ``LoomBackend(cfg.loom_path)`` for 'loom'.

    Raises RuntimeError if ``cfg.backend`` is None or names any other
    backend.
    """
    if cfg.backend is None:
        raise RuntimeError('BACKEND was not set in config file')

    if cfg.backend == 'cgpm':
        return CGPM_Backend({}, multiprocess=False)
    if cfg.backend == 'loom':
        return LoomBackend(cfg.loom_path)

    # Fail loudly instead of falling off the end and handing the caller
    # None, which would only surface later as a confusing error.
    raise RuntimeError(
        'Unknown BACKEND in config file: %r' % (cfg.backend, ))
def test_population_two_generators():
    """Create two Loom generators (g0, g1) for the same population.

    Only checks that neither CREATE GENERATOR statement raises.
    """
    with tempdir('bayeslite-loom') as loom_store_path:
        with bayesdb_open(':memory:') as bdb:
            loom = LoomBackend(loom_store_path=loom_store_path)
            bayesdb_register_backend(bdb, loom)
            bdb.sql_execute('create table t (x)')
            for value in xrange(10):
                bdb.sql_execute('insert into t (x) values (?)', (value, ))
            for statement in (
                    'create population p for t (x numerical)',
                    'create generator g0 for p using loom',
                    'create generator g1 for p using loom'):
                bdb.execute(statement)
def test_stattypes():
    """Run LoomBackend over a table with one column per declared stattype.

    Ten random rows are inserted, a single model is analyzed, and a few
    density and simulate queries are issued.  Apart from the one query
    that is expected to raise, the test only checks that Loom reports no
    errors.
    """
    with tempdir('bayeslite-loom') as loom_store_path:
        with bayesdb_open(':memory:') as bdb:

            def draw(bound):
                # Single draw from the bdb's PRNG with the given bound.
                return bdb._prng.weakrandom_uniform(bound)

            loom = LoomBackend(loom_store_path=loom_store_path)
            bayesdb_register_backend(bdb, loom)
            bdb.sql_execute('create table t (u, co, b, ca, cy, nu, no)')
            labels = ['a', 'b', 'c']
            for _row in xrange(10):
                # Draw in column order so the PRNG is consumed left to
                # right, one draw per column.
                u = labels[draw(3)]
                co = draw(200)
                b = draw(2)
                ca = labels[draw(3)]
                cy = draw(1000) / 4.0
                nu = draw(1000) / 4.0 - 100.0
                no = draw(1000) / 4.0
                bdb.sql_execute(
                    '''
                    insert into t (u, co, b, ca, cy, nu, no)
                    values (?, ?, ?, ?, ?, ?, ?)''',
                    (u, co, b, ca, cy, nu, no))
            bdb.execute('''create population p for t(
                u unbounded_nominal;
                co counts;
                b boolean;
                ca nominal;
                cy cyclic;
                nu numerical;
                no nominal)
            ''')
            for setup_statement in ('create generator g for p using loom',
                                    'initialize 1 model for g',
                                    'analyze g for 50 iterations'):
                bdb.execute(setup_statement)

            joint_density = bdb.execute('''estimate probability density of
                (co=2, nu=50, u='a') by p''')
            joint_density.fetchall()
            conditional_density = bdb.execute('''estimate probability density of
                (nu = 50, u='a') given (co=2) by p''')
            conditional_density.fetchall()
            with pytest.raises(Exception):
                # Known failure: encoding boolean variables appears to be
                # broken in LoomBackend.simulate_joint, even though using
                # b=1 as a simulate condition raises no error.
                bdb.execute('''estimate probability density of
                    (b=0) by p''').fetchall()
            unconditional_sample = bdb.execute('''simulate u, co, b, ca, cy, nu, no
                from p limit 1''')
            unconditional_sample.fetchall()
            conditional_sample = bdb.execute('''simulate u, b, ca, no
                from p given nu=3, co=2, b=1 limit 1''')
            conditional_sample.fetchall()

            for teardown_statement in ('drop models from g',
                                       'drop generator g',
                                       'drop population p',
                                       'drop table t'):
                bdb.execute(teardown_statement)
def test_stattypes():
    """Run LoomBackend over a table with one column per declared stattype.

    Ten random rows are inserted, a single model is analyzed, and one
    density query and one simulate query are issued.  The test only
    checks that Loom reports no errors.
    """
    with tempdir('bayeslite-loom') as loom_store_path:
        with bayesdb_open(':memory:') as bdb:

            def draw(bound):
                # Single draw from the bdb's PRNG with the given bound.
                return bdb._prng.weakrandom_uniform(bound)

            loom = LoomBackend(loom_store_path=loom_store_path)
            bayesdb_register_backend(bdb, loom)
            bdb.sql_execute('create table t (u, co, b, ca, cy, nu, no)')
            labels = ['a', 'b', 'c']
            for _row in xrange(10):
                # Draw in column order so the PRNG is consumed left to
                # right, one draw per column.
                u = labels[draw(3)]
                co = draw(200)
                b = draw(2)
                ca = labels[draw(3)]
                cy = draw(1000) / 4.0
                nu = draw(1000) / 4.0 - 100.0
                no = draw(1000) / 4.0
                bdb.sql_execute(
                    '''
                    insert into t (u, co, b, ca, cy, nu, no)
                    values (?, ?, ?, ?, ?, ?, ?)''',
                    (u, co, b, ca, cy, nu, no))
            bdb.execute('''create population p for t(
                u unbounded_nominal;
                co counts;
                b boolean;
                ca nominal;
                cy cyclic;
                nu numerical;
                no nominal)
            ''')
            for setup_statement in ('create generator g for p using loom',
                                    'initialize 1 model for g',
                                    'analyze g for 50 iterations'):
                bdb.execute(setup_statement)

            density = bdb.execute('''estimate probability density of
                nu = 50, u='a' from p''')
            density.fetchall()
            sample = bdb.execute('''simulate u, co, b, ca, cy, nu, no
                from p limit 1''')
            sample.fetchall()

            for teardown_statement in ('drop models from g',
                                       'drop generator g',
                                       'drop population p',
                                       'drop table t'):
                bdb.execute(teardown_statement)
示例#8
0
 def register(self, line, cell=None):
     """Register the backend named in `line` with ``self._bdb``.

     `line` is split shell-style and parsed as ``<backend> [args ...]``.
     Only the 'loom' backend is recognised; its first extra argument is
     used as the Loom store path.  `cell` is accepted but not used.

     Raises ValueError when the backend name is unknown, when the loom
     backend cannot be imported, or when no path is given for loom.
     """
     parser = argparse.ArgumentParser()
     parser.add_argument('backend', help='Name of backend to register.')
     parser.add_argument(
         'args', type=str, default=[], nargs='*',
         help='List of arguments to provide the initialization.')
     parsed = parser.parse_args(shlex.split(line))

     # Reject anything other than loom before doing any further work.
     if args_backend_is_not_loom(parsed):
         raise ValueError('Unknown backend: %s' % (parsed.backend, ))

     try:
         from bayeslite.backends.loom_backend import LoomBackend
     except ImportError:
         raise ValueError('Failed to import loom backend.')
     extra_args = parsed.args
     if not extra_args:
         raise ValueError('Specify <path> for loom.')
     backend = LoomBackend(loom_store_path=extra_args[0])
     bayesdb_register_backend(self._bdb, backend)


 def args_backend_is_not_loom(parsed):
     """Return True when the parsed backend name is anything but 'loom'."""
     return parsed.backend != 'loom'
def test_loom_one_numeric():
    """Simple test of the LoomBackend on a one variable table.

    Inserts the integers 0..9 into a single numerical column, analyzes one
    model, runs one density query and one simulate query, then drops
    everything.  Only checks for errors from the Loom system.
    """
    with tempdir('bayeslite-loom') as loom_store_path:
        with bayesdb_open(':memory:') as bdb:
            bayesdb_register_backend(
                bdb, LoomBackend(loom_store_path=loom_store_path))
            bdb.sql_execute('create table t(x)')
            for x in xrange(10):
                bdb.sql_execute('insert into t (x) values (?)', (x, ))
            bdb.execute('create population p for t (x numerical)')
            bdb.execute('create generator g for p using loom')
            bdb.execute('initialize 1 models for g')
            bdb.execute('analyze g for 10 iterations')
            # Results are fetched only to force evaluation; the values
            # themselves are not checked.
            bdb.execute('''
                    estimate probability density of x = 50 from p
            ''').fetchall()
            bdb.execute('simulate x from p limit 1').fetchall()
            bdb.execute('drop models from g')
            bdb.execute('drop generator g')
            bdb.execute('drop population p')
            bdb.execute('drop table t')
示例#10
0
        'p',
        'p_sn',
        'CREATE POPULATION p FOR t(x NUMERICAL; y NUMERICAL)',
        'CREATE GENERATOR p_sn FOR p USING std_normal()',
        # XXX Invent something that fails for backend-specific reasons here.
        'CREATE GENERATOR p_sn FOR p USING std_normal ...',
        'CREATE GENERATOR p_sn FOR p USING std_normal ...',
        lambda mm: None,
    ),
}

# If Loom exists, add a test case.
try:
    from bayeslite.backends.loom_backend import LoomBackend
    examples['loom'] = (
        lambda: LoomBackend(loom_store_path=tempfile.mkdtemp(
            prefix='bayeslite-loom')),
        't',
        'CREATE TABLE t(x NUMERIC, y CYCLIC, z NOMINAL)',
        'INSERT INTO t (x, y, z) VALUES (?, ?, ?)',
        [
            (0, 1.57, 'foo'),
            (1.83, 3.141, 'bar'),
            (1.82, 3.140, 'bar'),
            (-1, 6.28, 'foo'),
        ],
        'p',
        'p_lm',
        'CREATE POPULATION p FOR t'
        '(x NUMERICAL; y CYCLIC; z NOMINAL)',
        'CREATE GENERATOR p_lm FOR p USING loom()',
        'CREATE GENERATOR p_lm FOR p USING loom ...',
def test_loom_complex_add_analyze_drop_sequence():
    """Exercise initialize / analyze / drop sequences on one Loom generator.

    Checks that:
    * ``initialize ... if not exists`` tops the model count up rather than
      adding to it (2 then 3 gives 3, not 5);
    * dropping a single model raises BQLError;
    * the recorded model count is reset after ``drop models``;
    * two fresh one-model analyses give densities within .01 of each
      other.
    """
    with tempdir('bayeslite-loom') as loom_store_path:
        with bayesdb_open(':memory:') as bdb:
            model_count_sql = (
                '''
                SELECT num_models FROM bayesdb_loom_generator_model_info
                    WHERE generator_id=?;
            ''')
            density_sql = '''
                estimate probability density of x = 50 from p'''

            def recorded_model_count():
                # Look the ids up afresh on every call, then read the
                # count stored for generator g.
                population_id = bayesdb_get_population(bdb, 'p')
                generator_id = bayesdb_get_generator(bdb, population_id, 'g')
                cursor = bdb.sql_execute(model_count_sql, (generator_id, ))
                return cursor.fetchall()[0][0]

            def densities_at_fifty():
                # First column of every row returned by the density query.
                cursor = bdb.execute(density_sql)
                fetched = cursor.fetchall()
                return [row[0] for row in fetched]

            loom = LoomBackend(loom_store_path=loom_store_path)
            bayesdb_register_backend(bdb, loom)
            bdb.sql_execute('create table t (x)')
            for value in xrange(10):
                bdb.sql_execute('insert into t (x) values (?)', (value, ))
            bdb.execute('create population p for t (x numerical)')
            bdb.execute('create generator g for p using loom')

            bdb.execute('initialize 2 models for g')

            bdb.execute('initialize 3 models if not exists for g')
            # Make sure that the total number of models is
            # 3 and not 2 + 3 = 5.
            num_models = recorded_model_count()
            assert num_models == 3

            bdb.execute('analyze g for 10 iterations')
            bdb.execute('estimate probability density of x = 50 from p')

            with pytest.raises(BQLError):
                bdb.execute('drop model 1 from g')
            bdb.execute('drop models from g')

            bdb.execute('initialize 1 models for g')
            # Make sure that the number of models was reset after dropping.
            num_models = recorded_model_count()
            assert num_models == 1
            bdb.execute('analyze g for 50 iterations')

            first_densities = densities_at_fifty()
            bdb.execute('simulate x from p limit 1').fetchall()
            bdb.execute('drop models from g')

            bdb.execute('initialize 1 model for g')
            bdb.execute('analyze g for 50 iterations')
            second_densities = densities_at_fifty()
            # Check that the analysis started fresh after dropping models
            # and that it produces similar results the second time.
            for first, second in zip(first_densities, second_densities):
                assert abs(first - second) < .01

            for teardown_statement in ('drop models from g',
                                       'drop generator g',
                                       'drop population p',
                                       'drop table t'):
                bdb.execute(teardown_statement)
def test_loom_four_var():
    """Test Loom on a four variable table.
    Table consists of:
    * x - a random int between 0 and 200
    * y - a random int between 0 and 100
    * xx - just 2*x
    * z - a nominal variable that has an even
    chance of being 'a' or 'b'

    Queries run and tested include:
    estimate similarity, estimate probability density, simulate,
    estimate mutual information, estimate dependence probability,
    infer explicit predict

    X_MIN, X_MAX, Y_MIN, Y_MAX and PREDICT_RUNS are module-level constants
    defined outside this function.
    """
    with tempdir('bayeslite-loom') as loom_store_path:
        with bayesdb_open(':memory:') as bdb:
            bayesdb_register_backend(
                bdb, LoomBackend(loom_store_path=loom_store_path))
            bdb.sql_execute('create table t(x, xx, y, z)')
            # Two identical rows up front: rowid 1 is used by the
            # predictive-relevance queries below.
            bdb.sql_execute('''
                insert into t (x, xx, y, z) values (100, 200, 50, 'a')''')
            bdb.sql_execute('''
                insert into t (x, xx, y, z) values (100, 200, 50, 'a')''')
            for _index in xrange(100):
                x = bdb._prng.weakrandom_uniform(X_MAX)
                bdb.sql_execute(
                    '''
                    insert into t(x, xx, y, z) values(?, ?, ?, ?)
                    ''',
                    (x, x * 2, int(bdb._prng.weakrandom_uniform(Y_MAX)),
                     'a' if bdb._prng.weakrandom_uniform(2) == 1 else 'b'))

            bdb.execute('''
                create population p for t(x numerical; xx numerical;
                y numerical; z nominal)''')
            bdb.execute('create generator g for p using loom')
            bdb.execute('initialize 10 model for g')
            bdb.execute('analyze g for 20 iterations')

            # Relevance to hypothetical rows is expected to be rejected.
            with pytest.raises(BQLError):
                bdb.execute('''
                    estimate
                        predictive relevance
                            to hypothetical rows with values ((x=50, xx=100))
                            in the context of "x"
                    from p
                    where rowid = 1
                ''').fetchall()

            # A row must be fully relevant to itself.
            relevance = bdb.execute('''
                estimate
                    predictive relevance
                        to existing rows (rowid = 1)
                        in the context of "x"
                from p
                where rowid = 1
            ''').fetchall()
            assert relevance[0][0] == 1

            similarities = bdb.execute('''estimate similarity
                in the context of x from pairwise p limit 2''').fetchall()
            assert similarities[0][2] <= 1
            assert similarities[1][2] <= 1
            assert abs(similarities[0][2] - similarities[1][2]) < 0.005

            # A value far outside the generated range should be very
            # unlikely; one in the middle of the range should not be.
            impossible_density = bdb.execute(
                'estimate probability density of x = %d by p' %
                (X_MAX * 2.5, )).fetchall()
            assert impossible_density[0][0] < 0.0001

            possible_density = bdb.execute(
                'estimate probability density of x = %d  by p' %
                ((X_MAX - X_MIN) / 2, )).fetchall()
            assert possible_density[0][0] > 0.001

            nominal_density = bdb.execute('''
                estimate probability density of z = 'a' by p
            ''').fetchall()
            assert abs(nominal_density[0][0] - .5) < 0.2

            mutual_info = bdb.execute('''
                estimate mutual information as mutinf
                from pairwise columns of p order by mutinf
            ''').fetchall()
            # Rows are 4-tuples; the last three fields are the two column
            # names and the mutual-information value.
            _, a, b, c = zip(*mutual_info)
            mutual_info_dict = dict(zip(zip(a, b), c))
            assert mutual_info_dict[('x', 'y')] < mutual_info_dict[(
                'x', 'xx')] < mutual_info_dict[('x', 'x')]

            simulated_data = bdb.execute('simulate x, y from p limit %d' %
                                         (PREDICT_RUNS, )).fetchall()
            xs, ys = zip(*simulated_data)
            assert abs((sum(xs)/len(xs)) - (X_MAX-X_MIN)/2) < \
                    (X_MAX-X_MIN)/5
            assert abs((sum(ys)/len(ys)) - (Y_MAX-Y_MIN)/2) < \
                    (Y_MAX-Y_MIN)/5
            # Fewer than half of the simulated values may fall outside
            # their column's own range (x is checked against X_MIN/X_MAX,
            # y against Y_MIN/Y_MAX).
            assert sum([1 if (x < X_MIN or x > X_MAX) else 0
                        for x in xs]) < .5 * PREDICT_RUNS
            assert sum([1 if (y < Y_MIN or y > Y_MAX) else 0
                        for y in ys]) < .5 * PREDICT_RUNS

            dependence = bdb.execute('''estimate dependence probability
                from pairwise variables of p''').fetchall()
            for (_, col1, col2, d_val) in dependence:
                if col1 == col2:
                    assert d_val == 1
                elif col1 in ['xx', 'x'] and col2 in ['xx', 'x']:
                    # x and xx are deterministically related.
                    assert d_val > 0.80
                else:
                    assert d_val < 0.20
            predict_confidence = bdb.execute(
                'infer explicit predict x confidence x_c FROM p').fetchall()
            predictions, confidences = zip(*predict_confidence)
            assert abs((sum(predictions) / len(predictions)) -
                       (X_MAX - X_MIN) / 2) < (X_MAX - X_MIN) / 5
            # NOTE(review): the bound uses PREDICT_RUNS although the number
            # of predictions presumably follows the row count -- confirm.
            assert sum(
                [1 if (p < X_MIN or p > X_MAX) else 0
                 for p in predictions]) < .5 * PREDICT_RUNS
            assert all([c == 0 for c in confidences])
def register_loom(bdb):
    """Register a new LoomBackend on `bdb`.

    The backend's store path is whatever ``temp_file_path('.bdb')``
    returns.  Nothing is returned.
    """
    backend = LoomBackend(loom_store_path=temp_file_path('.bdb'))
    bayeslite.bayesdb_register_backend(bdb, backend)