Пример #1
0
def bayesdb_simulate(bdb, population_id, constraints, colnos,
        generator_id=None, numpredictions=1, accuracy=None):
    """Simulate rows from a generative model, subject to constraints.

    Returns a list of `numpredictions` tuples, with a value for each
    column specified in the list `colnos`, conditioned on the
    constraints in the list `constraints` of tuples ``(colno,
    value)``.

    The results are simulated from the predictive distribution on
    fresh rows, unless exactly one constraint is keyed by a rowid
    token (a member of ``core.bayesdb_rowid_tokens(bdb)``), in which
    case its value is used as the row id and that constraint is
    removed from the conditioning set.

    If `generator_id` is None, every generator of the population is
    used: the `numpredictions` rows are split among the generators by
    a multinomial draw from ``bdb.np_prng`` whose probabilities are
    proportional to each generator's likelihood of the constraints.

    Raises BQLError if more than one rowid constraint is given, if
    there is no generator to simulate from, or if the constraints have
    zero likelihood under every generator.
    """
    rowid = core.bayesdb_population_fresh_row_id(bdb, population_id)
    if constraints is not None:
        # Look the rowid tokens up once instead of once per constraint.
        # Assumes the token set does not change during this call.
        rowid_tokens = core.bayesdb_rowid_tokens(bdb)
        user_rowid = [v for c, v in constraints if c in rowid_tokens]
        if len(user_rowid) == 1:
            rowid = user_rowid[0]
        elif len(user_rowid) > 1:
            raise BQLError(bdb, 'Multiple rowids given: %s.' % (constraints,))
        constraints = [
            (rowid, c, v) for c, v in constraints
            if c not in rowid_tokens
        ]
    targets = [(rowid, colno) for colno in colnos]

    def loglikelihood(generator_id, metamodel):
        # With nothing to condition on, every generator is equally likely.
        if not constraints:
            return 0
        return metamodel.logpdf_joint(
            bdb, generator_id, constraints, [], None)

    def simulate(generator_id, metamodel, n):
        return metamodel.simulate_joint(
            bdb, generator_id, targets,
            constraints, None, num_predictions=n, accuracy=accuracy)

    if generator_id is not None:
        generator_ids = [generator_id]
    else:
        generator_ids = core.bayesdb_population_generators(bdb, population_id)
    if len(generator_ids) == 0:
        # Without this guard a Python 2 `map` would pad the empty lists
        # with None and fail obscurely inside `simulate`.
        raise BQLError(bdb, 'No generators for population: %r'
            % (population_id,))
    # Use a distinct loop name: on Python 2 a list comprehension variable
    # leaks into the function scope and would rebind `generator_id`.
    metamodels = [core.bayesdb_generator_metamodel(bdb, gid)
        for gid in generator_ids]
    if len(generator_ids) > 1:
        loglikelihoods = [loglikelihood(gid, metamodel)
            for gid, metamodel in zip(generator_ids, metamodels)]
        max_loglikelihood = max(loglikelihoods)
        if max_loglikelihood == float('-inf'):
            # XXX Show the constraints with symbolic names.
            raise BQLError(bdb, 'Impossible constraints: %r' % (constraints,))
        # Normalize relative to the largest log-likelihood so that
        # math.exp neither underflows to an all-zero weight vector nor
        # overflows; the largest term contributes exp(0) == 1, so the
        # total is never zero.
        weights = [math.exp(loglik - max_loglikelihood)
            for loglik in loglikelihoods]
        total_weight = sum(weights)
        probabilities = [weight/total_weight for weight in weights]
        countses = bdb.np_prng.multinomial(
            numpredictions, probabilities, size=1)
        counts = countses[0]
    else:
        counts = [numpredictions]
    rowses = [simulate(gid, metamodel, n)
        for gid, metamodel, n in zip(generator_ids, metamodels, counts)]
    all_rows = [row for rows in rowses for row in rows]
    assert all(isinstance(row, (tuple, list)) for row in all_rows)
    return all_rows
Пример #2
0
def _retrieve_rowid_constraints(bdb, population_id, constraints):
    """Split a rowid constraint, if any, out of `constraints`.

    `constraints` is an iterable of ``(key, value)`` pairs, or a falsy
    value (None or empty), which is returned unchanged.  A pair whose
    key is a member of ``core.bayesdb_rowid_tokens(bdb)`` names the row
    to use rather than a variable to condition on.

    Returns ``(rowid, constraints)``: `rowid` is the user-supplied
    value if exactly one rowid pair was given, otherwise the fresh row
    id of the population; `constraints` is the list of remaining pairs
    in their original order.

    Raises BQLError if more than one rowid pair is given.
    """
    rowid = core.bayesdb_population_fresh_row_id(bdb, population_id)
    if not constraints:
        return rowid, constraints
    # Look the tokens up once and partition in a single pass, instead of
    # calling the lookup for every pair in two separate passes.  Assumes
    # the token set does not change during this call.
    rowid_tokens = core.bayesdb_rowid_tokens(bdb)
    user_rowids = []
    remaining = []
    for c, v in constraints:
        if c in rowid_tokens:
            user_rowids.append(v)
        else:
            remaining.append((c, v))
    if len(user_rowids) > 1:
        raise BQLError(bdb, 'Multiple rowids given: %s.' % (constraints, ))
    if user_rowids:
        rowid = user_rowids[0]
    return rowid, remaining
Пример #3
0
def _retrieve_rowid_constraints(bdb, population_id, constraints):
    """Determine the target row id and strip rowid pairs from `constraints`.

    Each element of `constraints` is a ``(key, value)`` pair.  Pairs
    whose key appears in ``core.bayesdb_rowid_tokens(bdb)`` select the
    row; all other pairs are kept as constraints.

    Returns a tuple ``(rowid, constraints)``.  The row id defaults to
    ``core.bayesdb_population_fresh_row_id(bdb, population_id)`` and is
    replaced by the user's value when exactly one rowid pair is
    present.  A falsy `constraints` (None or empty) is passed through
    untouched.

    Raises BQLError when several rowid pairs are supplied.
    """
    rowid = core.bayesdb_population_fresh_row_id(bdb, population_id)
    if constraints:
        # The lookup does not depend on the pair being examined, so do
        # it once here rather than once per pair in each comprehension.
        # Assumes the token set does not change during this call.
        rowid_tokens = core.bayesdb_rowid_tokens(bdb)
        user_rowid = [v for c, v in constraints if c in rowid_tokens]
        if len(user_rowid) > 1:
            raise BQLError(bdb, 'Multiple rowids given: %s.' % (constraints,))
        if user_rowid:
            rowid = user_rowid[0]
        constraints = [
            (c, v) for c, v in constraints
            if c not in rowid_tokens
        ]
    return rowid, constraints
Пример #4
0
def execute_phrase(bdb, phrase, bindings=()):
    """Execute the BQL AST phrase `phrase` and return a cursor of results.

    `phrase` may be wrapped in ``ast.Parametrized``, in which case its
    parameter counts/maps are handed to the compiler together with
    `bindings`.

    Handled here:

    - queries (``ast.is_query``): compiled inside a savepoint and run
      through ``execute_wound``, whose result is returned;
    - ``ast.Begin`` / ``ast.Rollback`` / ``ast.Commit``: delegate to the
      corresponding ``txn`` function;
    - ``ast.CreateTabAs``: ``CREATE TABLE ... AS <query>``;
    - ``ast.CreateTabCsv``: create a table by reading ``phrase.csv``;
    - ``ast.CreateTabSim``: create a table and fill it with rows from
      ``bqlfn.bayesdb_simulate``.

    All non-query branches return ``empty_cursor(bdb)``.

    Raises BQLError if the table to create already exists (and
    ``ifnotexists`` is not set), or if a CreateTabSim phrase names an
    unknown population, generator, or variable.
    """
    if isinstance(phrase, ast.Parametrized):
        n_numpar = phrase.n_numpar
        nampar_map = phrase.nampar_map
        phrase = phrase.phrase
        assert 0 < n_numpar
    else:
        n_numpar = 0
        nampar_map = None
        # Ignore extraneous bindings.  XXX Bad idea?

    if ast.is_query(phrase):
        # Compile the query in the transaction in case we need to
        # execute subqueries to determine column lists.  Compiling is
        # a quick tree descent, so this should be fast.
        out = compiler.Output(n_numpar, nampar_map, bindings)
        with bdb.savepoint():
            compiler.compile_query(bdb, phrase, out)
        winders, unwinders = out.getwindings()
        return execute_wound(bdb, winders, unwinders, out.getvalue(),
                             out.getbindings())

    if isinstance(phrase, ast.Begin):
        txn.bayesdb_begin_transaction(bdb)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.Rollback):
        txn.bayesdb_rollback_transaction(bdb)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.Commit):
        txn.bayesdb_commit_transaction(bdb)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.CreateTabAs):
        assert ast.is_query(phrase.query)
        with bdb.savepoint():
            out = compiler.Output(n_numpar, nampar_map, bindings)
            qt = sqlite3_quote_name(phrase.name)
            temp = 'TEMP ' if phrase.temp else ''
            ifnotexists = 'IF NOT EXISTS ' if phrase.ifnotexists else ''
            out.write('CREATE %sTABLE %s%s AS ' % (temp, ifnotexists, qt))
            compiler.compile_query(bdb, phrase.query, out)
            winders, unwinders = out.getwindings()
            with compiler.bayesdb_wind(bdb, winders, unwinders):
                bdb.sql_execute(out.getvalue(), out.getbindings())
        return empty_cursor(bdb)

    if isinstance(phrase, ast.CreateTabCsv):
        with bdb.savepoint():
            if core.bayesdb_has_table(bdb, phrase.name):
                if phrase.ifnotexists:
                    return empty_cursor(bdb)
                raise BQLError(
                    bdb,
                    'Table already exists: %s' % (repr(phrase.name), ))
            bayesdb_read_csv_file(bdb,
                                  phrase.name,
                                  phrase.csv,
                                  header=True,
                                  create=True)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.CreateTabSim):
        assert isinstance(phrase.simulation, ast.Simulate)
        simulation = phrase.simulation
        with bdb.savepoint():
            if core.bayesdb_has_table(bdb, phrase.name):
                if phrase.ifnotexists:
                    return empty_cursor(bdb)
                raise BQLError(
                    bdb, 'Name already defined as table: %s' %
                    (repr(phrase.name), ))
            if not core.bayesdb_has_population(bdb, simulation.population):
                raise BQLError(
                    bdb, 'No such population: %s' %
                    (simulation.population, ))
            population_id = core.bayesdb_get_population(
                bdb, simulation.population)
            generator_id = None
            if simulation.generator is not None:
                if not core.bayesdb_has_generator(bdb, population_id,
                                                  simulation.generator):
                    raise BQLError(
                        bdb, 'No such generator: %r' %
                        (simulation.generator, ))
                generator_id = core.bayesdb_get_generator(
                    bdb, population_id, simulation.generator)
            table = core.bayesdb_population_table(bdb, population_id)
            qn = sqlite3_quote_name(phrase.name)
            qt = sqlite3_quote_name(table)
            column_names = simulation.columns
            # Build a real list: the quoted names are consumed three
            # times below (schema, column list, placeholders), which a
            # lazy `map` object would not survive.
            qcns = [sqlite3_quote_name(column_name)
                    for column_name in column_names]

            # Collect the SQL type of every column of the population's
            # base table, keyed by casefolded name, so the new table can
            # reuse those types.
            cursor = bdb.sql_execute('PRAGMA table_info(%s)' % (qt, ))
            column_sqltypes = {}
            for _colno, name, sqltype, _nonnull, _default, _primary in cursor:
                assert casefold(name) not in column_sqltypes
                column_sqltypes[casefold(name)] = sqltype
            assert 0 < len(column_sqltypes)

            # Validate the simulated columns and the constraint keys
            # before doing any work.
            for column_name in column_names:
                if casefold(column_name) not in column_sqltypes:
                    raise BQLError(
                        bdb, 'No such variable'
                        ' in population %r: %s' %
                        (simulation.population, column_name))
            for column_name, _expression in simulation.constraints:
                cn = casefold(column_name)
                if (cn not in column_sqltypes
                        and cn not in core.bayesdb_rowid_tokens(bdb)):
                    raise BQLError(
                        bdb, 'No such variable in population %s: %s' %
                        (simulation.population, column_name))

            # Evaluate the sample count and every constraint expression
            # with a single SELECT: the first result column is the
            # sample count, the rest are the constraint values in order.
            # XXX Move to compiler.py.
            # XXX Copypasta of this in compile_simulate!
            out = compiler.Output(n_numpar, nampar_map, bindings)
            out.write('SELECT ')
            with compiler.compiling_paren(bdb, out, 'CAST(', ' AS INTEGER)'):
                compiler.compile_nobql_expression(bdb,
                                                  simulation.nsamples,
                                                  out)
            for _column_name, expression in simulation.constraints:
                out.write(', ')
                compiler.compile_nobql_expression(bdb, expression, out)
            winders, unwinders = out.getwindings()
            with compiler.bayesdb_wind(bdb, winders, unwinders):
                rows = bdb.sql_execute(out.getvalue(),
                                       out.getbindings()).fetchall()
            assert len(rows) == 1
            nsamples = rows[0][0]
            constraint_values = rows[0][1:]
            assert isinstance(nsamples, int)

            def map_var(var):
                # Rowid tokens are passed through (casefolded) rather
                # than resolved to a variable number.
                if casefold(var) not in core.bayesdb_rowid_tokens(bdb):
                    return core.bayesdb_variable_number(
                        bdb, population_id, generator_id, var)
                else:
                    return casefold(var)

            # Explicit unpacking in the comprehension replaces a helper
            # that relied on tuple parameters in its signature, which is
            # a syntax error on Python 3.
            constraints = [
                (map_var(var), value)
                for (var, _expression), value
                in zip(simulation.constraints, constraint_values)
            ]
            colnos = [map_var(column_name) for column_name in column_names]
            schema = ','.join('%s %s' %
                              (qcn, column_sqltypes[casefold(column_name)])
                              for qcn, column_name in zip(qcns, column_names))
            bdb.sql_execute(
                'CREATE %sTABLE %s%s (%s)' %
                ('TEMP ' if phrase.temp else '',
                 'IF NOT EXISTS ' if phrase.ifnotexists else '', qn, schema))
            insert_sql = '''
                INSERT INTO %s (%s) VALUES (%s)
            ''' % (qn, ','.join(qcns), ','.join(['?'] * len(qcns)))
            for row in bqlfn.bayesdb_simulate(
                    bdb,
                    population_id,
                    constraints,
                    colnos,
                    generator_id=generator_id,
                    numpredictions=nsamples,
                    accuracy=simulation.accuracy):
                bdb.sql_execute(insert_sql, row)
        return empty_cursor(bdb)
    # NOTE(review): a phrase matching none of the branches above falls
    # off the end and returns None -- confirm whether further branches
    # exist beyond this excerpt or whether this should raise.
Пример #5
0
 def map_var(var):
     """Translate a variable name for use in a simulation request.

     Names whose casefolded form is in ``core.bayesdb_rowid_tokens(bdb)``
     are returned casefolded; any other name is resolved through
     ``core.bayesdb_variable_number``.  `bdb`, `population_id`, and
     `generator_id` come from the enclosing scope.
     """
     folded = casefold(var)
     if folded in core.bayesdb_rowid_tokens(bdb):
         return folded
     return core.bayesdb_variable_number(
         bdb, population_id, generator_id, var)