def test_simulate_models_nontrivial():
    """Check SIMULATE MODELS expansion of compound expressions.

    Each BQL subexpression is expected to be moved into the inner
    ast.SimulateModels under a temporary name (v0, v1, ...), with the
    outer SELECT referring to those names in its place.
    """
    # XXX test descent into ExpLit
    # XXX test descent into ExpNumpar
    # XXX test descent into ExpNampar
    # XXX test descent into ExpCol
    # XXX test descent into ExpSub
    # XXX test descent into ExpCollate
    # XXX test descent into ExpIn
    # XXX test descent into ExpCast
    # XXX test descent into ExpExists
    # XXX test descent into ExpApp
    # XXX test descent into ExpAppStar
    # XXX test descent into ExpCase
    mutinf0 = ast.ExpBQLMutInf(
        ['c0'],
        ['c1', 'c2'],
        [('c3', ast.ExpLit(ast.LitInt(3)))],
        None)
    mutinf1 = ast.ExpBQLMutInf(
        ['c4', 'c5'],
        ['c6'],
        [('c7', ast.ExpLit(ast.LitString('ergodic')))],
        100)
    # No conditions for now -- that changes the weighting of the average.
    probdensity = ast.ExpBQLProbDensity(
        [('x', ast.ExpLit(ast.LitFloat(1.2)))], [])

    scaled_mutinf1 = ast.ExpOp(
        ast.OP_MUL, [ast.ExpLit(ast.LitFloat(0.1)), mutinf1])
    expression0 = ast.ExpOp(ast.OP_LT, [mutinf0, scaled_mutinf1])
    expression1 = probdensity
    simmodels = ast.SimulateModelsExp(
        [
            ast.SelColExp(expression0, 'quagga'),
            ast.SelColExp(expression1, 'eland'),
        ],
        'p', 'g')

    actual = macro.expand_simulate_models(simmodels)

    # Outer query: same shape as the input, BQL parts replaced by v0..v2.
    outer_columns = [
        ast.SelColExp(
            ast.ExpOp(ast.OP_LT, [
                ast.ExpCol(None, 'v0'),
                ast.ExpOp(ast.OP_MUL, [
                    ast.ExpLit(ast.LitFloat(0.1)),
                    ast.ExpCol(None, 'v1'),
                ]),
            ]),
            'quagga'),
        ast.SelColExp(ast.ExpCol(None, 'v2'), 'eland'),
    ]
    # Inner query: the hoisted BQL expressions under their temporary names.
    inner = ast.SimulateModels(
        [
            ast.SelColExp(mutinf0, 'v0'),
            ast.SelColExp(mutinf1, 'v1'),
            ast.SelColExp(probdensity, 'v2'),
        ],
        'p', 'g')
    expected = ast.Select(
        ast.SELQUANT_ALL,
        outer_columns,
        [ast.SelTab(inner, None)],
        None, None, None, None)
    assert actual == expected
def expand_probability_estimate(probest, population, generator):
    """Expand a probability-estimate node into an averaging subquery.

    The result is an ast.ExpSub wrapping a SELECT of AVG(x), whose source
    is an ast.SimulateModelsExp that exposes ``probest.expression`` as the
    column ``x`` for the given population and generator.
    """
    simulated = ast.SimulateModelsExp(
        [ast.SelColExp(probest.expression, 'x')],
        population,
        generator)
    average = ast.ExpApp(False, 'AVG', [ast.ExpCol(None, 'x')])
    return ast.ExpSub(ast.Select(
        ast.SELQUANT_ALL,
        [ast.SelColExp(average, None)],
        [ast.SelTab(simulated, None)],
        None, None, None, None))
def test_using_model():
    """Check parsing of the USING MODEL clause on each query form."""
    modelno = ast.ExpCol(None, 'modelno')

    assert parse_bql_string('simulate x from t using model 42'
            ' limit 10') == \
        [ast.Simulate(['x'], 't', ast.ExpLit(ast.LitInt(42)), [],
            ast.ExpLit(ast.LitInt(10)))]
    # SIMULATE without a LIMIT must be rejected.  Only the call belongs
    # inside the raises block: a comparison there could never be
    # evaluated once the parser raises, and would only obscure the intent.
    with pytest.raises(parse.BQLParseError):
        parse_bql_string('simulate x from t'
            ' using model (87)')
    assert parse_bql_string('estimate x from t using model (1+2)') == \
        [ast.Estimate(ast.SELQUANT_ALL,
            [ast.SelColExp(ast.ExpCol(None, 'x'), None)],
            't',
            ast.ExpOp(ast.OP_ADD, (
                ast.ExpLit(ast.LitInt(1)),
                ast.ExpLit(ast.LitInt(2)),
            )),
            None, None, None, None)]
    assert parse_bql_string('estimate * from columns of t'
            ' using model modelno') == \
        [ast.EstCols([ast.SelColAll(None)], 't', modelno,
            None, None, None)]
    assert parse_bql_string('estimate 42 from columns of t'
            ' using model modelno') == \
        [ast.EstCols([(ast.ExpLit(ast.LitInt(42)), None)], 't', modelno,
            None, None, None)]
    assert parse_bql_string('estimate 42 from pairwise columns of t'
            ' using model modelno') == \
        [ast.EstPairCols([(ast.ExpLit(ast.LitInt(42)), None)], 't', None,
            modelno, None, None, None)]
    assert parse_bql_string('estimate similarity from pairwise t'
            ' using model modelno') == \
        [ast.EstPairRow(
            [ast.SelColExp(ast.ExpBQLSim(None, [ast.ColListAll()]), None)],
            't', modelno, None, None, None)]
    assert parse_bql_string('infer x from t using model modelno') == \
        [ast.InferAuto([ast.InfColOne('x', None)],
            ast.ExpLit(ast.LitInt(0)), 't', modelno,
            None, None, None, None)]
    assert parse_bql_string('infer explicit x from t using model modelno') == \
        [ast.InferExplicit([ast.SelColExp(ast.ExpCol(None, 'x'), None)],
            't', modelno, None, None, None, None)]
def test_is_bql():
    """Check that ast.is_bql accepts BQL nodes and rejects plain ones."""
    plain_nodes = [
        ast.ExpLit(ast.LitInt(0)),
        ast.ExpNumpar(0),
        ast.ExpNampar(0, 'x'),
        ast.ExpCol('t', 'c'),
    ]
    for node in plain_nodes:
        assert ast.is_bql(node) == False
    # ...
    bql_nodes = [
        ast.ExpBQLPredProb('c'),
        ast.ExpBQLProb([('c', ast.ExpLit(ast.LitInt(0)))], []),
        ast.ExpBQLProbFn(ast.ExpLit(ast.LitInt(0)), []),
        ast.ExpBQLSim(ast.ExpLit(ast.LitInt(0)), []),
        ast.ExpBQLDepProb('c0', 'c1'),
        ast.ExpBQLMutInf('c0', 'c1', None, 100),
        ast.ExpBQLCorrel('c0', 'c1'),
        ast.ExpBQLPredict('c', ast.ExpLit(ast.LitInt(.5)), None),
        ast.ExpBQLPredictConf('c', None),
    ]
    for node in bql_nodes:
        assert ast.is_bql(node)
def test_expand_probability_estimate():
    """Check the AVG-over-SIMULATE-MODELS expansion of a probability
    estimate."""
    mutinf = ast.ExpBQLMutInf(
        ['c0'],
        ['c1', 'c2'],
        [('c3', ast.ExpLit(ast.LitInt(3)))],
        None)
    expression = ast.ExpOp(
        ast.OP_LT, [mutinf, ast.ExpLit(ast.LitFloat(0.1))])
    probest = ast.ExpBQLProbEst(expression)

    actual = macro.expand_probability_estimate(probest, 'p', 'g')

    average = ast.ExpApp(False, 'AVG', [ast.ExpCol(None, 'x')])
    source = ast.SimulateModelsExp(
        [ast.SelColExp(expression, 'x')], 'p', 'g')
    expected = ast.ExpSub(ast.Select(
        ast.SELQUANT_ALL,
        [ast.SelColExp(average, None)],
        [ast.SelTab(source, None)],
        None, None, None, None))
    assert actual == expected
def _expand_simmodel_exp(exp, simcols):
    """Hoist column references and BQL expressions out of `exp`.

    Every ast.ExpCol and every expression for which ast.is_bql is true
    is appended to `simcols` as ast.SelColExp(exp, 'vN'), where N is the
    length of `simcols` before the append, and is replaced in the
    returned tree by ast.ExpCol(None, 'vN').  Literals and parameters
    are returned as they are.  Compound expressions are rebuilt as the
    same node type with their subexpressions expanded recursively.

    Subqueries (ExpSub, ExpExists, and the subquery of an ExpInQuery)
    and ExpAppStar are returned without descending into them.

    `simcols` is a list and is mutated in place.

    Raises NotImplementedError for ast.ExpCase, and AssertionError for
    any expression type not handled here.
    """
    if isinstance(exp, ast.ExpCol) or ast.is_bql(exp):
        tmpname = 'v%d' % (len(simcols),)
        simcols.append(ast.SelColExp(exp, tmpname))
        return ast.ExpCol(None, tmpname)
    elif isinstance(exp, (ast.ExpLit, ast.ExpNumpar, ast.ExpNampar)):
        return exp
    elif isinstance(exp, (ast.ExpSub, ast.ExpExists)):
        # XXX Not really right -- need to provide correct scoping.
        return exp              # XXX subquery scoping
    elif isinstance(exp, ast.ExpCollate):
        subexp = _expand_simmodel_exp(exp.expression, simcols)
        return ast.ExpCollate(subexp, exp.collation)
    elif isinstance(exp, ast.ExpInQuery):
        subexp = _expand_simmodel_exp(exp.expression, simcols)
        subquery = exp.subquery         # XXX subquery scoping
        # Rebuild the node type that was matched, as every other branch
        # does; this branch previously built `ast.ExpIn`.
        # NOTE(review): assumes ExpInQuery takes (expression, positive,
        # subquery) positionally -- confirm against the ast module.
        return ast.ExpInQuery(subexp, exp.positive, subquery)
    elif isinstance(exp, ast.ExpInExp):
        subexp = _expand_simmodel_exp(exp.expression, simcols)
        subexps = [_expand_simmodel_exp(se, simcols)
            for se in exp.expressions]
        # Same as above: rebuild as ExpInExp rather than `ast.ExpIn`.
        # NOTE(review): assumes ExpInExp takes (expression, positive,
        # expressions) positionally -- confirm against the ast module.
        return ast.ExpInExp(subexp, exp.positive, subexps)
    elif isinstance(exp, ast.ExpCast):
        subexp = _expand_simmodel_exp(exp.expression, simcols)
        return ast.ExpCast(subexp, exp.type)
    elif isinstance(exp, ast.ExpApp):
        operands = [_expand_simmodel_exp(operand, simcols)
            for operand in exp.operands]
        return ast.ExpApp(exp.distinct, exp.operator, operands)
    elif isinstance(exp, ast.ExpAppStar):
        return exp
    elif isinstance(exp, ast.ExpCase):
        raise NotImplementedError("I'm too lazy to do CASE right now.")
    elif isinstance(exp, ast.ExpOp):
        operands = [_expand_simmodel_exp(operand, simcols)
            for operand in exp.operands]
        return ast.ExpOp(exp.operator, operands)
    else:
        # Raised explicitly rather than via `assert False`, which is
        # stripped under `python -O` and would let this fall through and
        # return None.
        raise AssertionError('Invalid expression: %s' % (repr(exp),))
def test_infer_explicit_samples():
    """Check INFER EXPLICIT with mixed PREDICT forms, including one with
    a USING ... SAMPLES clause."""
    query = (
        'infer explicit x, predict y with confidence 0.9,'
        ' predict p with confidence 0.8 as q, predict u confidence v,'
        ' predict a as b confidence c,'
        ' predict h confidence k using 42 samples'
        ' from p'
    )
    parsed = parse_bql_string(query)

    predict_y = ast.ExpBQLPredict('y', ast.ExpLit(ast.LitFloat(.9)), None)
    predict_p = ast.ExpBQLPredict('p', ast.ExpLit(ast.LitFloat(.8)), None)
    columns = [
        ast.SelColExp(ast.ExpCol(None, 'x'), None),
        ast.SelColExp(predict_y, None),
        ast.SelColExp(predict_p, 'q'),
        ast.PredCol('u', None, 'v', None),
        ast.PredCol('a', 'b', 'c', None),
        ast.PredCol('h', None, 'k', ast.ExpLit(ast.LitInt(42))),
    ]
    assert parsed == \
        [ast.InferExplicit(columns, 'p', None, None, None, None, None)]
def test_parametrized():
    """Check numbering of positional and named parameters in queries."""
    def select_where(condition):
        # SELECT * FROM t WHERE <condition>
        return ast.Select(ast.SELQUANT_ALL,
            [ast.SelColAll(None)],
            [ast.SelTab('t', None)],
            condition,
            None, None, None)

    def equals(column, parameter):
        return ast.ExpOp(ast.OP_EQ, (ast.ExpCol(None, column), parameter))

    def both(left, right):
        return ast.ExpOp(ast.OP_BOOLAND, (left, right))

    assert parse_bql_string('select * from t where id = ?;') == \
        [ast.Parametrized(
            select_where(equals('id', ast.ExpNumpar(1))),
            1, {})]
    assert parse_bql_string('select * from t where id = ?123;') == \
        [ast.Parametrized(
            select_where(equals('id', ast.ExpNumpar(123))),
            123, {})]
    assert parse_bql_string('select * from t where id = :foo;') == \
        [ast.Parametrized(
            select_where(equals('id', ast.ExpNampar(1, ':foo'))),
            1, {':foo': 1})]
    assert parse_bql_string('select * from t where a = :foo and b = @foo;') \
        == \
        [ast.Parametrized(
            select_where(both(
                equals('a', ast.ExpNampar(1, ':foo')),
                equals('b', ast.ExpNampar(2, '@foo')))),
            2, {':foo': 1, '@foo': 2})]
    assert parse_bql_string('select * from t where a = $foo and b = ?1;') == \
        [ast.Parametrized(
            select_where(both(
                equals('a', ast.ExpNampar(1, '$foo')),
                equals('b', ast.ExpNumpar(1)))),
            1, {'$foo': 1})]
    assert parse_bql_string('select * from t' +
            ' where a = ?123 and b = :foo and c = ?124;') == \
        [ast.Parametrized(
            select_where(both(
                both(
                    equals('a', ast.ExpNumpar(123)),
                    equals('b', ast.ExpNampar(124, ':foo'))),
                equals('c', ast.ExpNumpar(124)))),
            124, {':foo': 124})]
def test_select_trivial():
    """Check parsing of plain SELECT statements, clause by clause."""
    def select(columns, tables=None, condition=None, grouping=None,
            order=None, limit=None, quantifier=ast.SELQUANT_ALL):
        return ast.Select(quantifier, columns, tables, condition, grouping,
            order, limit)

    def lit(value, alias=None):
        return ast.SelColExp(ast.ExpLit(value), alias)

    def ref(name):
        return ast.ExpCol(None, name)

    def col(name, table=None, alias=None):
        return ast.SelColExp(ast.ExpCol(table, name), alias)

    def star():
        return [ast.SelColAll(None)]

    def from_t():
        return [ast.SelTab('t', None)]

    def asc(name):
        return ast.Ord(ref(name), ast.ORD_ASC)

    def desc(name):
        return ast.Ord(ref(name), ast.ORD_DESC)

    def integer(value):
        return ast.ExpLit(ast.LitInt(value))

    # Statements that select exactly one column and have no other clause.
    single_column_cases = [
        ('select null;', lit(ast.LitNull(None))),
        ("select 'x';", lit(ast.LitString('x'))),
        ("select 'x''y';", lit(ast.LitString("x'y"))),
        ('select "x";', col('x')),
        ('select "x""y";', col('x"y')),
        ('select 0;', lit(ast.LitInt(0))),
        ('select 0.;', lit(ast.LitFloat(0))),
        ('select .0;', lit(ast.LitFloat(0))),
        ('select 0.0;', lit(ast.LitFloat(0))),
        ('select 1e0;', lit(ast.LitFloat(1))),
        ('select 1e+1;', lit(ast.LitFloat(10))),
        ('select 1e-1;', lit(ast.LitFloat(.1))),
        ('select 1.e0;', lit(ast.LitFloat(1))),
        ('select .1e0;', lit(ast.LitFloat(.1))),
        ('select .1e1;', lit(ast.LitFloat(1))),
        ('select 1.e10;', lit(ast.LitFloat(1e10))),
        ('select all 0;', lit(ast.LitInt(0))),
    ]
    for query, column in single_column_cases:
        assert parse_bql_string(query) == [select([column])]

    assert parse_bql_string('select distinct 0;') == \
        [select([lit(ast.LitInt(0))], quantifier=ast.SELQUANT_DISTINCT)]
    assert parse_bql_string('select 0 as z;') == \
        [select([lit(ast.LitInt(0), 'z')])]

    # Column lists and FROM.
    assert parse_bql_string('select * from t;') == \
        [select(star(), from_t())]
    assert parse_bql_string('select t1.* from t1;') == \
        [select([ast.SelColAll('t1')], [ast.SelTab('t1', None)])]
    assert parse_bql_string('select c from t;') == \
        [select([col('c')], from_t())]
    assert parse_bql_string('select c as d from t;') == \
        [select([col('c', alias='d')], from_t())]
    assert parse_bql_string('select t.c as d from t;') == \
        [select([col('c', 't', 'd')], from_t())]
    assert parse_bql_string('select t.c as d, p as q, x from t;') == \
        [select(
            [col('c', 't', 'd'), col('p', alias='q'), col('x')],
            from_t())]
    assert parse_bql_string('select * from t, u;') == \
        [select(star(), [ast.SelTab('t', None), ast.SelTab('u', None)])]
    assert parse_bql_string('select * from t as u;') == \
        [select(star(), [ast.SelTab('t', 'u')])]

    # WHERE and GROUP BY.
    assert parse_bql_string('select * where x;') == \
        [select(star(), condition=ref('x'))]
    assert parse_bql_string('select * from t where x;') == \
        [select(star(), from_t(), ref('x'))]
    assert parse_bql_string('select * group by x;') == \
        [select(star(), grouping=ast.Grouping([ref('x')], None))]
    assert parse_bql_string('select * from t where x group by y;') == \
        [select(star(), from_t(), ref('x'),
            ast.Grouping([ref('y')], None))]
    assert parse_bql_string('select * from t where x group by y, z;') == \
        [select(star(), from_t(), ref('x'),
            ast.Grouping([ref('y'), ref('z')], None))]

    # ORDER BY.
    assert parse_bql_string('select * order by x;') == \
        [select(star(), order=[asc('x')])]
    assert parse_bql_string('select * order by x asc;') == \
        [select(star(), order=[asc('x')])]
    assert parse_bql_string('select * order by x desc;') == \
        [select(star(), order=[desc('x')])]
    assert parse_bql_string('select * order by x, y;') == \
        [select(star(), order=[asc('x'), asc('y')])]
    assert parse_bql_string('select * order by x desc, y;') == \
        [select(star(), order=[desc('x'), asc('y')])]
    assert parse_bql_string('select * order by x, y asc;') == \
        [select(star(), order=[asc('x'), asc('y')])]

    # LIMIT, with both spellings of the offset.
    assert parse_bql_string('select * limit 32;') == \
        [select(star(), limit=ast.Lim(integer(32), None))]
    assert parse_bql_string('select * limit 32 offset 16;') == \
        [select(star(), limit=ast.Lim(integer(32), integer(16)))]
    assert parse_bql_string('select * limit 16, 32;') == \
        [select(star(), limit=ast.Lim(integer(32), integer(16)))]

    # Parenthesized expressions, subqueries, and function application.
    assert parse_bql_string('select (select0);') == \
        [select([col('select0')])]
    assert parse_bql_string('select (select 0);') == \
        [select([
            ast.SelColExp(ast.ExpSub(select([lit(ast.LitInt(0))])), None),
        ])]
    assert parse_bql_string('select f(f(), f(x), f(*), f(distinct x), y);') == \
        [select([
            ast.SelColExp(
                ast.ExpApp(False, 'f', [
                    ast.ExpApp(False, 'f', []),
                    ast.ExpApp(False, 'f', [ref('x')]),
                    ast.ExpAppStar('f'),
                    ast.ExpApp(True, 'f', [ref('x')]),
                    ref('y'),
                ]),
                None),
        ])]
def test_trivial_commands():
    """Check parsing of population, generator, model, and CREATE TABLE AS
    commands."""
    def schema():
        # Column declarations shared by the CREATE GENERATOR cases.
        return [
            ['xyz', 'numerical'],
            ['pqr', 'categorical'],
            ['lmn', 'cyclic'],
        ]

    create_population = (
        ' create population satellites for satellites_ucs ('
        ' MODEL country_of_operator, orbit_type AS categorical;'
        ' MODEL launch_mass AS numerical;'
        ' MODEL perigee AS numerical;'
        ' MODEL apogee, period AS numerical'
        ' ) '
    )
    assert parse_bql_string(create_population) == \
        [ast.CreatePop(False, 'satellites', 'satellites_ucs', [
            ast.PopModelVars(
                ['country_of_operator', 'orbit_type'], 'categorical'),
            ast.PopModelVars(['launch_mass'], 'numerical'),
            ast.PopModelVars(['perigee'], 'numerical'),
            ast.PopModelVars(['apogee', 'period'], 'numerical'),
        ])]
    assert parse_bql_string('drop population satellites') == \
        [ast.DropPop(False, 'satellites')]

    assert parse_bql_string('create generator t_cc for t using crosscat'
            '(xyz numerical, pqr categorical, lmn cyclic)') == \
        [ast.CreateGen('t_cc', False, 't', None, 'crosscat', schema())]
    assert parse_bql_string('create generator t_cc for t with baseline crosscat'
            '(xyz numerical, pqr categorical, lmn cyclic)') == \
        [ast.CreateGen(
            't_cc', False, 't',
            ast.Baseline('crosscat', []),
            None,       # Defaults to cgpm.
            schema())]
    assert parse_bql_string('create generator t_cc if not exists'
            ' for t using crosscat'
            '(xyz numerical, pqr categorical, lmn cyclic)') == \
        [ast.CreateGen('t_cc', True, 't', None, 'crosscat', schema())]
    assert parse_bql_string('create generator if not exists t_cc'
            ' for t using crosscat'
            '(xyz numerical, pqr categorical, lmn cyclic)') == \
        [ast.CreateGen('t_cc', True, 't', None, 'crosscat', schema())]
    # XXX Schema of [[]] instead of [] is kinda wacky.  Fix?  (But
    # make sure the empty-parens and no-parens cases are equivalent.)
    assert parse_bql_string('create generator t_cc'
            ' for t using crosscat()') == \
        [ast.CreateGen('t_cc', False, 't', None, 'crosscat', [[]])]
    assert parse_bql_string('create generator t_cc'
            ' for t using crosscat') == \
        [ast.CreateGen('t_cc', False, 't', None, 'crosscat', [[]])]

    initialize_cases = [
        ('initialize 1 model for t;', ast.InitModels(False, 't', 1)),
        ('initialize 1 model if not exists for t;',
            ast.InitModels(True, 't', 1)),
        ('initialize 2 models for t;', ast.InitModels(False, 't', 2)),
        ('initialize 2 models if not exists for t;',
            ast.InitModels(True, 't', 2)),
    ]
    for query, command in initialize_cases:
        assert parse_bql_string(query) == [command]

    infer = ast.InferExplicit(
        [
            ast.SelColExp(ast.ExpCol(None, 'x'), None),
            ast.PredCol('x', 'xi', 'xc', None),
        ],
        't_cc', None, None, None, None, None,
    )
    assert parse_bql_string('create temporary table tx as'
            ' infer explicit x, predict x as xi confidence xc from t_cc') == \
        [ast.CreateTabAs(True, False, 'tx', infer)]
def test_select_bql():
    """Check parsing of SELECT statements containing BQL expressions."""
    def select(columns, table='t', order=None):
        # SELECT <columns> FROM <table> [ORDER BY <order>]
        return ast.Select(ast.SELQUANT_ALL, columns,
            [ast.SelTab(table, None)], None, None, order, None)

    def unnamed(expression):
        return ast.SelColExp(expression, None)

    def integer(value):
        return ast.ExpLit(ast.LitInt(value))

    def rowid_is(value):
        return ast.ExpOp(ast.OP_EQ, (
            ast.ExpCol(None, 'rowid'),
            integer(value),
        ))

    def unconstrained():
        return ast.ExpLit(ast.LitNull(0))

    # PREDICTIVE PROBABILITY and PROBABILITY OF.
    assert parse_bql_string('select predictive probability of c from t;') == \
        [select([unnamed(ast.ExpBQLPredProb('c'))])]
    assert parse_bql_string('select predictive probability of c, * from t;') \
        == \
        [select([
            unnamed(ast.ExpBQLPredProb('c')),
            ast.SelColAll(None),
        ])]
    assert parse_bql_string('select c, predictive probability of d from t;') \
        == \
        [select([
            unnamed(ast.ExpCol(None, 'c')),
            unnamed(ast.ExpBQLPredProb('d')),
        ])]
    assert parse_bql_string('select predictive probability of c, d from t;') \
        == \
        [select([
            unnamed(ast.ExpBQLPredProb('c')),
            unnamed(ast.ExpCol(None, 'd')),
        ])]
    assert parse_bql_string('select probability of c = 42 from t;') == \
        [select([unnamed(ast.ExpBQLProb([('c', integer(42))], []))])]

    # SIMILARITY.
    assert parse_bql_string('select similarity from t;') == \
        [select([unnamed(ast.ExpBQLSim(None, [ast.ColListAll()]))])]
    assert parse_bql_string('select similarity to (rowid=8) from t;') == \
        [select([unnamed(ast.ExpBQLSim(rowid_is(8), [ast.ColListAll()]))])]
    assert parse_bql_string('select similarity with respect to c from t;') == \
        [select([unnamed(ast.ExpBQLSim(None, [ast.ColListLit(['c'])]))])]
    assert parse_bql_string(
            'select similarity to (rowid=8) with respect to c from t;') \
        == \
        [select([
            unnamed(ast.ExpBQLSim(rowid_is(8), [ast.ColListLit(['c'])])),
        ])]
    assert parse_bql_string(
            'select similarity to (rowid=5) with respect to age from t1;') == \
        [select(
            [unnamed(ast.ExpBQLSim(rowid_is(5), [ast.ColListLit(['age'])]))],
            table='t1')]
    assert parse_bql_string(
            'select similarity to (rowid=8) with respect to c, d from t;') == \
        [select([
            unnamed(ast.ExpBQLSim(rowid_is(8), [ast.ColListLit(['c'])])),
            unnamed(ast.ExpCol(None, 'd')),
        ])]
    assert parse_bql_string('select similarity to (rowid=8)'
            ' with respect to (c, d) from t;') == \
        [select([
            unnamed(ast.ExpBQLSim(
                rowid_is(8),
                [ast.ColListLit(['c']), ast.ColListLit(['d'])])),
        ])]
    assert parse_bql_string('select similarity to (rowid=8) with respect to' +
            ' (estimate * from columns of t order by ' +
            ' probability of value 4 limit 1)' +
            ' from t;') == \
        [select([
            unnamed(ast.ExpBQLSim(
                rowid_is(8),
                [ast.ColListSub(
                    ast.EstCols([ast.SelColAll(None)], 't', None, None,
                        [ast.Ord(ast.ExpBQLProbFn(integer(4), []),
                            ast.ORD_ASC)],
                        ast.Lim(integer(1), None))
                )])),
        ])]

    # DEPENDENCE PROBABILITY.
    assert parse_bql_string('select dependence probability with c from t;') ==\
        [select([unnamed(ast.ExpBQLDepProb('c', None))])]
    assert parse_bql_string(
            'select dependence probability of c with d from t;') == \
        [select([unnamed(ast.ExpBQLDepProb('c', 'd'))])]

    # MUTUAL INFORMATION.
    assert parse_bql_string('select mutual information with c from t;') == \
        [select([unnamed(ast.ExpBQLMutInf('c', None, None, None))])]
    assert parse_bql_string(
            'select mutual information of c with d from t;') == \
        [select([unnamed(ast.ExpBQLMutInf('c', 'd', None, None))])]
    assert parse_bql_string('select mutual information of c with d' +
            ' using (1+2) samples from t;') == \
        [select([
            unnamed(ast.ExpBQLMutInf(
                'c', 'd', None,
                ast.op(ast.OP_ADD, integer(1), integer(2)))),
        ])]
    assert parse_bql_string(
            ' select mutual information with c given (d, a=1)'
            ' using 10 samples from t; ') == \
        [select([
            unnamed(ast.ExpBQLMutInf(
                'c', None,
                [('d', unconstrained()), ('a', integer(1))],
                integer(10))),
        ])]
    assert parse_bql_string(
            ' select mutual information of b with c'
            ' given (d, a=1, e, r=2) from t;') == \
        [select([
            unnamed(ast.ExpBQLMutInf(
                'b', 'c',
                [
                    ('d', unconstrained()),
                    ('a', integer(1)),
                    ('e', unconstrained()),
                    ('r', integer(2)),
                ],
                None)),
        ])]

    # CORRELATION.
    assert parse_bql_string('select correlation with c from t;') == \
        [select([unnamed(ast.ExpBQLCorrel('c', None))])]
    assert parse_bql_string('select correlation of c with d from t;') == \
        [select([unnamed(ast.ExpBQLCorrel('c', 'd'))])]

    # XXX This got broken a while ago: parenthesization in PROBABILITY
    # OF X = E is too permissive.  I didn't notice because before I
    # introduced BQLParseError, this simply caught Exception -- which
    # covered the AssertionError that this turned into.
    #
    # with pytest.raises(parse.BQLParseError):
    #     parse_bql_string('select probability of x = 1 -' +
    #         ' probability of y = 0 from t;')
    #     # XXX Should really be this test, but getting the grammar to
    #     # admit this unambiguously is too much of a pain at the
    #     # moment.
    #     assert parse_bql_string('select probability of x = 1 -' +
    #             ' probability of y = 0 from t;') == \
    #         [ast.Select(ast.SELQUANT_ALL,
    #             [ast.SelColExp(ast.ExpBQLProb([('x',
    #                     ast.ExpOp(ast.OP_SUB, (
    #                         ast.ExpLit(ast.LitInt(1)),
    #                         ast.ExpBQLProb([('y',
    #                                 ast.ExpLit(ast.LitInt(0)))],
    #                             []),
    #                     )))],
    #                     []),
    #                 None)],
    #             [ast.SelTab('t', None)], None, None, None, None)]
    assert parse_bql_string('select probability of c1 = f(c2) from t;') == \
        [select([
            unnamed(ast.ExpBQLProb(
                [('c1', ast.ExpApp(False, 'f', [ast.ExpCol(None, 'c2')]))],
                [])),
        ])]
    assert parse_bql_string('select key, t.(estimate * from columns of t'
            ' order by dependence probability with c desc limit 4)'
            ' from t order by key asc') == \
        [select(
            [
                unnamed(ast.ExpCol(None, 'key')),
                ast.SelColSub('t',
                    ast.EstCols([ast.SelColAll(None)], 't', None, None,
                        [ast.Ord(ast.ExpBQLDepProb('c', None),
                            ast.ORD_DESC)],
                        ast.Lim(integer(4), None))),
            ],
            order=[ast.Ord(ast.ExpCol(None, 'key'), ast.ORD_ASC)])]
def p_primary_tabcol(self, tab, col):
    """Return an ast.ExpCol for column `col` qualified by table `tab`."""
    node = ast.ExpCol(tab, col)
    return node
def p_primary_column(self, col):
    """Return an ast.ExpCol for column `col` with no table (None)."""
    node = ast.ExpCol(None, col)
    return node
def test_trivial_commands():
    """Check parsing of generator, model, ANALYZE, and CREATE TABLE AS
    commands."""
    def schema():
        # Column declarations shared by the CREATE GENERATOR cases.
        return [
            ['xyz', 'numerical'],
            ['pqr', 'categorical'],
            ['lmn', 'cyclic'],
        ]

    assert parse_bql_string('create generator t_cc for t using crosscat'
            '(xyz numerical, pqr categorical, lmn cyclic)') == \
        [ast.CreateGen(False, 't_cc', False, 't', 'crosscat', schema())]
    assert parse_bql_string('create default generator t_cc for t using crosscat'
            '(xyz numerical, pqr categorical, lmn cyclic)') == \
        [ast.CreateGen(True, 't_cc', False, 't', 'crosscat', schema())]
    assert parse_bql_string('create generator t_cc if not exists'
            ' for t using crosscat'
            '(xyz numerical, pqr categorical, lmn cyclic)') == \
        [ast.CreateGen(False, 't_cc', True, 't', 'crosscat', schema())]

    def analyze(modelnos, iterations, seconds, ckpt_iterations,
            ckpt_seconds, wait):
        return ast.AnalyzeModels('t', modelnos, iterations, seconds,
            ckpt_iterations, ckpt_seconds, wait)

    command_cases = [
        ('initialize 1 model for t;',
            ast.InitModels(False, 't', 1, None)),
        ('initialize 1 model if not exists for t;',
            ast.InitModels(True, 't', 1, None)),
        ('initialize 2 models for t;',
            ast.InitModels(False, 't', 2, None)),
        ('initialize 2 models if not exists for t;',
            ast.InitModels(True, 't', 2, None)),
        ('analyze t for 1 iteration;',
            analyze(None, 1, None, None, None, False)),
        ('analyze t for 1 iteration wait;',
            analyze(None, 1, None, None, None, True)),
        ('analyze t for 1 minute;',
            analyze(None, None, 60, None, None, False)),
        ('analyze t for 1 minute wait;',
            analyze(None, None, 60, None, None, True)),
        ('analyze t for 2 minutes;',
            analyze(None, None, 120, None, None, False)),
        ('analyze t for 2 minutes wait;',
            analyze(None, None, 120, None, None, True)),
        ('analyze t for 1 second;',
            analyze(None, None, 1, None, None, False)),
        ('analyze t for 1 second wait;',
            analyze(None, None, 1, None, None, True)),
        ('analyze t for 2 seconds;',
            analyze(None, None, 2, None, None, False)),
        ('analyze t for 2 seconds wait;',
            analyze(None, None, 2, None, None, True)),
        ('analyze t model 1 for 1 iteration;',
            analyze([1], 1, None, None, None, False)),
        ('analyze t models 1,2,3 for 1 iteration;',
            analyze([1,2,3], 1, None, None, None, False)),
        ('analyze t models 1-3,5 for 1 iteration;',
            analyze([1,2,3,5], 1, None, None, None, False)),
        ('analyze t for 10 iterations'
            ' checkpoint 3 iterations',
            analyze(None, 10, None, 3, None, False)),
        ('analyze t for 10 seconds'
            ' checkpoint 3 seconds',
            analyze(None, None, 10, None, 3, False)),
    ]
    for query, command in command_cases:
        assert parse_bql_string(query) == [command]

    infer = ast.InferExplicit(
        [
            ast.SelColExp(ast.ExpCol(None, 'x'), None),
            ast.PredCol('x', 'xi', 'xc'),
        ],
        't_cc', ast.ExpLit(ast.LitNull(None)), None, None, None, None,
    )
    assert parse_bql_string('create temporary table tx as'
            ' infer explicit x, predict x as xi confidence xc from t_cc') == \
        [ast.CreateTabAs(True, False, 'tx', infer)]
def p_primary_tabcol(self, tab, col):
    """Return an ast.ExpCol for column `col` qualified by table `tab`."""
    node = ast.ExpCol(tab, col)
    return node


def p_primary_case(self, k, ws, e):
    """Return an ast.ExpCase built from `k`, `ws`, and `e`.

    NOTE(review): presumably the case key, the WHEN clauses, and the
    ELSE expression -- confirm against the grammar.
    """
    node = ast.ExpCase(k, ws, e)
    return node
def p_primary_column(self, col):
    """Return an ast.ExpCol for column `col` with no table (None)."""
    node = ast.ExpCol(None, col)
    return node


def p_primary_tabcol(self, tab, col):
    """Return an ast.ExpCol for column `col` qualified by table `tab`."""
    node = ast.ExpCol(tab, col)
    return node