def test_fasttext_explain(self):
    """Train a fastText classifier and check `classifier.explain` on it.

    A categorical fastText model is trained on `bag_of_words` and saved to
    a temporary file; a `classifier.explain` function is then created from
    that file.  The test checks the per-token explanation returned for the
    label 'Politique', and that asking for the label 'Futurama' raises a
    `ResponseException` matching "label not in model".
    """
    mldb.log("explain")

    cls_config = {
        "my_fasttext": {
            "type": "fasttext",
            "verbosity" : 0,
            "dims" : 4,
            "epoch" : 5,
        }
    }

    # Hold the temporary model file in a context manager so that it is
    # closed and removed deterministically, even if an assertion fails.
    with tempfile.NamedTemporaryFile(
            prefix=os.getcwd() + '/build/x86_64/tmp/') as tmp_file:
        model_url = "file:///" + tmp_file.name

        mldb.put("/v1/procedures/trainer", {
            "type": "classifier.train",
            "params": {
                "trainingData": "SELECT {tokens.*} as features, Theme as label FROM bag_of_words",
                "modelFileUrl": model_url,
                "functionName" : 'myclassify',
                "algorithm": "my_fasttext",
                "mode": "categorical",
                "runOnCreation": True,
                "configuration": cls_config
            }
        })

        mldb.put("/v1/functions/explain", {
            "type": "classifier.explain",
            "params": {
                "modelFileUrl": model_url,
            }
        })

        res = mldb.query("""SELECT explain({features : {tokenize(lower(' hockey Alabama Futbol'), {splitChars:' ,.:;«»[]()%!?', quoteChar:'', minTokenLength: 2}) as tokens}, label : 'Politique'}) as * """)

        self.assertTableResultEquals(res, [
            [
                "_rowName",
                "bias",
                "explanation.tokens.alabama",
                "explanation.tokens.futbol",
                "explanation.tokens.hockey"
            ],
            [
                "result",
                0,
                -0.006820799317210913,
                -0.07053825259208679,
                -0.08547607064247131
            ]
        ])

        # A label the model was not trained on must be rejected.
        with self.assertRaisesRegex(ResponseException, "label not in model"):
            mldb.query("""SELECT explain({features : {tokenize(lower(' hockey Alabama Futbol'), {splitChars:' ,.:;«»[]()%!?', quoteChar:'', minTokenLength: 2}) as tokens}, label : 'Futurama'}) as * """)
def test_join_with_and(self):
    """Compare `ds_train` with the output of a left join that uses AND.

    Runs a transform that left-joins `ds` and `ds_stats` on two conditions
    combined with AND into `ds_train2`, then checks that querying
    `ds_train2` returns as many entries as querying `ds_train`.
    """
    reference = mldb.query('select * from ds_train')
    mldb.log(reference)

    join_sql = """ select * from ds left join ds_stats on (ds.dow=ds_stats.dow and ds.a_int=ds_stats.a_int) limit 10 """
    output_dataset = {
        "id": "ds_train2",
        "type": "tabular",
        "params": {
            "unknownColumns": "add"
        }
    }
    mldb.post("/v1/procedures", {
        "type": "transform",
        "params": {
            "inputData": join_sql,
            "outputDataset": output_dataset,
            "runOnCreation": True
        }
    })

    joined = mldb.query('select * from ds_train2')
    mldb.log(joined)

    # Equivalent join conditions should produce the same dataset.  Only the
    # sizes are compared: this is a very weak check, because the columns and
    # the row ordering of the two equivalent joins are currently very
    # different.
    self.assertEqual(len(reference), len(joined),
                     'expected response sizes to match')
def test_r2(self):
    """Check the r2 value reported by `classifier.test` in regression mode.

    Records four rows with `score`, `score2` and `target` columns, then runs
    the test procedure once per score column and compares the reported r2
    with the expected value to two decimal places.
    """
    dataset = mldb.create_dataset({"id": "r2_sample",
                                   "type": "sparse.mutable"})
    # (row name, score, score2, target)
    samples = [
        ("a", 2.5, 25, 3),
        ("b", 0, -5, -0.5),
        ("c", 2, 22, 2),
        ("d", 8, 5, 7),
    ]
    for row_name, score, score2, target in samples:
        dataset.record_row(row_name, [["score", score, 0],
                                      ["score2", score2, 0],
                                      ["target", target, 0]])
    dataset.commit()

    expectations = [("score", 0.948), ("score2", -30.1177)]
    for score_col, expected_r2 in expectations:
        testing_data = \
            "select %s as score, target as label from r2_sample" % score_col
        response = mldb.put("/v1/procedures/patate", {
            "type": "classifier.test",
            "params": {
                "testingData": testing_data,
                "mode": "regression",
                "runOnCreation": True
            }
        })

        status = response.json()["status"]
        mldb.log(status)
        self.assertAlmostEqual(status["firstRun"]["status"]["r2"],
                               expected_r2, places=2)
def test_it(self):
    """Check that a string thrown inside `jseval` shows up in the error.

    The same query is run twice: once with an empty placeholder (it must
    succeed) and once with a JavaScript `throw` inserted, in which case the
    thrown message must be present in the error of the response.
    """
    dataset = mldb.create_dataset({'id': 'ds', 'type': 'sparse.mutable'})
    dataset.record_row('row1', [])
    dataset.commit()

    template = """ SELECT jseval(' {} return {{"foo" : "bar"}}; ', 'cols', {{*}} ) AS * FROM ds """

    # Without any injected statement the query works.
    working_query = template.format("")
    mldb.log(mldb.query(working_query))

    # With a thrown exception the message has to be reported back.
    failing_query = template.format('throw "this query is weird";')
    try:
        mldb.query(failing_query)
    except ResponseException as exc:
        mldb.log(exc.response.json())
        assert 'this query is weird' in exc.response.json()['error']
    else:
        assert False, 'should not be here'
def test_min_returns_last_event(self):
    """Check `min(x)` over `dataset`: a single row `[]` with `min_x` of 2."""
    # Expressions are evaluated at the latest time.
    sql = 'select min(x) as min_x from dataset order by rowName()'
    expected = [
        ["_rowName", "min_x"],
        ["[]", 2],
    ]

    table = mldb.query(sql)
    mldb.log(table)
    self.assertTableResultEquals(table, expected)
def test_MLDB_1386(self):
    """Create a glz regression trainer selecting `{donotexist}` as features.

    The procedure is created with `runOnCreation` enabled and the JSON
    response is logged; no assertion is made on its content.
    """
    # NOTE(review): presumably `donotexist` is absent from `dataset` and the
    # point is that the PUT itself must not fail — confirm.
    glz_settings = {
        "type": "glz",
        "verbosity": 3,
        "normalize": False,
        "link_function": 'linear',
        "regularization": 'none'
    }
    params = {
        "trainingData": """ select {donotexist} as features, label from dataset """,
        "modelFileUrl": "file://tmp/my_model.cls",
        "algorithm": "glz",
        "configuration": {"glz": glz_settings},
        "mode": "regression",
        "functionName": "myScorer1386",
        "runOnCreation": True
    }
    procedure = {"type": "classifier.train", "params": params}

    response = mldb.put("/v1/procedures/trainer1386", procedure)
    mldb.log(response.json())
def bad(self):
    """Move the end bound to the current middle and recompute the middle.

    Raises:
        Exception: with message "Over" when `self.limit` equals 1; `self`
            is logged first and the bounds are left untouched.
    """
    if self.limit != 1:
        self._end = self._middle
        self._update_middle()
        return

    mldb.log(self)
    raise Exception("Over")
def train_svd_with_default():
    """Train an SVD whose output datasets rely on default settings.

    The row output is given as a bare dataset id and the column output as
    a config holding only an id.  After creating and running the
    procedure, both output datasets must be of type 'embedding'.
    """
    procedure_url = "/v1/procedures/svd"

    config = {
        'type': 'svd.train',
        'params': {
            "trainingData": "select * from dataset1",
            # first form: the output dataset is named by its id only
            "rowOutputDataset": "svd_row",
            # second form: a dataset config that only carries an id
            "columnOutputDataset": {
                "id": "svd_column"
            }
        }
    }

    mldb.log(mldb.put(procedure_url, config))
    mldb.log(mldb.post(procedure_url + '/runs'))

    for dataset_id in ('svd_column', 'svd_row'):
        details = mldb.get('/v1/datasets/' + dataset_id)
        assert details.json()['type'] == 'embedding', \
            'expected an embedding output dataset'
# Registers a Python `script.apply` function named `filter_top_themes` whose
# script logs its arguments and returns str() of `mldb.script.args[0][1]` as a
# column name, then calls it with accented (non-ASCII) column names and checks
# the resulting table, including the accented text in the returned column name.
# NOTE(review): the embedded script source depends on line breaks that are not
# visible in this view — confirm its exact layout before reformatting this test.
def test_python_script_apply_with_utf8(self): mldb.put( "/v1/functions/filter_top_themes", { "type": "script.apply", "params": { "language": 'python', "scriptConfig": { "source": """ from mldb import mldb # retrieve all themes mldb.log(mldb.script.args) request.set_return([[str(mldb.script.args[0][1]), 0, '1970-01-01T00:00:00.0000000Z']]) """ } } }) self.assertTableResultEquals( mldb.query(""" SELECT filter_top_themes( {{"Politique Provinciale":2, "Élections":1, "Thèmes et sous-thàmes":0} AS args} ) AS * """), [["_rowName", "return.['Thèmes et sous-thàmes', [0, '-Inf']]"], ["result", 0]])
def test_mldbfb_520_join(self):
    """
    temporal_earliest and temporal_latest must yield correct results when
    used with join expressions: each selected column of the joined row is
    expected with its earliest (respectively latest) recorded timestamp.
    """
    left = mldb.create_dataset({
        'id': 'mldbfb520_join_left',
        'type': 'sparse.mutable'
    })
    left.record_row('user1', [['behA', 1, 1],
                              ['behA', 1, 2],
                              ['behA', 1, 3],
                              ['behB', 1, 9],
                              ['behC', 1, 8]])
    left.commit()

    right = mldb.create_dataset({
        'id': 'mldbfb520_join_right',
        'type': 'sparse.mutable'
    })
    right.record_row('user1', [['behD', 1, 1],
                               ['behD', 1, 2],
                               ['behD', 1, 3],
                               ['behB', 1, 9]])
    right.commit()

    earliest_query = """ SELECT temporal_earliest({ COLUMN EXPR (WHERE columnName() IN ('l.behA', 'l.behB', 'r.behD'))}) AS * FROM mldbfb520_join_left AS l INNER JOIN mldbfb520_join_right as r ON l.behB = r.behB """
    earliest_rows = [{
        "rowName": "[user1]-[user1]",
        "columns": [["l.behA", 1, "1970-01-01T00:00:01Z"],
                    ["l.behB", 1, "1970-01-01T00:00:09Z"],
                    ["r.behD", 1, "1970-01-01T00:00:01Z"]]
    }]

    latest_query = """ SELECT temporal_latest({ COLUMN EXPR (WHERE columnName() IN ('l.behA', 'l.behB', 'r.behD'))}) AS * FROM mldbfb520_join_left AS l INNER JOIN mldbfb520_join_right as r ON l.behB = r.behB """
    latest_rows = [{
        "rowName": "[user1]-[user1]",
        "columns": [["l.behA", 1, "1970-01-01T00:00:03Z"],
                    ["l.behB", 1, "1970-01-01T00:00:09Z"],
                    ["r.behD", 1, "1970-01-01T00:00:03Z"]]
    }]

    # Earliest first, then latest.
    cases = ((earliest_query, earliest_rows), (latest_query, latest_rows))
    for sql, rows in cases:
        response = mldb.get('/v1/query', q=sql)
        mldb.log(response)
        self.assertFullResultEquals(response.json(), rows)
def test_join_no_on_clause(self):
    """Check a join that has no ON clause and is filtered by WHERE only.

    The WHERE clause uses `regex_search` between the left value and the
    right column; a single joined row, `[result]-[1]`, is expected.
    """
    sql = (
        "select test1.x from (select 'toy story' as x) as test1 "
        'join atom_dataset({"toy story": 1, "terminator": 5}) as test2 '
        "where regex_search(test1.x, test2.column)"
    )
    expected = [
        ["_rowName", "test1.x"],
        ["[result]-[1]", "toy story"],
    ]

    rows = mldb.query(sql)
    mldb.log(rows)
    self.assertEqual(rows, expected)
def test_right(self):
    """Check that `rightRowName()` over `ds1 join ds2` gives `y` for `[x]-[y]`."""
    expected = [
        ["_rowName", "rightRowName()"],
        ["[x]-[y]", "y"],
    ]
    rows = mldb.query("select rightRowName() from ds1 join ds2")
    mldb.log(rows)
    self.assertEqual(rows, expected)
def assert_fail(qry):
    """Run `qry` through `/v1/query` and require it to fail.

    The text of the failing response is logged.  If the query succeeds, an
    AssertionError ('should not be here') is raised.
    """
    try:
        mldb.get('/v1/query', q=qry)
    except ResponseException as exc:
        # Expected outcome: report what the server answered and stop here.
        mldb.log(exc.response.text)
        return
    assert False, 'should not be here'