示例#1
0
        def test_fasttext_explain(self):
            """Train a fastText classifier and check ``classifier.explain`` on it.

            A categorical ``fasttext`` model is trained from ``bag_of_words`` and
            written to a temporary model file. A ``classifier.explain`` function
            is created from that file and queried twice: once with a label whose
            per-token explanation is compared to fixed values, and once with a
            label for which the query must fail with "label not in model".
            """
            mldb.log("explain")

            cls_config = {
                "my_fasttext": {
                    "type": "fasttext",
                    "verbosity": 0,
                    "dims": 4,
                    "epoch": 5,
                }
            }

            known_label_query = """SELECT explain({features : {tokenize(lower(' hockey Alabama Futbol'), {splitChars:' ,.:;«»[]()%!?', quoteChar:'', minTokenLength: 2}) as tokens},
                                                label : 'Politique'}) as * 
                            """
            unknown_label_query = """SELECT explain({features : {tokenize(lower(' hockey Alabama Futbol'), {splitChars:' ,.:;«»[]()%!?', quoteChar:'', minTokenLength: 2}) as tokens},
                                                    label : 'Futurama'}) as * 
                                """

            # The context manager closes (and thereby removes) the temporary
            # model file deterministically, even when an assertion below fails,
            # instead of leaving the handle open until garbage collection.
            with tempfile.NamedTemporaryFile(
                    prefix=os.getcwd() + '/build/x86_64/tmp/') as tmp_file:
                model_url = "file:///" + tmp_file.name

                mldb.put("/v1/procedures/trainer", {
                    "type": "classifier.train",
                    "params": {
                        "trainingData": "SELECT {tokens.*} as features, Theme as label FROM bag_of_words",
                        "modelFileUrl": model_url,
                        "functionName": 'myclassify',
                        "algorithm": "my_fasttext",
                        "mode": "categorical",
                        "runOnCreation": True,
                        "configuration": cls_config
                    }
                })

                mldb.put("/v1/functions/explain", {
                    "type": "classifier.explain",
                    "params": {
                        "modelFileUrl": model_url,
                    }
                })

                res = mldb.query(known_label_query)

                self.assertTableResultEquals(res, [
                    [
                        "_rowName",
                        "bias",
                        "explanation.tokens.alabama",
                        "explanation.tokens.futbol",
                        "explanation.tokens.hockey"
                    ],
                    [
                        "result",
                        0,
                        -0.006820799317210913,
                        -0.07053825259208679,
                        -0.08547607064247131
                    ]
                ])

                # A label that the test expects to be rejected by the model.
                with self.assertRaisesRegex(ResponseException,
                                            "label not in model"):
                    mldb.query(unknown_label_query)
示例#2
0
    def test_join_with_and(self):
        """Compare the row count of ``ds_train`` with an AND-joined equivalent.

        A ``transform`` procedure materialises a left join of ``ds`` and
        ``ds_stats`` (ON clause made of two AND-ed equalities) into
        ``ds_train2``; the query result is then expected to have the same
        length as the one obtained from ``ds_train``.
        """
        reference = mldb.query('select * from ds_train')
        mldb.log(reference)

        join_query = """
                        select *
                        from ds left join ds_stats on (ds.dow=ds_stats.dow and ds.a_int=ds_stats.a_int)
                        limit 10
                    """
        output_dataset = {
            "id": "ds_train2",
            "type": "tabular",
            "params": {"unknownColumns": "add"},
        }
        procedure_config = {
            "type": "transform",
            "params": {
                "inputData": join_query,
                "outputDataset": output_dataset,
                "runOnCreation": True,
            },
        }
        mldb.post("/v1/procedures", procedure_config)

        joined = mldb.query('select * from ds_train2')
        mldb.log(joined)

        # Equivalent join conditions should yield the same dataset. Only the
        # sizes are compared because the columns and the row ordering of the
        # two equivalent joins currently differ a lot.
        self.assertEqual(len(reference), len(joined),
                         'expected response sizes to match')
示例#3
0
    def test_r2(self):
        """Check the ``r2`` value reported by ``classifier.test`` in regression mode.

        Four rows with ``score``, ``score2`` and ``target`` columns are recorded,
        then each score column is tested against ``target`` and the reported
        ``r2`` is compared to a fixed value with two decimal places.
        """
        # (row name, score, score2, target); every cell is recorded at time 0.
        samples = [
            ("a", 2.5, 25, 3),
            ("b", 0, -5, -0.5),
            ("c", 2, 22, 2),
            ("d", 8, 5, 7),
        ]
        dataset = mldb.create_dataset({"id": "r2_sample",
                                       "type": "sparse.mutable"})
        for row_name, score, score2, target in samples:
            dataset.record_row(row_name, [["score", score, 0],
                                          ["score2", score2, 0],
                                          ["target", target, 0]])
        dataset.commit()

        expectations = [("score", 0.948), ("score2", -30.1177)]
        for score_col, expected_r2 in expectations:
            testing_data = (
                "select %s as score, target as label from r2_sample"
                % score_col)
            response = mldb.put("/v1/procedures/patate", {
                "type": "classifier.test",
                "params": {
                    "testingData": testing_data,
                    "mode": "regression",
                    "runOnCreation": True,
                },
            })

            status = response.json()["status"]
            mldb.log(status)
            self.assertAlmostEqual(status["firstRun"]["status"]["r2"],
                                   expected_r2, places=2)
示例#4
0
    def test_it(self):
        """Check that a message thrown inside ``jseval`` shows up in the error.

        The same query template is run twice: first with an empty JS prefix
        (expected to work), then with a ``throw`` statement whose message must
        appear in the ``error`` field of the failed response.
        """
        dataset = mldb.create_dataset({'id': 'ds', 'type': 'sparse.mutable'})
        dataset.record_row('row1', [])
        dataset.commit()

        # `{}` receives the JS snippet; doubled braces are literal braces.
        query = """
            SELECT jseval('
                {}
                return {{"foo" : "bar"}};
                ',
                'cols',
                {{*}}
            ) AS *
            FROM ds
                """

        # Without any extra statement the query works.
        working_query = query.format("")
        mldb.log(mldb.query(working_query))

        # With a thrown exception the message has to reach the caller.
        throwing_query = query.format('throw "this query is weird";')
        try:
            mldb.query(throwing_query)
        except ResponseException as exc:
            payload = exc.response.json()
            mldb.log(payload)
            assert 'this query is weird' in payload['error']
        else:
            assert False, 'should not be here'
    def test_min_returns_last_event(self):
        """Check the single-row result of ``min(x)`` over ``dataset``."""
        # Expressions are evaluated at latest time.
        expected = [["_rowName", "min_x"], ["[]", 2]]

        result = mldb.query(
            'select min(x) as min_x from dataset order by rowName()')
        mldb.log(result)

        self.assertTableResultEquals(result, expected)
    def test_MLDB_1386(self):
        """Create a ``glz`` regression trainer selecting ``{donotexist}`` and log the reply."""
        glz_settings = {
            "type": "glz",
            "verbosity": 3,
            "normalize": False,
            "link_function": 'linear',
            "regularization": 'none',
        }
        params = {
            "trainingData": """
                    select {donotexist} as features, label from dataset
                """,
            "modelFileUrl": "file://tmp/my_model.cls",
            "algorithm": "glz",
            "configuration": {"glz": glz_settings},
            "mode": "regression",
            "functionName": "myScorer1386",
            "runOnCreation": True,
        }
        conf = {"type": "classifier.train", "params": params}

        response = mldb.put("/v1/procedures/trainer1386", conf)
        mldb.log(response.json())
示例#7
0
    def bad(self):
        """Move ``_end`` to the current ``_middle`` and refresh the middle.

        Raises:
            Exception: with message "Over" when ``limit`` equals 1; the
                object is logged before raising.
        """
        exhausted = self.limit == 1
        if exhausted:
            mldb.log(self)
            raise Exception("Over")

        # Discard the upper part: the old middle becomes the new end.
        self._end = self._middle
        self._update_middle()
示例#8
0
def train_svd_with_default():
    """Train an SVD with default output datasets and check their type.

    The procedure is created, run once, and both output datasets
    (``svd_column`` then ``svd_row``) are fetched and expected to be of
    type ``embedding``.
    """
    procedure_url = "/v1/procedures/svd"
    params = {
        "trainingData": "select * from dataset1",
        # First way to specify an output dataset using defaults: id only.
        "rowOutputDataset": "svd_row",
        # Second way to specify an output dataset using defaults: a config
        # object that only carries the id.
        "columnOutputDataset": {"id": "svd_column"},
    }
    procedure_config = {'type': 'svd.train', 'params': params}

    mldb.log(mldb.put(procedure_url, procedure_config))
    mldb.log(mldb.post(procedure_url + '/runs'))

    for dataset_id in ('svd_column', 'svd_row'):
        dataset_info = mldb.get('/v1/datasets/' + dataset_id)
        assert dataset_info.json()['type'] == 'embedding', \
            'expected an embedding output dataset'
    def test_python_script_apply_with_utf8(self):
        mldb.put(
            "/v1/functions/filter_top_themes", {
                "type": "script.apply",
                "params": {
                    "language": 'python',
                    "scriptConfig": {
                        "source":
                        """
from mldb import mldb
# retrieve all themes
mldb.log(mldb.script.args)

request.set_return([[str(mldb.script.args[0][1]), 0, '1970-01-01T00:00:00.0000000Z']])
"""
                    }
                }
            })

        self.assertTableResultEquals(
            mldb.query("""
        SELECT filter_top_themes(
            {{"Politique Provinciale":2, "Élections":1, "Thèmes et sous-thàmes":0} AS args}
        ) AS *
        """), [["_rowName", "return.['Thèmes et sous-thàmes', [0, '-Inf']]"],
               ["result", 0]])
    def test_mldbfb_520_join(self):
        """
        temporal_earliest doesn't yield correct result when used with
        join expressions.

        Both temporal_earliest and temporal_latest are run over the same
        inner join and their full results are compared to expected values.
        """
        left = mldb.create_dataset({
            'id': 'mldbfb520_join_left',
            'type': 'sparse.mutable'
        })
        left.record_row('user1', [['behA', 1, 1], ['behA', 1, 2],
                                  ['behA', 1, 3], ['behB', 1, 9],
                                  ['behC', 1, 8]])
        left.commit()

        right = mldb.create_dataset({
            'id': 'mldbfb520_join_right',
            'type': 'sparse.mutable'
        })
        right.record_row('user1', [['behD', 1, 1], ['behD', 1, 2],
                                   ['behD', 1, 3], ['behB', 1, 9]])
        right.commit()

        # %s receives the temporal aggregator under test.
        query_template = """
            SELECT %s({
            COLUMN EXPR (WHERE columnName() IN ('l.behA', 'l.behB', 'r.behD'))}) AS * 
            FROM mldbfb520_join_left AS l
            INNER JOIN mldbfb520_join_right as r
            ON l.behB = r.behB
        """

        # (aggregator, timestamp expected for l.behA and r.behD); l.behB has
        # a single event so its timestamp is the same in both cases.
        cases = [
            ('temporal_earliest', "1970-01-01T00:00:01Z"),
            ('temporal_latest', "1970-01-01T00:00:03Z"),
        ]
        for aggregator, timestamp in cases:
            res = mldb.get('/v1/query', q=query_template % aggregator)

            expected = [{
                "rowName": "[user1]-[user1]",
                "columns": [["l.behA", 1, timestamp],
                            ["l.behB", 1, "1970-01-01T00:00:09Z"],
                            ["r.behD", 1, timestamp]]
            }]
            mldb.log(res)
            self.assertFullResultEquals(res.json(), expected)
示例#11
0
    def test_join_no_on_clause(self):
        """Join a sub-select with ``atom_dataset`` without ON, filtering in WHERE."""
        query = (
            'select test1.x from (select \'toy story\' as x) as test1 '
            'join atom_dataset({"toy story": 1, "terminator": 5}) as test2 '
            'where regex_search(test1.x, test2.column)')

        result = mldb.query(query)
        mldb.log(result)

        header = ["_rowName", "test1.x"]
        row = ["[result]-[1]", "toy story"]
        self.assertEqual(result, [header, row])
    def test_right(self):
        """Check ``rightRowName()`` on the join of ``ds1`` and ``ds2``."""
        result = mldb.query("select rightRowName() from ds1 join ds2")
        mldb.log(result)

        self.assertEqual(result, [
            ["_rowName", "rightRowName()"],
            ["[x]-[y]", "y"],
        ])
示例#13
0
def assert_fail(qry):
    """Run ``qry`` through ``/v1/query`` and require that it is rejected.

    The text of the error response is logged.

    Args:
        qry: query string passed as the ``q`` parameter.

    Raises:
        AssertionError: if the query succeeds instead of raising
            ``ResponseException``.
    """
    try:
        mldb.get('/v1/query', q=qry)
    except ResponseException as exc:
        result = exc.response
    else:
        # Raised explicitly rather than via `assert False`: assert statements
        # are stripped under `python -O`, which would let execution fall
        # through to the logging line with `result` unbound.
        raise AssertionError('should not be here')
    mldb.log(result.text)