Example #1
def train_classifier(when):
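    # Train a glz classifier over dataset1; the WHEN clause restricts which
    # cell timestamps contribute to the training data.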
    mldb.put(
        "/v1/procedures/tng_classif", {
            "type": "classifier.train",
            "params": {
                "trainingData": {
                    "select": "{* EXCLUDING (x)} as features, x as label",
                    "when": when,
                    "from": {
                        "id": "dataset1"
                    }
                },
                "configuration": {
                    "glz": {
                        "type": "glz",
                        "verbosity": 3,
                        "normalize": True,
                        "regularization": 'l2'
                    }
                },
                "algorithm": "glz",
                "modelFileUrl": "file://tmp/MLDB-945.tng.cls"
            }
        })

    mldb.post('/v1/procedures/tng_classif/runs')
Example #2
    def test_svd(self):
        
        # this is throwing because the not_yet_created dataset 
        # does not exist

        with self.assertRaises(ResponseException) as re:
            mldb.put("/v1/datasets/training_data",{
                "type": "merged",
                "params": {
                    "datasets": [
                        {"id": "sample"},
                        {"id": "not_yet_created"} # attention
                    ]
                }
            })

        # we want to store output in 'not_yet_created'
        # the fact we tried to access 'not_yet_created' above
        # makes the first attempt to create it fail
        
        mldb.put("/v1/procedures/train_svd", {
            "type": "svd.train",
            "params": {
                "rowOutputDataset": "not_yet_created", # attention
                "outputColumn": "svd.embedding.00",
                "modelFileUrl": "file://tmp/svd.bin.test.gz",
                "trainingData": "select * from sample",
                "numSingularValues": 1,
                "runOnCreation": True
            }
        })
        

        # this should now work
        mldb.get("/v1/query", q="select x from not_yet_created")
Example #3
    def test_mldb_put_dataset(self):
        # the dataset id contains non-ASCII characters, so the URL must be
        # percent-encoded before it is used
        _id = 'épopée'
        url = quote('/v1/datasets/' + _id)
        mldb.log(url)
        mldb.put(url, {'type': 'sparse.mutable'})
        res = mldb.get(url).json()
        self.assertEqual(res['id'], _id)
Example #4
    def test_query_first_row(self):
        # Example of a query passed straight to mongodb. The result comes back
        # formatted as an MLDB result.
        mldb.put('/v1/functions/mongo_query', {
            'type' : 'mongodb.query',
            'params' : {
                'uriConnectionScheme' : self.connection_scheme,
                'collection' : 'test_collection'
            }
        })
        query = json.dumps({
            'type' : {
                '$eq' : 'nested_obj'
            }
        })
        res = mldb.get('/v1/functions/mongo_query/application',
                       input={'query' : query}).json()
        self.assertEqual(res['output']['type'], 'nested_obj')

        _id = res['output']['_id']
        query = json.dumps({
            '_id' : _id
        })
        res = mldb.get('/v1/functions/mongo_query/application',
                       input={'query' : query}).json()
        self.assertEqual(res['output']['type'], 'nested_obj')
Example #5
    def test_onevsall_simple(self):
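        # Train a one-vs-all multilabel decision tree on the 'trivial' dataset,
        # then check the per-label scores returned by the classifyMe function.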
        conf = {
            "type": "classifier.train",
            "params": {
                "trainingData": """
                                   select {* EXCLUDING(label0, label1)} as features, 
                                   {label0, label1} as label from trivial
                                """,
                "modelFileUrl": "file://build/x86_64/tmp/multilabel1.cls",
                "algorithm": "dt",
                "mode": "multilabel",
                "multilabelStrategy": "one-vs-all",
                "functionName": "classifyMe",
                "configuration": {
                    "dt": {
                        "type": "decision_tree",
                        "max_depth": 8,
                        "verbosity": 0,
                        "update_alg": "gentle",
                        "random_feature_propn": 1
                    }
                },
            }
        }

        mldb.put("/v1/procedures/multilabel_train", conf)

        res = mldb.query(
            "SELECT classifyMe({features : {5 as feat1, 0 as feat2}}) as *")
        self.assertTableResultEquals(res, [[
            "_rowName", "scores.\"\"\"label0\"\"\"",
            "scores.\"\"\"label1\"\"\""
        ], ["result", 0.9999726414680481, 2.73847472271882e-05]])
Example #6
    def test_empty_json(self):
        """
        Empty JSON returns the proper code
        """
        mldb.put(
            "/v1/plugins/mldb2114", {
                "type": "python",
                "params": {
                    "source": {
                        "routes":
                        """
if request.verb in ['GET', 'DELETE']:
    request.set_return({}, 200)
else:
    request.set_return({}, 201)
"""
                    }
                }
            })

        res = mldb.get('/v1/plugins/mldb2114/routes/foo')
        self.assertEqual(res.status_code, 200)
        self.assertEqual(res.json(), {})

        res = mldb.post('/v1/plugins/mldb2114/routes/foo')
        self.assertEqual(res.status_code, 201)
        self.assertEqual(res.json(), {})

        res = mldb.put('/v1/plugins/mldb2114/routes/foo')
        self.assertEqual(res.status_code, 201)
        self.assertEqual(res.json(), {})

        res = mldb.delete('/v1/plugins/mldb2114/routes/foo')
        self.assertEqual(res.status_code, 200)
        self.assertEqual(res.json(), {})
Example #7
    def test_set_return_0(self):
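        # set_return with a status code of 0 is invalid: every verb should
        # surface a 500 from the plugin route.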
        mldb.put(
            "/v1/plugins/mldb2114", {
                "type": "python",
                "params": {
                    "source": {
                        "routes": """request.set_return("", 0)"""
                    }
                }
            })

        with self.assertRaises(ResponseException) as e:
            mldb.get('/v1/plugins/mldb2114/routes/foo')
        self.assertEqual(e.exception.response.status_code, 500)

        with self.assertRaises(ResponseException) as e:
            mldb.post('/v1/plugins/mldb2114/routes/foo')
        self.assertEqual(e.exception.response.status_code, 500)

        with self.assertRaises(ResponseException) as e:
            mldb.put('/v1/plugins/mldb2114/routes/foo')
        self.assertEqual(e.exception.response.status_code, 500)

        with self.assertRaises(ResponseException) as e:
            mldb.delete('/v1/plugins/mldb2114/routes/foo')
        self.assertEqual(e.exception.response.status_code, 500)
Example #8
    def test_wildcard(self):
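        # Using the same wildcard ('tokens.*') for both features and label
        # should be rejected by classifier.train.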

        cls_config = {
                "my_fasttext": {
                    "type": "fasttext",
                    "verbosity" : 0,
                    "dims" : 4,
                    "epoch" : 5,
                }
            }

        with self.assertMldbRaises(expected_regexp=
                "Dataset column 'tokens.alabama' cannot be used in both label "
                "and feature because of label wildcard 'tokens'"):
            mldb.put("/v1/procedures/trainer", {
                "type": "classifier.train",
                "params": {
                    "trainingData": "SELECT {tokens.*} as features, {tokens.*} as label FROM bag_of_words",
                    "modelFileUrl": "file://tmp/src_fasttext.cls",
                    "functionName" : 'myclassify',
                    "algorithm": "my_fasttext",
                    "mode": "multilabel",
                    "runOnCreation": True,
                    "configuration": cls_config
                }
            })
Example #9
    def test_do_not_run_on_creation(self):
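        # With runOnCreation false the procedure is created without complaint;
        # the missing inputData is only reported when a run is posted.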
        mldb.put('/v1/procedures/do_not_run_on_creation', {
            'type' : 'transform',
            'params' : {
                'skipEmptyRows' : False,
                'runOnCreation' : False
            }
        })

        msg = 'You need to define inputData'
        with self.assertRaisesRegex(ResponseException, msg):
            mldb.put('/v1/procedures/do_not_run_on_creation/runs/r1', {
                'params' : {}
            })

        res = mldb.post('/v1/procedures', {
            'type' : 'transform',
            'params' : {
                'skipEmptyRows' : False,
                'runOnCreation' : False
            }
        }).json()
        with self.assertRaisesRegex(ResponseException, msg):
            mldb.post('/v1/procedures/{}/runs'.format(res['id']), {
                'params' : {}
            })
Example #10
    def test_assert_full_result_equals(self):
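        # Build a small sparse dataset and compare the full query result,
        # including per-cell timestamps (-Inf for absent cells).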
        url = '/v1/datasets/ds'
        mldb.put(url, {
            'type' : 'sparse.mutable'
        })
        mldb.post(url + '/rows', {
            'rowName' : 'row1',
            'columns' : [['colA', 1, 0]]
        })
        mldb.post(url + '/rows', {
            'rowName' : 'row2',
            'columns' : [['colB', 2, 1]]
        })
        mldb.post(url + '/commit')

        res = mldb.get('/v1/query',
                       q="SELECT colA, colB FROM ds ORDER BY rowName()").json()
        self.assertFullResultEquals(res, [
            {
                'rowName' : 'row1',
                'columns' : [['colA', 1, '1970-01-01T00:00:00Z'],
                             ['colB', None, '-Inf']]
            },
            {
                'rowName' : 'row2',
                'columns' : [
                    ['colA', None, '-Inf'],
                    ['colB', 2, '1970-01-01T00:00:01Z']]
            }
        ])
Example #11
    @classmethod
    def setUpClass(cls):
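        # Shared fixture: import the fasttext training CSV into src_train, then
        # tokenize the Body column into the bag_of_words dataset.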
        mldb.put("/v1/procedures/csv_proc", {
                    "type": "import.text",
                    "params": {
                        'dataFileUrl' : 'file://mldb/testing/dataset/fasttext_train.csv',
                        "outputDataset": {
                            "id": "src_train",                    
                        },
                        "ignoreBadLines" : True,
                        "allowMultiLines" : True,
                        "structuredColumnNames" : True,
                        "limit" : 10000,
                    }
                }) 

        mldb.put("/v1/procedures/baggify", {
            "type": "transform",
            "params": {
                "inputData": """
                select Theme, tokenize(lower(Body), {splitChars:' ,.:;«»[]()%!?', quoteChar:'', minTokenLength: 2}) 
                as tokens from src_train       
                """,
                "outputDataset": {
                    "id": "bag_of_words",
                    "type": "sparse.mutable"
                },
            }
        })
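
        # Optional sanity check (a sketch, not part of the original setup):
        # log how many bag-of-words rows the transform produced.
        mldb.log(mldb.query("SELECT count(*) FROM bag_of_words"))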
Example #12
        def test_fasttext_explain(self):
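            # Train a fasttext categorical classifier on the bag_of_words
            # dataset, then use classifier.explain to get per-token
            # contributions for the 'Politique' label.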

            mldb.log("explain")

            cls_config = {
                "my_fasttext": {
                    "type": "fasttext",
                    "verbosity" : 0,
                    "dims" : 4,
                    "epoch" : 5,
                }
            }

            tmp_file = tempfile.NamedTemporaryFile(prefix=os.getcwd() + '/build/x86_64/tmp/')

            mldb.put("/v1/procedures/trainer", {
                "type": "classifier.train",
                "params": {
                    "trainingData": "SELECT {tokens.*} as features, Theme as label FROM bag_of_words",
                    "modelFileUrl": "file:///" + tmp_file.name,
                    "functionName" : 'myclassify',
                    "algorithm": "my_fasttext",
                    "mode": "categorical",
                    "runOnCreation": True,
                    "configuration": cls_config
                }
            })
            
            mldb.put("/v1/functions/explain", {
                "type": "classifier.explain",
                "params": {
                    "modelFileUrl": "file:///" + tmp_file.name,
                }
            })

            res = mldb.query("""SELECT explain({features : {tokenize(lower(' hockey Alabama Futbol'), {splitChars:' ,.:;«»[]()%!?', quoteChar:'', minTokenLength: 2}) as tokens},
                                                label : 'Politique'}) as * 
                            """)

            self.assertTableResultEquals(res, [
                [
                    "_rowName",
                    "bias",
                    "explanation.tokens.alabama",
                    "explanation.tokens.futbol",
                    "explanation.tokens.hockey"
                ],
                [
                    "result",
                    0,
                    -0.006820799317210913,
                    -0.07053825259208679,
                    -0.08547607064247131
                ]
            ])

            with self.assertRaisesRegex(ResponseException, "label not in model"):
                res = mldb.query("""SELECT explain({features : {tokenize(lower(' hockey Alabama Futbol'), {splitChars:' ,.:;«»[]()%!?', quoteChar:'', minTokenLength: 2}) as tokens},
                                                    label : 'Futurama'}) as * 
                                """)
Example #13
    @classmethod
    def setUpClass(cls):
        mldb.put('/v1/plugins/deepteach', {
            "type": "python",
            "params": {
                "address": "git://github.com/mldbai/deepteach#ip-rndfrst-prob",
            }
        })
Example #14
def train_svd(when, output_index):
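    # Train an SVD over svd_example with the given WHEN clause and return the
    # number of embedding columns produced for the rows.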
    global dataset_index
    dataset_index += 1

    svd_procedure = "/v1/procedures/when_svd"
    # svd procedure configuration
    svd_config = {
        'type': 'svd.train',
        'params': {
            "trainingData": {
                "from": {
                    "id": "svd_example"
                },
                "when": when
            },
            "rowOutputDataset": {
                "id": "when_svd_row_" + str(dataset_index),
                'type': "embedding"
            },
            "columnOutputDataset": {
                "id": "svd_embedding_" + str(output_index),
                "type": "embedding"
            }
        }
    }

    mldb.put(svd_procedure, svd_config)
    mldb.post(svd_procedure + '/runs')
    result = mldb.get('/v1/query',
                      q="SELECT * FROM when_svd_row_" + str(dataset_index))
    response = result.json()
    return len(response[0]["columns"])
Example #15
    def test_spread(self):
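        # Split ds1 into reproducible 80/20 train and test datasets and check
        # which rows end up in each output.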
        mldb.put(
            "/v1/procedures/split", {
                "type": "split",
                "params": {
                    "labels":
                    "SELECT * FROM ds1",
                    "reproducible":
                    True,
                    "splits": [0.8, 0.2],
                    "outputDatasets": [{
                        "id": "ds_train",
                        "type": "sparse.mutable"
                    }, {
                        "id": "ds_test",
                        "type": "sparse.mutable"
                    }],
                }
            })

        res1 = mldb.query("SELECT * FROM ds_train ORDER BY rowName() DESC")
        res2 = mldb.query("SELECT * FROM ds_test ORDER BY rowName() DESC")

        self.assertEqual(
            res1, [["_rowName", "y", "x"], ["3", 1, None], ["0", None, 1]])

        self.assertEqual(
            res2, [["_rowName", "y", "x"], ["2", 1, None], ["1", None, 1]])
Example #16
    def test_const_userfunction_var(self):
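        # fetcher results are never constant-folded: neither when the argument
        # is a column nor when the function is declared non-deterministic.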
        mldb.put('/v1/functions/fetch', {'type': 'fetcher'})
        res = mldb.query(
            "SELECT __isconst(fetch({url: a})) as isconst FROM ds1 ORDER BY rowName()"
        )
        self.assertTableResultEquals(res, [
            [
                '_rowName',
                'isconst',
            ],
            ['row1', False],
        ])

        mldb.put('/v1/functions/fetch2', {
            'type': 'fetcher',
            'deterministic': False
        })
        res = mldb.query(
            "SELECT __isconst(fetch2({url: 'itdoesntreallymatter'})) as isconst FROM ds1 ORDER BY rowName()"
        )
        self.assertTableResultEquals(res, [
            [
                '_rowName',
                'isconst',
            ],
            ['row1', False],
        ])
Example #17
    def test_selection_of_creds(self):
        # store a dummy credential for a specific path
        resp = mldb.put(
            "/v1/credentials/badcred", {
                "store": {
                    "resourceType": "aws:s3",
                    "resource": "s3://dummy",
                    "credential": {
                        "provider": "Credentials collection",
                        "protocol": "http",
                        "location": "s3.amazonaws.com",
                        "id": "this is my key",
                        "secret": "this is my secret"
                    }
                }
            })

        csv_conf = {
            "type": "import.text",
            "params": {
                'dataFileUrl': 's3://dummy/test.csv',
                "outputDataset": {
                    "id": "test"
                },
                "runOnCreation": True
            }
        }

        # this is expected to pick the most specific but invalid credentials
        with self.assertRaises(ResponseException) as re:
            resp = mldb.put("/v1/procedures/import", csv_conf)
Example #18
    def test_function_creation_bug(self):
        mldb.post(
            "/v1/procedures", {
                "type": "import.text",
                "params": {
                    "dataFileUrl": "http://public.mldb.ai/narrow_test.csv.gz",
                    "outputDataset": "narrow",
                    "runOnCreation": True
                }
            })

        # It seems the training step fails to save the function, yet we proceed
        # to testing, where the function cannot be found.
        # 1) we should not move on to testing if function creation fails; we
        #    should report that function creation failed
        # 2) function creation should not fail for a dt on this dataset

        mldb.put(
            "/v1/procedures/train", {
                "type": "classifier.experiment",
                "params": {
                    "experimentName": "x",
                    "inputData":
                    "select {a} as features, b as label from narrow",
                    "algorithm": "dt",
                    "mode": "regression",
                    "configurationFile":
                    "./mldb/container_files/classifiers.json",
                    "modelFileUrlPattern":
                    "file://tmp/MLDB-1597-creation$runid.cls",
                    "runOnCreation": True
                }
            })
Example #19
    def test_no_set_return(self):
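        # A route handler that never calls set_return should fail with a 500
        # on every verb.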
        mldb.put(
            "/v1/plugins/mldb2114", {
                "type": "python",
                "params": {
                    "source": {
                        "routes":
                        """
from mldb import mldb
mldb.log('no return')
"""
                    }
                }
            })

        msg = "Return value is required for route handlers but not set"

        with self.assertRaisesRegex(ResponseException, msg) as e:
            mldb.get('/v1/plugins/mldb2114/routes/foo')
        self.assertEqual(e.exception.response.status_code, 500)

        with self.assertRaisesRegex(ResponseException, msg) as e:
            mldb.post('/v1/plugins/mldb2114/routes/foo')
        self.assertEqual(e.exception.response.status_code, 500)

        with self.assertRaisesRegex(ResponseException, msg) as e:
            mldb.put('/v1/plugins/mldb2114/routes/foo')
        self.assertEqual(e.exception.response.status_code, 500)

        with self.assertRaisesRegex(ResponseException, msg) as e:
            mldb.delete('/v1/plugins/mldb2114/routes/foo')
        self.assertEqual(e.exception.response.status_code, 500)
Example #20
def train_svd(order_by, where, offset, limit):
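    # Train an SVD over svd_example restricted by the given ORDER BY / WHERE /
    # OFFSET / LIMIT and return the number of embedding columns produced.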
    svd_procedure = "/v1/procedures/order_svd"
    # svd procedure configuration
    svd_config = {
        'type': 'svd.train',
        'params': {
            "trainingData": {
                "from": {
                    "id": "svd_example"
                },
                "select": "x, y, z",
                "orderBy": order_by,
                "where": where,
                "offset": offset,
                "limit": limit
            },
            "rowOutputDataset": {
                "id": "svd_row",
                'type': "embedding"
            },
            "columnOutputDataset": {
                "id": "svd_column",
                "type": "embedding"
            }
        }
    }

    mldb.put(svd_procedure, svd_config)
    mldb.post(svd_procedure + '/runs')

    result = mldb.get('/v1/query', q="SELECT * FROM svd_row")
    return len(result.json()[0]["columns"])
Example #21
    def test_empty_str_json(self):
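        # set_return with an empty string body still yields a 200 and an empty
        # JSON string for every verb.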
        mldb.put(
            "/v1/plugins/mldb2114", {
                "type": "python",
                "params": {
                    "source": {
                        "routes": """request.set_return("", 200)"""
                    }
                }
            })

        res = mldb.get('/v1/plugins/mldb2114/routes/foo')
        self.assertEqual(res.status_code, 200)
        self.assertEqual(res.json(), "")

        res = mldb.post('/v1/plugins/mldb2114/routes/foo')
        self.assertEqual(res.status_code, 200)
        self.assertEqual(res.json(), "")

        res = mldb.put('/v1/plugins/mldb2114/routes/foo')
        self.assertEqual(res.status_code, 200)
        self.assertEqual(res.json(), "")

        res = mldb.delete('/v1/plugins/mldb2114/routes/foo')
        self.assertEqual(res.status_code, 200)
        self.assertEqual(res.json(), "")
Example #22
    def test_long_quoted_lines(self):
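        # Import a CSV whose quoted fields span 50 to 10000 words and check the
        # token count of each imported row.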
        with open("tmp/broken_csv.csv", 'wt', encoding="utf-8") as f:
            f.write("a,b\n")
            f.write("1,\"" + " ".join(["word " for x in range(50)])+"\"\n")
            f.write("1,\"" + " ".join(["word " for x in range(100)])+"\"\n")
            f.write("1,\"" + " ".join(["word " for x in range(1000)])+"\"\n")
            f.write("1,\"" + " ".join(["word " for x in range(10000)])+"\"\n")

        csv_conf = {
            "type": "import.text",
            "params": {
                'dataFileUrl' : 'file://tmp/broken_csv.csv',
                "outputDataset": {
                    "id": "x",
                },
                "runOnCreation": True,
                "ignoreBadLines": False
            }
        }
        mldb.put("/v1/procedures/csv_proc", csv_conf)

        result = mldb.get(
            "/v1/query",
            q="select tokenize(b, {splitChars: ' '}) as cnt "
            "from x order by rowName() ASC")
        js_rez = result.json()
        mldb.log(js_rez)

        answers = {"2": 50, "3": 100, "4": 1000, "5": 10000}
        for row in js_rez:
            self.assertEqual(answers[row["rowName"]], row["columns"][0][1])
Example #23
    def test_dataset(self):
        # Example of a read only mongo db dataset. MLDB queries can be made
        # over it.
        mldb.put('/v1/datasets/ds', {
            'type' : 'mongodb.dataset',
            'params' : {
                'uriConnectionScheme' : self.connection_scheme,
                'collection' : 'test_collection',
            }
        })

        res = mldb.query("SELECT * FROM ds")
        self.assertEqual(len(res), 5)

        res = mldb.query("SELECT * FROM ds WHERE unexisting_field='Finch'")
        self.assertEqual(len(res), 1)
        res = mldb.query("SELECT * FROM ds WHERE type='simple'")
        self.assertEqual(len(res), 2)
        self.assertEqual(res[1][2], 'simple')

        res = mldb.query("SELECT type FROM ds ORDER BY type")
        self.assertEqual(res[1][1], None)
        self.assertEqual(res[2][1], 'nested_arr')
        self.assertEqual(res[3][1], 'nested_obj')
        self.assertEqual(res[4][1], 'simple')

        res = mldb.query("SELECT username FROM ds WHERE unexisting != 'Finch'")
        self.assertEqual(len(res), 1)

        res = mldb.query("SELECT username FROM ds WHERE type != 'simple'")
        self.assertEqual(len(res), 3)
Example #24
    def test_no_cls_write_perms(self):
        # the model file pattern points at the filesystem root, which is
        # presumably not writable, so creating the experiment should fail
        conf = {
            "type": "classifier.experiment",
            "params": {
                "experimentName": "my_test_no_write",
                "inputData": "select {* EXCLUDING(label)} as features, label from toy",
                "kfold": 2,
                "modelFileUrlPattern": "file:///bouya-$runid.cls",
                "algorithm": "glz",
                "mode": "boolean",
                "configuration": {
                    "glz": {
                        "type": "glz",
                        "verbosity": 3,
                        "normalize": False,
                        "regularization": 'l2'
                    }
                },
                "outputAccuracyDataset": False,
                "runOnCreation": True
            }
        }
        with self.assertRaisesRegex(ResponseException, 'Error when trying'):
            mldb.put("/v1/procedures/rocket_science", conf)
    def test_it(self):
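        # Bucketize the input rows into percentile buckets and check that the
        # output keeps the latest input timestamp.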
        url = '/v1/datasets/input'
        mldb.put(url, {
            'type' : 'sparse.mutable'
        })

        mldb.post(url + '/rows', {
            'rowName' : 'row1',
            'columns' : [['score', 5, 6]]
        })
        mldb.post(url + '/rows', {
            'rowName' : 'row2',
            'columns' : [['score', 1, 5]]
        })

        mldb.post(url + '/commit', {})

        mldb.post('/v1/procedures', {
            'type' : 'bucketize',
            'params' : {
                'inputData' : 'SELECT * FROM input ORDER BY score',
                'outputDataset' : {
                    'id' : 'output',
                    'type' : 'sparse.mutable'
                },
                'percentileBuckets': {'b1': [0, 50], 'b2': [50, 100]},
                'runOnCreation' : True
            }
        })

        res = mldb.query('SELECT latest_timestamp({*}) FROM output')
        self.assertEqual(res[1][1], '1970-01-01T00:00:06Z')
Example #26
    def test_record_null_row_name(self):
        mldb.put('/v1/datasets/ds_null', {'type': 'sparse.mutable'})
        with self.assertRaises(ResponseException):  # noqa
            mldb.post('/v1/datasets/ds_null/rows', {
                'rowName': None,
                'columns': [['colA', 1, 1]]
            })
Example #27
    def run_MLDBFB_545_with_ds_type(self, ds_type):
        id1 = ds_type + 'mldbfb545_1'
        ds = mldb.create_dataset({'id': id1, 'type': ds_type + '.mutable'})
        ds.record_row('user1', [['converted', 'n', 0]])
        ds.commit()

        id2 = ds_type + 'mldbfb545_2'
        ds = mldb.create_dataset({'id': id2, 'type': ds_type + '.mutable'})
        ds.record_row('user2', [['blah', 'blah', 0]])
        ds.commit()

        # query directly on the dataset works
        res = mldb.query("""
            SELECT * FROM {} WHERE converted='c' LIMIT 1
        """.format(id1))
        self.assertEqual(len(res), 1)

        merge_id = ds_type + 'mldbfb545merged'
        mldb.put("/v1/datasets/" + merge_id, {
            "type": "merged",
            "params": {
                "datasets": [{
                    "id": id1
                }, {
                    "id": id2
                }]
            }
        })

        # query on the merged dataset yields incorrect results
        res = mldb.query("""
            SELECT * FROM {} WHERE converted='c' LIMIT 1
        """.format(merge_id))
        mldb.log(res)
        self.assertEqual(len(res), 1)
Example #28
    def test_too_many_requested_rows(self):
        # too many requested rows without sampling
        sampled_dataset_conf = {
            "type": "sampled",
            "params": {
                "dataset": {
                    "id": "toy"
                },
                "rows": 25000,
                "withReplacement": False
            }
        }
        with self.assertRaises(ResponseException) as re:
            mldb.put("/v1/datasets/patate", sampled_dataset_conf)
        self.assertEqual(re.exception.response.status_code, 400)

        sampled_dataset_conf["params"]["withReplacement"] = True
        mldb.put("/v1/datasets/patate", sampled_dataset_conf)

        # try to insert and make sure we get an exception
        with self.assertRaises(ResponseException) as re:
            mldb.post("/v1/datasets/patate/rows", {
                "rowName": "patato",
                "columns": [["a", "b", 0]]
            })
        self.assertEqual(re.exception.response.status_code, 400)
Example #29
    def test_name_with_space(self):
        _id = "name with space"
        url = '/v1/datasets/' + quote(_id)
        mldb.log(url)
        mldb.put(url, {'type': 'sparse.mutable'})
        res = mldb.get(url).json()
        self.assertEqual(res['id'], _id)
Example #30
def train_tsne(when):
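    # Train a t-SNE embedding over svd_example with the given WHEN clause and
    # return the number of embedding columns produced for the rows.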
    global dataset_index
    dataset_index += 1

    tsne_procedure = "/v1/procedures/when_tsne"
    # t-sne procedure configuration
    tsne_config = {
        'type': 'tsne.train',
        'params': {
            "trainingData": {
                "from": {
                    "id": "svd_example"
                },
                "when": when
            },
            "rowOutputDataset": {
                "id": "tsne_embedding_" + str(dataset_index),
                'type': "embedding"
            }
        }
    }

    mldb.put(tsne_procedure, tsne_config)
    mldb.post(tsne_procedure + '/runs')

    result = mldb.get('/v1/query',
                      q="SELECT * FROM tsne_embedding_" + str(dataset_index))
    return len(result.json()[0]["columns"])