Пример #1
0
    async def test_sources(self):
        """
        A record saved into a JSON source must come back unchanged when it
        is requested through the HTTP service's source record endpoint.
        """
        with tempfile.TemporaryDirectory() as workdir:
            # File backing the source that the HTTP service loads on startup
            source_path = str(pathlib.Path(workdir, "source.json"))
            source = JSONSource(
                filename=source_path, allowempty=True, readwrite=True
            )

            # Seed the source with the one record requested below
            expected = Record("myrecord", data={"features": {"f1": 0}})
            await save(source, expected)

            async with ServerRunner.patch(HTTPService.server) as tserver:
                # Port 0 is passed so no fixed port number is hard-coded
                server_args = [
                    "-insecure",
                    "-port",
                    "0",
                    "-sources",
                    "mysource=json",
                    "-source-mysource-filename",
                    source.config.filename,
                ]
                cli = await tserver.start(
                    HTTPService.server.cli(*server_args)
                )
                url = "/source/mysource/record/myrecord"
                async with self.get(cli, url) as response:
                    self.assertEqual(
                        await response.json(), expected.export()
                    )
Пример #2
0
 async def update(self, record: Record):
     """
     Insert or replace the stored rows for ``record``.

     A row keyed by ``record.key`` is always written to the ``features``
     table. A row is written to the ``prediction`` table only when the
     record has a prediction for ``"target_name"``.

     Columns come from ``self.parent.FEATURE_COLS`` and
     ``self.parent.PREDICTION_COLS``; any column the record supplies no
     value for is stored as ``None``.
     """
     db = self.parent.db
     # Store feature data
     feature_cols = self.parent.FEATURE_COLS
     # Start from None for every column so the bound values always line up
     # with the placeholders, even when the record lacks some features
     feature_data = OrderedDict.fromkeys(feature_cols)
     feature_data.update(record.features(feature_cols))
     await db.execute(
         "INSERT OR REPLACE INTO features (key, " +
         ", ".join(feature_cols) + ") "
         "VALUES(?, " + ", ".join("?" * len(feature_cols)) + ")",
         [record.key] + list(feature_data.values()),
     )
     # Store prediction. Only the lookup itself is guarded: a KeyError here
     # is taken to mean the record has no prediction for this target
     # (NOTE(review): confirm against Record.prediction). A KeyError raised
     # while building or executing the INSERT is a real bug and must not be
     # silently swallowed.
     try:
         prediction = record.prediction("target_name")
     except KeyError:
         return
     prediction_cols = self.parent.PREDICTION_COLS
     prediction_data = OrderedDict.fromkeys(prediction_cols)
     prediction_data.update(prediction.dict())
     await db.execute(
         "INSERT OR REPLACE INTO prediction (key, " +
         ", ".join(prediction_cols) + ") "
         "VALUES(?, " + ", ".join("?" * len(prediction_cols)) + ")",
         [record.key] + list(prediction_data.values()),
     )
Пример #3
0
 async def _add_memory_source(self):
     """
     Register an in-memory source, and an open context for it, with the
     CLI app under ``self.slabel``, then yield while both are open.

     The source holds ``self.num_records`` records. Record ``i`` is keyed
     by ``str(i)`` and has a single feature ``by_ten`` equal to ``i * 10``.
     """
     records = []
     for i in range(0, self.num_records):
         records.append(
             Record(str(i), data={"features": {"by_ten": i * 10}})
         )
     async with MemorySource(records=records) as source:
         # Keep a handle on the test case and expose it to the app
         self.source = source
         self.cli.app["sources"][self.slabel] = source
         async with source() as sctx:
             self.sctx = sctx
             self.cli.app["source_contexts"][self.slabel] = sctx
             yield
Пример #4
0
 async def __aenter__(self):
     """
     Populate the source when its context is entered.

     Every ``*.yml`` file found recursively under ``self.config.directory``
     is parsed, and each YAML document in it is stored in ``self.mem`` as
     the features of a record. Returns the source itself.
     """
     for yaml_path in self.config.directory.rglob("*.yml"):
         # The name of the directory holding the file becomes the first
         # path component of the record key
         owner = yaml_path.parent.name
         contents = yaml_path.read_text()
         for doc in yaml.safe_load_all(contents):
             key = f'https://github.com/{owner}/{doc["name"]}'
             self.mem[key] = Record(key, data={"features": doc})
     self.logger.debug("%r loaded %d records", self, len(self.mem))
     return self
Пример #5
0
 async def update(self, record: Record):
     """
     Store ``record`` in the ``ml_data`` table as a JSON dump.

     A new row is inserted for ``record.key``; if a row with that key
     already exists, its ``json`` column is overwritten instead.
     """
     # Function-scope import: only needed for the debug-level check below
     import logging

     db = self.conn
     # Just dump it. (If you want to be able to query the data easily, then
     # you need to massage the columns in this table to your liking, and
     # perhaps add more tables.)
     marshall = json.dumps(record.dict())
     await db.execute(
         "INSERT INTO ml_data (key, json) VALUES(%s, %s) "
         "ON DUPLICATE KEY UPDATE json = %s",
         (record.key, marshall, marshall),
     )
     self.logger.debug("updated: %s", marshall)
     # The read-back exists purely to be logged. Awaiting self.record() is
     # wasted work when debug logging is off, so skip it in that case.
     if self.logger.isEnabledFor(logging.DEBUG):
         self.logger.debug("update: %s", await self.record(record.key))
Пример #6
0
 async def record(self, key: str):
     """
     Build the record for ``key`` from the database.

     The JSON dump stored in ``ml_data`` (if present and not NULL) is
     merged into a fresh record, and the ``maintained`` column of
     ``status`` (if present and not NULL) is added as a string value via
     ``Record.evaluated``. A record is returned even when neither table
     has a row for ``key``.
     """
     result = Record(key)
     db = self.conn

     # Stored JSON dump, if there is one
     await db.execute("SELECT json FROM ml_data WHERE key=%s", (key, ))
     dump_row = await db.fetchone()
     if dump_row is not None:
         stored = dump_row[0]
         if stored is not None:
             result.merge(Record(key, data=json.loads(stored)))

     # Maintained status, if there is one
     await db.execute("SELECT maintained FROM `status` WHERE key=%s",
                      (key, ))
     status_row = await db.fetchone()
     if status_row is not None:
         flag = status_row[0]
         if flag is not None:
             result.evaluated({"maintained": str(flag)})
     return result
Пример #7
0
 async def record(self, key: str):
     """
     Build the record for ``key`` from the ``features`` and ``prediction``
     tables.

     The columns named in ``self.parent.FEATURE_COLS`` are selected from
     ``features`` and, when a row exists, handed to ``Record.evaluated``.
     When a ``prediction`` row exists, its ``value`` and ``confidence``
     are set as the prediction for ``"target_name"``.
     """
     db = self.parent.db
     record = Record(key)

     # Get features
     columns = ", ".join(self.parent.FEATURE_COLS)
     feature_query = f"SELECT {columns} FROM features WHERE key=?"
     feature_cursor = await db.execute(feature_query, (record.key, ))
     feature_row = await feature_cursor.fetchone()
     if feature_row is not None:
         record.evaluated(feature_row)

     # Get prediction
     prediction_query = "SELECT * FROM prediction WHERE key=?"
     prediction_cursor = await db.execute(prediction_query, (record.key, ))
     prediction_row = await prediction_cursor.fetchone()
     if prediction_row is not None:
         record.predicted(
             "target_name",
             prediction_row["value"],
             prediction_row["confidence"],
         )
     return record
Пример #8
0
    async def test_update(self):
        """
        Saving key "2" a second time must leave three records in the
        source, with the third carrying the values from the second save.

        Columns "B" and "C" are configured as predictions, so only "A" is
        expected to show up as a feature.
        """
        df = pd.DataFrame([{"A": 1, "B": 2, "C": 3}])
        config = DataFrameSourceConfig(dataframe=df, predictions=["C", "B"])
        source = DataFrameSource(config)

        # Save some data in the source
        first = Record("1", data={"features": {"A": 4, "B": 5, "C": 6}})
        second = Record("2", data={"features": {"A": 7, "B": 8, "C": 9}})
        await save(source, first, second)

        # Save key "2" again with different values
        replacement = Record(
            "2", data={"features": {"A": 15, "B": 16, "C": 14}}
        )
        await save(source, replacement)

        records = []
        async for loaded in load(source):
            records.append(loaded)
        self.assertEqual(len(records), 3)

        def unscored(value):
            # Shape of a prediction entry as the assertions expect it
            return {"confidence": 0.0, "value": value}

        # (A, B, C) expected for each loaded record, in load order
        expected_rows = [(1, 2, 3), (4, 5, 6), (15, 16, 14)]
        for loaded, (a, b, c) in zip(records, expected_rows):
            self.assertDictEqual(loaded.features(), {"A": a})
            self.assertDictEqual(
                loaded.predictions(),
                {"B": unscored(b), "C": unscored(c)},
            )
Пример #9
0
    async def test_dataframe(self):
        """
        Records saved into a DataFrame-backed source must load back after
        the row the DataFrame started with, with "A" and "B" as features
        and "C" as a prediction.
        """
        df = pd.DataFrame([{"A": 1, "B": 2, "C": 3}])
        config = DataFrameSourceConfig(dataframe=df, predictions=["C"])
        source = DataFrameSource(config)

        # Save some data in the source
        first = Record(
            "1",
            data={
                "features": {"A": 4, "B": 5},
                "prediction": {"C": {"value": 6}},
            },
        )
        second = Record(
            "2",
            data={
                "features": {"A": 7, "B": 8},
                "prediction": {"C": {"value": 9}},
            },
        )
        await save(source, first, second)

        # Load all the records
        records = []
        async for loaded in load(source):
            records.append(loaded)

        self.assertIsInstance(records, list)
        self.assertEqual(len(records), 3)

        # (A, B, C) expected for each loaded record, in load order
        expected_rows = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
        for loaded, (a, b, c) in zip(records, expected_rows):
            self.assertDictEqual(loaded.features(), {"A": a, "B": b})
            self.assertDictEqual(
                loaded.predictions(),
                {"C": {"confidence": 0.0, "value": c}},
            )
Пример #10
0
    async def test_scorer(self):
        """
        Train an SLR model on points of a straight line, then ask the HTTP
        service to score it with the ``mse`` scorer against a JSON source
        holding further points of the same line. The reported accuracy
        must be 0.0.
        """
        with tempfile.TemporaryDirectory() as workdir:
            model = SLRModel(
                features=Features(Feature("f1", float, 1)),
                predict=Feature("ans", int, 1),
                location=workdir,
            )
            # Slope and intercept of y = slope * x + intercept, the line
            # both the training and the scoring data lie on
            slope = 5
            intercept = 3

            # Train the model
            training_data = [
                {"f1": x, "ans": slope * x + intercept}
                for x in range(0, 10)
            ]
            await train(model, *training_data)

            source = JSONSource(
                filename=pathlib.Path(workdir, "source.json"),
                allowempty=True,
                readwrite=True,
            )

            # Records the source will have in it
            scoring_records = [
                Record(
                    str(i),
                    data={
                        "features": {
                            "f1": x,
                            "ans": (slope * x) + intercept,
                        }
                    },
                )
                for i, x in enumerate(range(10, 20))
            ]
            await save(source, *scoring_records)

            async with ServerRunner.patch(HTTPService.server) as tserver:
                server_args = [
                    "-insecure",
                    "-port",
                    "0",
                    "-models",
                    "mymodel=slr",
                    "-model-mymodel-location",
                    workdir,
                    "-model-mymodel-features",
                    "f1:float:1",
                    "-model-mymodel-predict",
                    "ans:int:1",
                    "-features",
                    "ans:int:1",
                    "-sources",
                    "mysource=json",
                    "-source-mysource-filename",
                    str(source.config.filename),
                    "-scorers",
                    "myscorer=mse",
                ]
                cli = await tserver.start(
                    HTTPService.server.cli(*server_args)
                )
                async with self.post(
                    cli,
                    "/scorer/myscorer/mymodel/score",
                    json=["mysource"],
                ) as response:
                    self.assertEqual(
                        await response.json(), {"accuracy": 0.0}
                    )