async def model_predict(self, request, mctx):
    """
    HTTP handler: run model prediction over records POSTed as JSON.

    The request body is a JSON object mapping record key -> record data.
    Responds with a JSON object containing ``iterkey`` (always ``None``
    until chunked iteration is supported) and ``records``, a mapping of
    record key to the exported predicted record.
    """
    # TODO Provide an iterkey method for model prediction
    chunk_size = int(request.match_info["chunk_size"])
    # Only single-shot prediction is supported; any nonzero chunk size
    # would imply multi-request iteration, which is not implemented yet
    if chunk_size != 0:
        return web.json_response(
            {"error": "Multiple request iteration not yet supported"},
            status=HTTPStatus.BAD_REQUEST,
        )
    # Build the records to predict on directly from the POSTed JSON body.
    # (The original code also created an unused `records` dict here; it
    # has been removed.)
    records = [
        Record(key, data=record_data)
        for key, record_data in (await request.json()).items()
    ]
    # Create a source which will provide the records
    async with Sources(MemorySource(records=records)) as source:
        async with source() as sctx:
            # Feed them through prediction
            return web.json_response({
                "iterkey": None,
                "records": {
                    record.key: record.export()
                    async for record in mctx.predict(sctx)
                },
            })
async def accuracy(self, sources: Sources) -> Accuracy:
    """
    Assess accuracy of the saved regression line against test data.

    Recomputes the coefficient of determination on the given sources and
    stores it back alongside the saved slope and intercept.

    Raises
    ------
    ModelNotTrained
        If ``train()`` has not been run yet (no saved regression line).
    """
    # Load saved regression line
    regression_line = self.storage.get("regression_line", None)
    # Ensure the model has been trained before we try to make a prediction
    if regression_line is None:
        raise ModelNotTrained("Train model before assessing for accuracy")
    # Split regression line tuple into variables, ignore accuracy from
    # training data since we'll be re-calculating it for the test data
    m, b, _accuracy = regression_line
    # X and Y data
    x = []
    y = []
    # Go through all records that have the feature we're testing on and the
    # feature we want to predict.
    async for record in sources.with_features(
        [self.config.feature.name, self.config.predict.name]
    ):
        x.append(record.feature(self.config.feature.name))
        y.append(record.feature(self.config.predict.name))
    # Use self.logger to report how many records are being used for testing
    self.logger.debug("Number of test records: %d", len(x))
    # Predict on the test data. Use a distinct loop variable and result
    # name: the original `regression_line = [m * x + b for x in x]` both
    # shadowed the list being iterated and reused the tuple's name for a
    # list of predictions.
    predictions = [m * value + b for value in x]
    accuracy = coeff_of_deter(y, predictions)
    # Update the accuracy to be the accuracy when assessed on the test data
    self.storage["regression_line"] = m, b, accuracy
    return Accuracy(accuracy)
async def train(self, sources: Sources):
    """
    Accumulate training data from the sources, then fit the model.

    Appends each record's input feature value to ``self.xData`` and its
    target value to ``self.yData``, then recomputes
    ``self.separating_line`` from the accumulated data.
    """
    # Feature names required of every usable record: the inputs plus the
    # target to predict (same value every iteration, so hoisted)
    needed = self.features + [self.config.predict.NAME]
    async for record in sources.with_features(needed):
        feature_data = record.features(needed)
        # Single-feature model: only self.features[0] is consumed
        self.xData = np.append(self.xData, feature_data[self.features[0]])
        self.yData = np.append(self.yData, feature_data[self.config.predict.NAME])
    self.separating_line = self.best_separating_line()
class ServerConfig(TLSCMDConfig, MultiCommCMDConfig):
    """
    Configuration for the HTTP API server: network binding, TLS, static
    content, CORS, and the models/sources/scorers loaded on start.
    """

    # Network binding
    port: int = field(
        "Port to bind to",
        default=8080,
    )
    addr: str = field(
        "Address to bind to",
        default="127.0.0.1",
    )
    # File serving
    upload_dir: str = field(
        "Directory to store uploaded files in",
        default=None,
    )
    static: str = field(
        "Directory to serve static content from",
        default=None,
    )
    js: bool = field(
        "Serve JavaScript API file at /api.js",
        default=False,
        action="store_true",
    )
    # Security / browser access
    insecure: bool = field(
        "Start without TLS encryption",
        action="store_true",
        default=False,
    )
    cors_domains: List[str] = field(
        "Domains to allow CORS for (see keys in defaults dict for aiohttp_cors.setup)",
        default_factory=lambda: [],
    )
    allow_caching: bool = field(
        "Allow caching of HTTP responses",
        action="store_true",
        default=False,
    )
    # Plugins instantiated at startup
    models: Model = field(
        "Models configured on start",
        default_factory=lambda: AsyncContextManagerList(),
        action=list_action(AsyncContextManagerList),
        labeled=True,
    )
    sources: Sources = field(
        "Sources configured on start",
        default_factory=lambda: Sources(),
        action=list_action(Sources),
        labeled=True,
    )
    scorers: AccuracyScorer = field(
        "Scorers configured on start",
        default_factory=lambda: AsyncContextManagerList(),
        action=list_action(AsyncContextManagerList),
        labeled=True,
    )
    # HTTP redirects, given as flat METHOD SOURCE DESTINATION triples
    redirect: List[str] = field(
        "list of METHOD SOURCE_PATH DESTINATION_PATH pairs, number of elements must be divisible by 3",
        action=ParseRedirectsAction,
        default_factory=lambda: [],
    )
    # Fixed typo in help text: "requeseted" -> "requested"
    portfile: pathlib.Path = field(
        "File to write bound port to when starting. Helpful when port 0 was requested to bind to any free port",
        default=None,
    )
async def train(self, sources: Sources) -> None:
    """
    Train incrementally on each record, then persist the fitted model.

    For every record that has all input features plus the target, a
    one-row DataFrame is built and fed to the streaming trainer
    (``self.lm.compute``). After all records are consumed the model is
    finalized and dumped to ``self.path`` with joblib.
    """
    # Input features plus the target column (invariant, so hoisted)
    columns = self.features + [self.parent.config.predict.name]
    async for record in sources.with_features(columns):
        feature_data = record.features(columns)
        df = self.pd.DataFrame(feature_data, index=[0])
        # Use the keyword form: the positional `axis` argument to
        # DataFrame.drop was deprecated in pandas 1.0 and removed in 2.0
        xdata = df.drop(columns=[self.parent.config.predict.name])
        ydata = df[self.parent.config.predict.name]
        self.lm.compute(xdata, ydata)
    self.lm_trained = self.lm.finalize().model
    self.joblib.dump(self.lm_trained, self.path)
async def train(self, sources: Sources):
    """
    Collect all training records into a DataFrame and fit the model.

    Fits the underlying estimator on the input features, fits its
    ensemble with the configured ensemble size, then persists the model
    to ``self.path`` with joblib.
    """
    target = self.parent.config.predict.name
    rows = []
    # record.features() with no argument presumably exports every
    # feature on the record — confirm against the Record API
    async for record in sources.with_features(self.features + [target]):
        rows.append(record.features())
    df = pd.DataFrame(rows)
    y_train = df[[target]]
    x_train = df.drop(columns=[target])
    self.model.fit(x_train, y_train)
    self.model.fit_ensemble(
        y_train, ensemble_size=self.parent.config.ensemble_size
    )
    joblib.dump(self.model, self.path)
async def accuracy(self, sources: Sources) -> Accuracy:
    """
    Score the trained model on the test records in ``sources``.

    Raises
    ------
    ModelNotTrained
        If no model has been trained/loaded yet.
    """
    if not self.model:
        raise ModelNotTrained("Train the model before assessing accuracy")
    target = self.parent.config.predict.name
    rows = []
    async for record in sources.with_features(self.features + [target]):
        rows.append(record.features())
    df = pd.DataFrame(rows)
    y_test = df[[target]]
    x_test = df.drop(columns=[target])
    predictions = await self.get_predictions(x_test)
    score = await self.accuracy_score(y_test, predictions)
    return Accuracy(score)
async def train(self, sources: Sources) -> None:
    """
    Train incrementally on each record, then persist the fitted model.

    Each record is expanded into a two-row DataFrame (duplicated rows)
    before being fed to the streaming trainer, to work around a oneDAL
    regression; the model is finalized and dumped after all records.
    """
    # Input features plus the target column (invariant, so hoisted)
    columns = self.features + [self.parent.config.predict.name]
    async for record in sources.with_features(columns):
        feature_data = record.features(columns)
        # NOTE Duplicate feature data due to regression in oneDAL
        # See https://github.com/intel/dffml/issues/801
        df = self.pd.DataFrame([feature_data] * 2, index=[0, 1])
        # Use the keyword form: the positional `axis` argument to
        # DataFrame.drop was deprecated in pandas 1.0 and removed in 2.0
        xdata = df.drop(columns=[self.parent.config.predict.name])
        ydata = df[self.parent.config.predict.name]
        self.lm.compute(xdata, ydata)
    self.lm_trained = self.lm.finalize().model
    self.joblib.dump(self.lm_trained, self.path)
async def train(self, sources: Sources) -> None:
    """
    Fit the simple linear regression on all usable training records.

    Collects (feature, target) pairs from every record that has both
    configured columns, then stores the fitted line (slope, intercept,
    accuracy) under ``self.storage["regression_line"]``.
    """
    feature_name = self.config.feature.name
    predict_name = self.config.predict.name
    # Parallel lists of input values and target values
    x, y = [], []
    # Go through all records that have the feature we're training on
    # and the feature we want to predict.
    async for record in sources.with_features([feature_name, predict_name]):
        x.append(record.feature(feature_name))
        y.append(record.feature(predict_name))
    # Report how many records are being used for training
    self.logger.debug("Number of training records: %d", len(x))
    # Save m, b, and accuracy
    self.storage["regression_line"] = best_fit_line(x, y)
async def train(self, sources: Sources) -> None:
    """
    Fit the simple linear regression on all usable training records.

    This model supports exactly one input feature, so only
    ``self.features[0]`` is read from each record. The fitted line
    (slope, intercept, accuracy) is saved under
    ``self.storage["regression_line"]``.
    """
    feature_name = self.features[0]
    predict_name = self.config.predict.NAME
    # Parallel lists of input values and target values
    x, y = [], []
    # Only records carrying both the input feature and the target are used
    async for record in sources.with_features(self.features + [predict_name]):
        x.append(record.feature(feature_name))
        y.append(record.feature(predict_name))
    # Report how many records are being used for training
    self.logger.debug("Number of input records: %d", len(x))
    # Save m, b, and accuracy
    self.storage["regression_line"] = best_fit_line(x, y)
async def accuracy(self, sources: Sources) -> Accuracy:
    """
    Assess the trained model on test records.

    Accuracy is the fraction of predictions whose absolute error against
    the true target is within an error margin of 0.1.

    Raises
    ------
    ModelNotTrained
        If ``train()`` has not produced ``self.lm_trained`` yet.
    """
    if self.lm_trained is None:
        raise ModelNotTrained("Train model before assessing for accuracy.")
    feature_data = []
    async for record in sources.with_features(
        self.features + [self.parent.config.predict.name]
    ):
        feature_data.append(
            record.features(self.features + [self.parent.config.predict.name])
        )
    df = self.pd.DataFrame(feature_data)
    # Use the keyword form: the positional `axis` argument to
    # DataFrame.drop was deprecated in pandas 1.0 and removed in 2.0
    xdata = df.drop(columns=[self.parent.config.predict.name])
    ydata = df[self.parent.config.predict.name]
    preds = self.ac_predictor.compute(xdata, self.lm_trained)
    # Calculate accuracy with an error margin of 0.1
    accuracy_val = sum(
        self.compare(list(map(abs, map(sub, ydata, preds.prediction))), 0.1)
    ) / len(ydata)
    return Accuracy(accuracy_val)
class ServerConfig(TLSCMDConfig, MultiCommCMDConfig):
    """
    Configuration for the HTTP API server: network binding, TLS, static
    content, CORS, and the models/sources loaded on start.
    """

    # Network binding
    port: int = field(
        "Port to bind to",
        default=8080,
    )
    addr: str = field(
        "Address to bind to",
        default="127.0.0.1",
    )
    # File serving
    upload_dir: str = field(
        "Directory to store uploaded files in",
        default=None,
    )
    static: str = field(
        "Directory to serve static content from",
        default=None,
    )
    js: bool = field(
        "Serve JavaScript API file at /api.js",
        default=False,
        action="store_true",
    )
    # Security / browser access
    insecure: bool = field(
        "Start without TLS encryption",
        action="store_true",
        default=False,
    )
    cors_domains: List[str] = field(
        "Domains to allow CORS for (see keys in defaults dict for aiohttp_cors.setup)",
        default_factory=lambda: [],
    )
    # Plugins instantiated at startup
    models: Model = field(
        "Models configured on start",
        default_factory=lambda: AsyncContextManagerList(),
        action=list_action(AsyncContextManagerList),
        labeled=True,
    )
    sources: Sources = field(
        "Sources configured on start",
        default_factory=lambda: Sources(),
        action=list_action(Sources),
        labeled=True,
    )
async def train(self, sources: Sources):
    """
    Memorize the training data: store every record keyed by its key.
    """
    async for rec in sources.records():
        self.trained_on[rec.key] = rec
async def accuracy(self, sources: Sources) -> Accuracy:
    """
    Toy accuracy: the sum of all record keys parsed as integers.
    """
    total = 0
    async for rec in sources.records():
        total += int(rec.key)
    return Accuracy(total)
class Server(TLSCMD, MultiCommCMD, Routes):
    """
    HTTP server providing access to DFFML APIs
    """

    # Used for testing: when set to asyncio primitives by a test harness,
    # run() hands itself to the test instead of sleeping forever
    RUN_YIELD_START = False
    RUN_YIELD_FINISH = False
    INSECURE_NO_TLS = False

    # Command-line arguments (mirrors ServerConfig fields)
    arg_port = Arg("-port", help="Port to bind to", type=int, default=8080)
    arg_addr = Arg("-addr", help="Address to bind to", default="127.0.0.1")
    arg_upload_dir = Arg(
        "-upload-dir",
        help="Directory to store uploaded files in",
        default=None,
    )
    arg_static = Arg("-static", help="Directory to serve static content from", default=None)
    arg_js = Arg(
        "-js",
        help="Serve JavaScript API file at /api.js",
        default=False,
        action="store_true",
    )
    arg_insecure = Arg(
        "-insecure",
        help="Start without TLS encryption",
        action="store_true",
        default=False,
    )
    arg_cors_domains = Arg(
        "-cors-domains",
        help="Domains to allow CORS for (see keys in defaults dict for aiohttp_cors.setup)",
        nargs="+",
        default=[],
    )
    arg_models = Arg(
        "-models",
        help="Models configured on start",
        nargs="+",
        default=AsyncContextManagerList(),
        type=Model.load_labeled,
        action=list_action(AsyncContextManagerList),
    )
    arg_sources = Arg(
        "-sources",
        help="Sources configured on start",
        nargs="+",
        default=Sources(),
        type=BaseSource.load_labeled,
        action=list_action(Sources),
    )

    async def start(self):
        """
        Create the TCP listener and start serving.

        With -insecure the site binds plain TCP; otherwise an SSL context
        is built from self.cert / self.key. After start, self.port is
        rewritten to the actually-bound port (relevant when port 0 was
        requested).
        """
        if self.insecure:
            self.site = web.TCPSite(self.runner, host=self.addr, port=self.port)
        else:
            # NOTE(review): Purpose.SERVER_AUTH with cafile=self.cert and
            # the same file used as the cert chain — presumably a
            # self-signed setup; confirm intended TLS configuration
            ssl_context = ssl.create_default_context(
                purpose=ssl.Purpose.SERVER_AUTH, cafile=self.cert)
            ssl_context.load_cert_chain(self.cert, self.key)
            self.site = web.TCPSite(
                self.runner,
                host=self.addr,
                port=self.port,
                ssl_context=ssl_context,
            )
        await self.site.start()
        # Reads aiohttp's private _server attribute to discover the bound
        # port — fragile across aiohttp versions
        self.port = self.site._server.sockets[0].getsockname()[1]
        self.logger.info(f"Serving on {self.addr}:{self.port}")

    async def run(self):
        """
        Binds to port and starts HTTP server
        """
        # Create dictionaries to hold configured sources and models
        await self.setup()
        await self.start()
        # Load multicomm configs from the configured directory, if any
        if self.mc_config is not None:
            # Restore atomic after config is set, allow setting for now
            atomic = self.mc_atomic
            self.mc_atomic = False
            await self.register_directory(self.mc_config)
            self.mc_atomic = atomic
        try:
            # If we are testing then RUN_YIELD will be an asyncio.Event
            if self.RUN_YIELD_START is not False:
                await self.RUN_YIELD_START.put(self)
                await self.RUN_YIELD_FINISH.wait()
            else:  # pragma: no cov
                # Wait for ctrl-c
                while True:
                    await asyncio.sleep(60)
        finally:
            # Always tear down the aiohttp app and the listener, even if
            # the wait above is cancelled
            await self.app.cleanup()
            await self.site.stop()