async def accuracy(self, sources: Sources) -> Accuracy:
    # Load saved regression line
    regression_line = self.storage.get("regression_line", None)
    # Ensure the model has been trained before we try to make a prediction
    if regression_line is None:
        raise ModelNotTrained("Train model before assessing for accuracy")
    # Split regression line tuple into variables, ignore accuracy from
    # training data since we'll be re-calculating it for the test data
    m, b, _accuracy = regression_line
    # X and Y data
    x = []
    y = []
    # Go through all records that have the feature we're testing on and the
    # feature we want to predict
    async for record in sources.with_features(
        [self.config.feature.name, self.config.predict.name]
    ):
        x.append(record.feature(self.config.feature.name))
        y.append(record.feature(self.config.predict.name))
    # Use self.logger to report how many records are being used for testing
    self.logger.debug("Number of test records: %d", len(x))
    # Calculate the regression line for test data and accuracy of line
    predictions = [m * x_val + b for x_val in x]
    accuracy = coeff_of_deter(y, predictions)
    # Update the accuracy to be the accuracy when assessed on the test data
    self.storage["regression_line"] = m, b, accuracy
    return Accuracy(accuracy)
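# The snippet above relies on a coeff_of_deter() helper that is not shown.
# A minimal sketch, assuming it computes the standard coefficient of
# determination (R^2 = 1 - SS_res / SS_tot); this is a reconstruction, not
# necessarily the library's actual implementation.
def coeff_of_deter(y, predictions):
    # Mean of the observed values
    y_mean = sum(y) / len(y)
    # Total sum of squares: spread of the observations around their mean
    ss_tot = sum((yi - y_mean) ** 2 for yi in y)
    # Residual sum of squares: error of the predictions
    ss_res = sum((yi - fi) ** 2 for yi, fi in zip(y, predictions))
    # 1.0 is a perfect fit, 0.0 is no better than predicting the mean
    return 1 - (ss_res / ss_tot)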
async def accuracy(self, sources: Sources) -> Accuracy:
    # Load saved regression line
    regression_line = self.storage.get("regression_line", None)
    # Ensure the model has been trained before we try to make a prediction
    if regression_line is None:
        raise ModelNotTrained("Train model before assessing for accuracy.")
    # Accuracy is the last element in regression_line, which is a list of
    # three values: m, b, and accuracy
    return Accuracy(regression_line[2])
async def accuracy(self, sources: Sources) -> Accuracy:
    if not self.model:
        raise ModelNotTrained("Train the model before assessing accuracy")
    # Collect every record carrying both the input features and the target
    test_data = []
    async for record in sources.with_features(
        self.features + [self.parent.config.predict.name]
    ):
        test_data.append(record.features())
    # Split the frame into the target column and the input columns
    df = pd.DataFrame(test_data)
    y_test = df[[self.parent.config.predict.name]]
    x_test = df.drop(columns=[self.parent.config.predict.name])
    predictions = await self.get_predictions(x_test)
    accuracy = await self.accuracy_score(y_test, predictions)
    return Accuracy(accuracy)
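# get_predictions() and accuracy_score() above are helpers defined elsewhere
# on the model context and not shown here. A plausible sketch of the scorer,
# assuming it wraps scikit-learn's accuracy_score for classifiers (a
# regressor would use r2_score instead); an assumption, not the actual helper.
import numpy as np
from sklearn.metrics import accuracy_score as sklearn_accuracy_score

async def accuracy_score(self, y_test, predictions):
    # Fraction of predictions that exactly match the known labels; ravel
    # flattens the single-column DataFrame into a 1-D array
    return sklearn_accuracy_score(np.ravel(y_test), np.ravel(predictions))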
async def accuracy(self, sources: SourcesContext) -> Accuracy:
    if not os.path.isdir(
        os.path.join(self.parent.config.output_dir, "ner")
    ):
        raise ModelNotTrained("Train model before assessing for accuracy.")
    test_examples = await self._preprocess_data(sources)
    self.nlp = spacy.load(self.parent.config.output_dir)
    # spaCy v2-style scoring: GoldParse was removed in spaCy v3 (compare the
    # Example-based variant further below)
    scorer = Scorer()
    for input_, annot in test_examples:
        doc_gold_text = self.nlp.make_doc(input_)
        gold = GoldParse(doc_gold_text, entities=annot["entities"])
        pred_value = self.nlp(input_)
        scorer.score(pred_value, gold)
    return Accuracy(scorer.scores["tags_acc"])
async def accuracy(self, sources: SourcesContext) -> Accuracy:
    # Load saved anomalies
    anomalies = self.storage.get("anomalies", None)
    # Ensure the model has been trained before we try to make a prediction
    if anomalies is None:
        raise ModelNotTrained("Train model before assessing for accuracy.")
    epsilon, _F1val, mu, sigma2 = anomalies
    X = []
    Y = []
    # Go through all records that have the features we're testing on and
    # the feature we want to predict
    async for record in sources.with_features(
        self.features + [self.parent.config.predict.name]
    ):
        record_data = []
        for feature in record.features(self.features).values():
            record_data.extend(
                [feature] if np.isscalar(feature) else feature
            )
        X.append(record_data)
        Y.append(record.feature(self.parent.config.predict.name))
    self.logger.debug("Number of test records: %d", len(X))
    # Number of features
    nof = len(self.features)
    X = np.reshape(X, (len(X), nof))
    Y = np.reshape(Y, (len(Y), 1))
    mu = np.array(mu)
    sigma2 = np.array(sigma2)
    # Probability of each test record under the fitted Gaussian; records
    # below the epsilon threshold are flagged as anomalies
    p = multivariateGaussian(X, mu, sigma2)
    pred = (p < epsilon).astype(int)
    F1 = getF1(Y, pred)
    outliers = p < epsilon
    # Indices of the flagged records (computed but unused here)
    listOfOl = findIndices(outliers)
    accuracy = F1
    # Update the accuracy
    self.storage["anomalies"] = epsilon, F1, mu.tolist(), sigma2.tolist()
    return Accuracy(accuracy)
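# multivariateGaussian(), getF1(), and findIndices() above are helpers that
# are not shown. Minimal sketches, assuming the classic diagonal-covariance
# anomaly-detection formulation; reconstructions, not the actual helpers.
import numpy as np

def multivariateGaussian(X, mu, sigma2):
    # Density under a Gaussian with diagonal covariance: the product over
    # features of univariate normals N(x_j; mu_j, sigma2_j)
    mu = np.ravel(mu)
    sigma2 = np.ravel(sigma2)
    norm = (2 * np.pi * sigma2) ** -0.5
    expo = np.exp(-((X - mu) ** 2) / (2 * sigma2))
    return np.prod(norm * expo, axis=1)

def getF1(y_true, y_pred):
    # F1 = 2PR / (P + R), treating label 1 as "anomaly"
    y_true = np.ravel(y_true)
    y_pred = np.ravel(y_pred)
    tp = np.sum((y_pred == 1) & (y_true == 1))
    fp = np.sum((y_pred == 1) & (y_true == 0))
    fn = np.sum((y_pred == 0) & (y_true == 1))
    if tp == 0:
        return 0.0
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return 2 * precision * recall / (precision + recall)

def findIndices(outliers):
    # Positions of the records flagged as anomalous (p < epsilon)
    return np.where(np.ravel(outliers))[0].tolist()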
async def accuracy(self, sources: Sources) -> Accuracy:
    if self.lm_trained is None:
        raise ModelNotTrained("Train model before assessing for accuracy.")
    feature_data = []
    async for record in sources.with_features(
        self.features + [self.parent.config.predict.name]
    ):
        feature_data.append(
            record.features(self.features + [self.parent.config.predict.name])
        )
    df = self.pd.DataFrame(feature_data)
    # Split the frame into inputs and the target column (pass axis=1
    # explicitly; the positional axis argument was removed in pandas 2.0)
    xdata = df.drop([self.parent.config.predict.name], axis=1)
    ydata = df[self.parent.config.predict.name]
    preds = self.ac_predictor.compute(xdata, self.lm_trained)
    # Calculate accuracy with an error margin of 0.1: the share of
    # predictions whose absolute error (operator.sub, then abs) falls
    # within the margin
    accuracy_val = sum(
        self.compare(list(map(abs, map(sub, ydata, preds.prediction))), 0.1)
    ) / len(ydata)
    return Accuracy(accuracy_val)
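# compare() above is not shown; given how it is used (summed, then divided
# by the number of records), it plausibly maps each absolute error to 1 if
# it falls within the margin and 0 otherwise. A hypothetical reconstruction
# of the method on the model context:
def compare(self, errors, margin):
    # 1 for each prediction within the error margin, 0 otherwise, so the
    # sum counts the "close enough" predictions
    return [1 if error <= margin else 0 for error in errors]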
async def accuracy(self, sources: SourcesContext) -> Accuracy:
    if not os.path.isdir(
        os.path.join(self.parent.config.directory, "ner")
    ):
        raise ModelNotTrained("Train model before assessing for accuracy.")
    test_examples = await self._preprocess_data(sources)
    self.nlp = spacy.load(self.parent.config.directory)
    # spaCy v3-style scoring: Example replaces the removed GoldParse API
    scorer = Scorer()
    examples = []
    for input_, annot in test_examples:
        pred_value = self.nlp(input_)
        # Example.from_dict() builds the gold reference from the entity
        # annotations, so overwriting example.reference afterwards would
        # discard them
        example = Example.from_dict(
            pred_value, {"entities": annot["entities"]}
        )
        examples.append(example)
    scores = scorer.score(examples)
    return Accuracy(scores["token_acc"])
async def accuracy(self, sources: Sources) -> Accuracy:
    # Ensure the model has been trained before we try to make a prediction
    if self.separating_line is None:
        raise ModelNotTrained("Train model before assessing for accuracy.")
    # Accuracy was computed at training time and stored as the third
    # element of the separating line
    accuracy_value = self.separating_line[2]
    return Accuracy(accuracy_value)
async def accuracy(self, sources: Sources) -> Accuracy:
    # Dummy scorer: "accuracy" is simply the sum of the integer record
    # keys, which yields a deterministic value for testing
    accuracy: int = 0
    async for record in sources.records():
        accuracy += int(record.key)
    return Accuracy(accuracy)
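# Taken together, all nine variants share one contract: guard against an
# untrained model with ModelNotTrained, gather test records from sources,
# compute a score, and wrap it in Accuracy. A distilled template of that
# pattern; is_trained, score_fn, and predict_one are placeholders standing
# in for each model's own state check, metric, and prediction logic.
async def accuracy(self, sources: Sources) -> Accuracy:
    # 1. Guard: refuse to score a model that was never trained
    if not self.is_trained:
        raise ModelNotTrained("Train model before assessing for accuracy.")
    # 2. Gather: collect records carrying the input and target features
    x, y = [], []
    async for record in sources.with_features(
        [self.config.feature.name, self.config.predict.name]
    ):
        x.append(record.feature(self.config.feature.name))
        y.append(record.feature(self.config.predict.name))
    # 3. Score: compare predictions for the inputs against the known values
    score = score_fn(y, [predict_one(value) for value in x])
    # 4. Wrap: hand the scalar back in the Accuracy return type
    return Accuracy(score)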