def test_model(n_gram_mins):
    """Fit one of the best-fit candidate models and report its validation score.

    Features are extracted from the slack dialogue dataset with 1- to 4-grams
    and the supplied ``n_gram_mins``. One entry of the candidate list returned
    by ``search_initial_best_fit_algorithm`` is picked at random, fitted on the
    training split and scored on the validation split.

    Returns a ``(accuracy_score, model_array)`` tuple, where ``model_array`` is
    the full candidate list (not only the model that was fitted).
    """
    extractor = FeatureExtractor("../dataset/slack_dialogue.txt",
                                 n_grams=[1, 2, 3, 4],
                                 n_gram_mins=n_gram_mins,
                                 debug=False)
    extractor.load()

    evaluator = ModelEvaluator(extractor.headers, extractor.features)
    # The second element of the result is not needed here.
    model_array, _highest_rate = evaluator.search_initial_best_fit_algorithm()

    # Pick one candidate uniformly at random.
    picked_index = random.randrange(len(model_array))
    builder = ModelBuilder(model_array[picked_index])

    X_train, X_validation, Y_train, Y_validation = evaluator.split_dataset()
    builder.fit_model(X_train, Y_train)
    score = builder.accuracy_score(X_validation, Y_validation)

    score_message = "Got score: " + str(score) + " with model: " + str(model_array)
    print(score_message)
    print("Using : " + str(n_gram_mins))
    return score, model_array
# Share of the data reserved for validation, read from the config file.
# NOTE(review): presumably a fraction in [0, 1] - confirm against the config.
val_range = json.loads(config['model-parameters']['val_range'])

# Split the data exactly once, from the untouched sequence. The previous code
# truncated train_data first and then sliced val_data out of the truncated
# result, so every validation sample was also a training sample (and one
# sample at the split boundary was dropped from both sets).
split_index = int(data_len * val_range)
val_data = train_data[:split_index]
train_data = train_data[split_index:]

# Both generators read from the same directory and share every setting; only
# the list of samples differs.
train_datagen = DataGenerator(train_data_dir, train_data, classes,
                              n_channels=n_channels, dim=data_shape,
                              batch_size=batch_size, rescale=255)
val_datagen = DataGenerator(train_data_dir, val_data, classes,
                            n_channels=n_channels, dim=data_shape,
                            batch_size=batch_size, rescale=255)

model.compile_model(optimizer=optimizer, loss=loss, metrics=metrics,
                    learning_rate=lr)
# NOTE(review): the same `steps` value is used for training and validation
# even though the two subsets differ in size - confirm this is intended.
history = model.fit_model(train_gen=train_datagen, val_gen=val_datagen,
                          steps_per_epoch=steps, val_steps_per_epoch=steps,
                          epochs=epochs, callbacks=callbacks)
class AlphaLayer:
    """Couples feature extraction, model selection and sentence handling.

    On construction the dataset at ``path`` is loaded, a classifier is chosen
    among the best-fit candidates, fitted and scored. The ``handle_*`` methods
    produce the textual reply for each classifier label and ``evaluate``
    extracts "<amount> <item>" entries from a sentence.
    """

    # Default constructor including path and debug toggle. Also includes count algorithm default and specification
    # This constructor does have a way to turn off the load of spacy. This is not meant for production and
    # should only be done to speed up debug times.
    def __init__(self, path, debug=False, count_algorithm=None, load_spacy=True):
        """Build the feature extractor and immediately load and fit a model.

        ``count_algorithm`` defaults to a fresh ``StandardPresenceBoolean()``.
        The default is created per call rather than in the signature so that
        instances do not share one object created at definition time.
        """
        if count_algorithm is None:
            count_algorithm = StandardPresenceBoolean()
        self.spacy_loaded = load_spacy
        self.path = path
        self.debug = debug
        self.fe = FeatureExtractor(path, self.debug, count_algorithm=count_algorithm, load_spacy=load_spacy)
        self.load(True)
        if self.debug:
            # str() on chosen_model: it is not guaranteed to be a string.
            print("Accuracy score: " + str(self.accuracy_score)
                  + " with classifier " + str(self.chosen_model)
                  + " out of " + str(self.model_array))

    def load(self, select_new_best_model=False):
        """Reload the dataset and, optionally, select and fit a new model.

        The features are always re-read and a new ``ModelEvaluator`` is built.
        Only when ``select_new_best_model`` is true is a candidate model picked
        at random from the best-fit list, fitted on the training split and
        scored on the validation split. Useful when there are automated
        updates to datasets.
        """
        self.fe.load()
        self.me = ModelEvaluator(self.fe.headers, self.fe.features)
        if not select_new_best_model:
            return
        self.model_array, self.highest_rate = self.me.search_initial_best_fit_algorithm()
        self.chosen_model = random.choice(self.model_array)
        self.mb = ModelBuilder(self.chosen_model)
        self.X_train, self.X_validation, self.Y_train, self.Y_validation = self.me.split_dataset()
        self.mb.fit_model(self.X_train, self.Y_train)
        self.accuracy_score = self.mb.accuracy_score(self.X_validation, self.Y_validation)

    # Change dataset path
    def change_path(self, path):
        """Point both this object and its feature extractor at ``path``."""
        self.path = path
        self.fe.path = path

    # Append a line to the dataset. Caution: no formatting checks are done in this method.
    def add_line(self, line):
        """Append ``line`` to the dataset file, preceded by a newline."""
        with open(self.path, "a") as datafile:
            datafile.write('\n' + line)

    def handle_buy_item(self, sentence):
        """Reply for the "buy" classifier, listing the items found in ``sentence``."""
        return "Got classifier: buy\nThank you for purchasing " + self.evaluate(sentence)

    def handle_open_shop(self, sentence):
        """Reply for the "shop" classifier; ``sentence`` is not used."""
        return "Got classifier: shop\nHere you go, take a look at my wares.\n"

    def handle_conversation(self, sentence):
        """Reply for the "convo" classifier; ``sentence`` is not used."""
        return "Got classifier: convo\nI don't feel like talking to you"

    def handle_undo(self, sentence):
        """Reply for the "undo" classifier; ``sentence`` is not used."""
        return "Got classifier: undo\nReally mate? You sure you want to take it back?"

    def handle_sell(self, sentence):
        """Reply for the "sell" classifier, listing the items found in ``sentence``."""
        return "Got classifier: sell\nI'll gladly accept your " + self.evaluate(sentence)

    @staticmethod
    def _format_entry(number_words, item_words, to_amount):
        """Render one "<amount> <item>" entry; the amount is 1 when no number was given."""
        amount_text = " ".join(number_words).strip()
        amount = to_amount(amount_text) if amount_text != "" else 1
        return str(amount) + " " + " ".join(item_words).strip()

    @staticmethod
    def _summarise_items(tokens, to_amount):
        """Turn tagged tokens into an "<amount> <item>; <amount> <item>" summary.

        ``tokens`` is an iterable of objects exposing ``pos_`` and ``text``;
        ``to_amount`` converts the collected number text into an amount.

        Nouns and proper nouns build up the current item and numerals build up
        its amount. A conjunction, ';' or ',' closes the current item, but only
        when an item has actually been named, so a number spanning a
        conjunction is not cut in half. After an item is closed both the item
        and its amount start over, so one item's amount never leaks into the
        next.
        """
        pieces = []
        item_words = []
        number_words = []
        for word in tokens:
            pos = word.pos_
            if pos == u'NOUN' or pos == u'PROPN':
                # Probably the thing being bought or sold.
                item_words.append(word.text)
            elif pos == u'NUM':
                # This is an amount.
                number_words.append(word.text)
            elif (pos in (u'CONJ', u'CCONJ')
                  or (pos == u'PUNCT' and word.text in (u';', u','))):
                # Terminator. 'CCONJ' is the tag newer spaCy releases use for
                # coordinating conjunctions; 'CONJ' is kept for older ones.
                if item_words:
                    pieces.append(
                        AlphaLayer._format_entry(number_words, item_words, to_amount) + "; ")
                    item_words = []
                    number_words = []
        if item_words:
            # Last item of the sentence: no trailing separator.
            pieces.append(AlphaLayer._format_entry(number_words, item_words, to_amount))
        return "".join(pieces)

    # Evaluate a string with spacy classifier
    def evaluate(self, line):
        """Parse ``line`` with the feature extractor's parser and summarise its items.

        Returns a string such as "2 apples; 3 pears" built by
        ``_summarise_items``, using the module-level ``parse`` to convert
        number text into amounts.
        """
        doc = self.fe.parser(unicode(line))
        return self._summarise_items(doc, parse)