import numpy as np

# BasicFilter and NNModel are assumed to be defined elsewhere in the project.


class MyAdaptiveFilter(BasicFilter):
    def __init__(self, name, acc, trans, noits, meats, u0, v0_prob, wk_prob,
                 vk_prob, total_frame, param):
        super().__init__(name, acc, trans, noits, meats, u0, v0_prob, wk_prob,
                         vk_prob, total_frame)
        self.model = NNModel(param)

    def fit(self, X_train, y_train, is_valid=True, save_model=False, path=None):
        self.model.fit(X_train, y_train, is_valid=is_valid)
        if save_model:
            self.model.save_model(path)

    def predict(self, measure, TPM, *args, **kwargs):
        load_model = kwargs['load_model']
        path = kwargs['path']
        if load_model:
            self.model.load_model(path)

        sample = measure.shape[0]
        LH = np.empty([sample, 3], np.float32)
        uk = np.empty([sample, 3], np.float32)
        X = np.empty([sample, 3, 2], np.float32)
        P = np.empty([sample, 3, 2, 2], np.float32)
        mode = np.empty([sample, self.frame], np.float32)
        state = np.empty([sample, self.frame, 2], np.float32)
        his_TPM = np.empty([sample, self.frame, 3, 3], np.float32)

        # Initialize every track from its first measurement.
        for i in range(sample):
            mode[i], state[i], X[i], P[i], LH[i], uk[i] = self.initial_state(
                measure[i, 0])

        for k in range(1, self.frame):
            # One IMM filtering step per track, using the current TPM estimate.
            for i in range(sample):
                mode[i, k], state[i, k], X[i], P[i], LH[i], uk[i] = \
                    self.IMMFilter(measure[i, k], TPM[i], X[i], P[i], LH[i], uk[i])

            # One-hot encode the mode history up to frame k, left-padded with
            # zeros to a fixed width, and let the network predict the next TPM.
            X_test = np.concatenate([
                np.zeros([sample, self.frame - 1 - k, 3], dtype=np.float32),
                np.array([
                    mode[:, :k + 1] == 0,
                    mode[:, :k + 1] == 1,
                    mode[:, :k + 1] == 2
                ], dtype=np.float32).transpose([1, 2, 0])
            ], axis=1).reshape([sample, -1])
            TPM = self.model.predict(X_test)
            his_TPM[:, k] = TPM

        return state, mode, his_TPM
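The X_test built inside predict() is simply the one-hot mode history, left-padded with zeros to a fixed width and flattened per track. A minimal standalone sketch of that construction follows; sample, frame, k and the mode values are made-up illustrations, not values from the filter:

import numpy as np

sample, frame, k = 2, 5, 2                      # hypothetical: 2 tracks, 5 frames, step k = 2
mode = np.array([[0, 1, 2, 0, 0],
                 [2, 2, 1, 0, 0]], np.float32)  # hypothetical mode history

# One-hot encode the modes observed up to and including frame k ...
one_hot = np.array([mode[:, :k + 1] == 0,
                    mode[:, :k + 1] == 1,
                    mode[:, :k + 1] == 2], dtype=np.float32).transpose([1, 2, 0])

# ... and left-pad with zeros so every step feeds the network a fixed-length vector.
X_test = np.concatenate([np.zeros([sample, frame - 1 - k, 3], dtype=np.float32),
                         one_hot], axis=1).reshape([sample, -1])
print(X_test.shape)  # (2, 15): sample x (frame * 3)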
from json import dumps

# data_interp, txtdata, n_files, to_categorical and NNModel are assumed to be
# imported/defined earlier in the script.

txtdata = data_interp.simplify_text_data(txtdata, min_freq=n_files / 2)
# Set the number of words to keep based on the number of words that appear more often than min_freq
vocab = data_interp.set_num_words(txtdata, min_freq=n_files / 2)
vocab_size = len(vocab) + 1

# Convert the data to sequences of integers with some maximum length
max_length, sequences = data_interp.training_data_to_padded_sequences(
    txtdata, max_len=15, shuffle_data=True)

# Break up the sequences into input (sequence of n words) and output (single word to test against)
input_data, output = sequences[:, :-1], sequences[:, -1]
output = to_categorical(output, num_classes=vocab_size)

# Save the tokenizer for later use, in case we randomized the training data.
# If the training data was randomized we will need to know the words and word_index later for testing.
tokenizer_json = data_interp.tokenizer.to_json()
with open("./tokenizer_%s_file_training.json" % n_files, "w", encoding="utf-8") as jsonf:
    jsonf.write(dumps(tokenizer_json, ensure_ascii=False))

# Prepare the model.
model = NNModel()
# Input layer should have max_length - 1 neurons; output layer should have one neuron per word token.
# Hidden layer size determined by the 2/3 * (input layer + output layer) rule of thumb.
model.prepare_model(max_length - 1, vocab_size,
                    hidden_layer_size=int((vocab_size + max_length - 1) * 2 / 3))

# Fit on training data
model.fit_model(input_data, output)

# Save the model; it can be loaded later for testing without re-training.
model.save_model("./model_%s_file_training.h5" % str(n_files))
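A sketch of the test-time counterpart the comments above anticipate: reload the saved tokenizer (so the shuffled word_index is known) and the trained model. It assumes a Keras-style tokenizer behind data_interp and that NNModel exposes a load_model counterpart to save_model, as the filter class earlier uses; n_files here is a placeholder that must match the training run.

from json import load
from keras.preprocessing.text import tokenizer_from_json  # assumption: Keras tokenizer

n_files = 4  # hypothetical; must match the suffix used when the files were written

# The training script passed tokenizer.to_json() (already a JSON string) through
# json.dumps, so json.load() returns that string, ready for tokenizer_from_json.
with open("./tokenizer_%s_file_training.json" % n_files, encoding="utf-8") as jsonf:
    tokenizer = tokenizer_from_json(load(jsonf))

model = NNModel()
model.load_model("./model_%s_file_training.h5" % str(n_files))  # assumes NNModel.load_model exists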