def _save_checkpoint(self):
    """Build the Keras callback list with a best-only checkpoint on training loss.

    :return: list containing a single ModelCheckpoint callback
    """
    # Persist weights only when the monitored training loss improves.
    filepath = env().MODEL_FILENAME + "LARGE.h5"
    checkpoint = ModelCheckpoint(
        filepath,
        monitor='loss',
        verbose=1,
        save_best_only=True,
        mode='min',
    )
    return [checkpoint]
def load_vocabularies():
    """Load the character vocabularies from the pickle file.

    :return: (char_to_idx, idx_to_char, max_seq_len)
    """
    with open(env().path_to_vocabularies, "rb") as handle:
        # Objects were dumped in this exact order by save_vocabularies().
        char_to_idx, idx_to_char, max_seq_len = (
            pickle.load(handle) for _ in range(3)
        )
    return char_to_idx, idx_to_char, max_seq_len
def main():
    """End-to-end pipeline: build dataset, vectorize, split, persist, train."""
    reader = DataReader(env().data_dir)
    data, labels = reader.create_dataset()
    reader.save_data_csv()
    data, labels = reader.load_dataset()

    generator = DataGenerator(data, labels)
    data, labels = generator.generate_data()
    X_train, X_test, y_train, y_test = generator.split_train_test(env().DIM_TEST)
    generator.save_data(X_train, X_test, y_train, y_test)

    # Reload from disk so training consumes exactly what was persisted.
    (X_train, X_test, y_train, y_test,
     word_to_idx, idx_to_word, max_words_length) = load_data()
    print(len(X_train))

    classifier = LanguageClassifier(X_train, y_train, X_test, y_test)
    classifier.train_model()
def load_data():
    """Load the split arrays, word vocabularies and max sequence length.

    NOTE(review): a second, 4-value `load_data` also appears in this SOURCE;
    presumably they live in different modules — verify there is no shadowing.

    :return: (X_tr, X_te, y_tr, y_te, word_to_idx, idx_to_word, max_words)
    """
    with open(env().path_to_arrays, "rb") as handle:
        # Seven objects, read back in the exact order save_data() wrote them.
        return tuple(pickle.load(handle) for _ in range(7))
def _build_baseline(self):
    """Stack the baseline architecture: embedding -> 2x LSTM -> softmax head."""
    layers = [
        Embedding(self.vocab_size, env().EMBEDDING_DIM,
                  input_length=self.x_train.shape[1]),
        LSTM(56, return_sequences=True, dropout=0.1, recurrent_dropout=0.1),
        LSTM(56, dropout=0.1, recurrent_dropout=0.1),
        # Output width matches the one-hot label dimension.
        Dense(self.y_train.shape[1], activation='softmax'),
    ]
    for layer in layers:
        self.model.add(layer)
def load_data():
    """Load the training and test arrays from file.

    :return: X_training, X_test, y_training, y_test
    """
    with open(env().path_to_arrays, "rb") as handle:
        # Four arrays, read back in the exact order save_data() wrote them.
        return tuple(pickle.load(handle) for _ in range(4))
def save_vocabularies(self):
    """Persist char_to_index, index_to_char and max_seq_len as sequential pickles.

    Order matters: load_vocabularies() reads them back in this same sequence.
    """
    payload = (self.char_to_index, self.index_to_char, self.max_seq_len)
    with open(env().path_to_vocabularies, 'wb') as handle:
        for obj in payload:
            pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)
def save_data_csv(self):
    """Write the (sample, label) pairs to the CSV path as tab-separated lines.

    Prints a warning instead of raising when the dataset is empty or the
    write fails — best-effort persistence, matching the original contract.
    """
    # Guard clause: nothing to persist.
    if not self.data:
        print("Empty dataset!\n")
        return
    try:
        with open(env().dataset_csv_path, mode="w", encoding="utf-8") as f_csv:
            # One "<sample>\t<label>" row per example; the with-block closes
            # the file (the original's explicit close() was redundant).
            for sample, label in zip(self.data, self.labels):
                f_csv.write("{}\t{}\n".format(sample, label))
    except Exception as ex:
        # Keep the original best-effort behavior: report and continue.
        # (Fixed typo: "not save" -> "not saved".)
        print("Error: dataset not saved - ", ex)
def load_dataset(self):
    """Read the tab-separated CSV back into parallel data/label lists.

    :return: (self.data, self.label) — the restored samples and labels
    """
    data_list, labels_list = [], []
    with open(env().dataset_csv_path, mode="rt", encoding="utf-8") as f_csv:
        # Iterate line by line instead of read().split('\n')[:-1]: this is
        # robust to a missing trailing newline and skips blank lines, which
        # would have crashed the original's tuple unpack.
        for line in f_csv:
            line = line.rstrip("\n")
            if not line:
                continue
            x, y = line.split('\t')
            data_list.append(x)
            labels_list.append(y)
    # Restore the object's fields. self.labels is also set for consistency
    # with save_data_csv(), which reads self.labels; self.label is kept
    # because the original assigned and returned it.
    self.data = data_list
    self.labels = labels_list
    self.label = labels_list
    return self.data, self.label
def save_data(self, X_tr, X_te, y_tr, y_te):
    """Pickle the split arrays plus word vocabularies and max sequence length.

    Order matters: the 7-value load_data() reads the objects back in this
    same sequence.
    """
    payload = (
        X_tr, X_te, y_tr, y_te,
        self.word_to_index, self.index_to_word, self.max_seq_len,
    )
    with open(env().path_to_arrays, 'wb') as handle:
        for obj in payload:
            pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)
def __init__(self, x_train, y_train, x_test, y_test, vocab_size=None):
    """Store the datasets and pull hyperparameters from the environment config.

    :param x_train: training inputs
    :param y_train: training targets
    :param x_test: test inputs
    :param y_test: test targets
    :param vocab_size: vocabulary size for the embedding layer (optional)
    """
    self.x_train = x_train
    self.y_train = y_train
    self.x_test = x_test
    self.y_test = y_test
    self.vocab_size = vocab_size

    # Hyperparameters come from the shared environment/config object;
    # fetched once here — assumes env() returns the same object each call.
    config = env()
    self.batch_size = config.BATCH_SIZE
    self.embed_dim = config.EMBEDDING_DIM
    self.epochs = config.EPOCHS
    self.optimizer = config.OPTIMIZER
    self.units = config.UNITS
    self.model_filename = config.MODEL_FILENAME

    self.model = Sequential()
def save_data(self, X_tr, X_te, y_tr, y_te):
    """Pickle the four split arrays in the order the 4-value load_data() expects."""
    with open(env().path_to_arrays, 'wb') as handle:
        for array in (X_tr, X_te, y_tr, y_te):
            pickle.dump(array, handle, protocol=pickle.HIGHEST_PROTOCOL)