def main():
    """Train a language model, then report perplexity on the train,
    validation, and test splits.

    Relies on module-level ``m_config`` for split paths (relative to the
    current working directory) and evaluation step counts.
    """

    def _read_split(relative_path):
        # Resolve the split path against the working directory and expose
        # the reader's (inputs, targets) iterators.
        # NOTE(review): plain string concatenation assumes the configured
        # path begins with a separator — confirm against m_config.
        reader = RawStringDatasetReader(os.getcwd() + relative_path)
        return reader.itera_x, reader.itera_y

    x_train, y_train = _read_split(m_config.train_file)
    x_valid, y_valid = _read_split(m_config.valid_file)
    x_test, y_test = _read_split(m_config.test_file)

    # Training returns both the compiled Keras-style model and the
    # training perplexity.
    lm = LanguageModel()
    k_model, train_ppl = lm.build_lm_model(x_train, y_train)

    # evaluate() returns [loss, ...]; element 0 is the cross-entropy loss,
    # whose exponential is the perplexity.
    valid_loss = k_model.evaluate(
        x=x_valid, y=y_valid, steps=m_config.valid_steps)[0]
    test_loss = k_model.evaluate(
        x=x_test, y=y_test, steps=m_config.test_steps)[0]

    print("trainning's ppl:", train_ppl)
    print("valid's ppl:", math.exp(valid_loss))
    print("test's ppl:", math.exp(test_loss))
# Build the dataset with the configured batch/sequence dimensions and
# persist it next to the model checkpoints.
dataset = Dataset(data_dir, num_words)
dataset.set_batch_size(batch_size)
dataset.set_seq_len(seq_len)
dataset.save('./checkpoints/')

# Gather every model hyper-parameter in one literal.
params = {
    'vocab_size': dataset.vocab_size,
    'num_classes': dataset.vocab_size,
    'batch_size': batch_size,
    'seq_len': seq_len,
    'hidden_dim': hidden_size,
    'num_layers': num_layers,
    'embed_size': embed_size,
}

model = LanguageModel(params)
model.compile()

eval_softmax = 5

for epoch in range(num_epochs):
    # Re-point and re-size the dataset at the start of every epoch.
    dataset.set_data_dir(data_dir)
    dataset.set_batch_size(batch_size)
    progbar = generic_utils.Progbar(dataset.token.document_count)
    for X_batch, Y_batch in dataset:
        started = time.time()
        loss = model.train_on_batch(X_batch, Y_batch)
        # Perplexity is exp(loss); words/sec measures raw throughput
        # of this single batch.
        perp = np.exp(np.float32(loss))
        elapsed = time.time() - started
        wps = np.round((batch_size * seq_len) / elapsed)
        progbar.add(
            len(X_batch),
            values=[("loss", loss),
                    ("perplexity", perp),
                    ("words/sec", wps)])
if __name__ == "__main__": uniqueService = UniqueService(APP_DBUS_NAME, APP_OBJECT_NAME) app = QApplication(sys.argv) tray_icon = SystemTrayIcon( QIcon(os.path.join(get_parent_dir(__file__), "image", "trayicon.png")), app) tray_icon.show() (constant.TRAYAREA_TOP, constant.TRAYAREA_BOTTOM) = tray_icon.get_trayarea() plugin = Plugin() source_lang_model = LanguageModel() dest_lang_model = LanguageModel() word_engine_name = setting_config.get_translate_config("word_engine") words_engine_name = setting_config.get_translate_config("words_engine") translate_simple = imp.load_source( "translate_simple", plugin.get_plugin_file(word_engine_name)).Translate() translate_long = imp.load_source( "translate_long", plugin.get_plugin_file(words_engine_name)).Translate() word_translate_model = plugin.get_word_model( setting_config.get_translate_config("src_lang"), setting_config.get_translate_config("dst_lang")) words_translate_model = plugin.get_words_model( setting_config.get_translate_config("src_lang"),
from model import LanguageModel
import argparse

# Construct the model up front, then read the corpus path from the CLI.
new = LanguageModel()

parser = argparse.ArgumentParser()
parser.add_argument(
    "source",
    type=str,
    help="file to train the model",
)
args = parser.parse_args()

# Train on the supplied file and echo whatever fit() reports.
result = new.fit(args.source)
print(result)