tester = GeneralTester(log_writer, start_time)
datasets_helper = Dataset_Helper(preprocess=preprocess)
datasets_helper.set_wanted_datasets([0, 2, 3])
while datasets_helper.next_dataset():
    if 'topic_count' in models_params[model]:
        models_params[model]['topic_count'] = datasets_helper.get_num_of_topics()
    topic_names = [(index, item) for index, item in
                   enumerate(datasets_helper.get_dataset_topic_names())]
    tester.set_new_dataset(datasets_helper.get_num_of_topics(), topic_names)
    output_csv = []
    """for key, value in test_model.items():
        if not value:
            models_params.pop(key)"""
    log_writer.write_any("model-settings", json.dumps(models_params[model]), 'w+', True)
    seed = 5
    random.seed(seed)
    log_writer.add_log(
        "Starting preprocessing texts of {} for training".format(
            datasets_helper.get_dataset_name()))
    texts_for_train = datasets_helper.get_dataset(DatasetType.TRAIN)
    log_writer.add_log("Preprocessing finished")
    log_writer.add_log(
        "Starting preprocessing texts of {} for testing".format(
            datasets_helper.get_dataset_name()))
    texts_for_testing = datasets_helper.get_dataset(DatasetType.TEST)
    log_writer.add_log("Preprocessing finished")
    # (truncated) tail of what appears to be the autoencoder.fit(...) call,
    # closing its EarlyStopping callback and callback list
    baseline=None, restore_best_weights=False)
])

# weights[0] / weights[2] are the encoder and decoder weight matrices of the dense autoencoder
weight_in = autoencoder.get_weights()[0]
weight_out = autoencoder.get_weights()[2]
#tst = autoencoder.get_weights()
blob = np.array([])
weight_in = weight_in.transpose()
#combined_weight = np.dot(weight_in.transpose(), weight_out)
num_of_important_words = 20

log_writer = LogWriter(log_file_desc='{}{}'.format(test_name, ""),
                       result_desc="NeuralTopicModel")
log_writer.write_any('model', autoencoder.to_json(), 'w+', True)

# plot training vs. validation loss for the current dataset
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(loss) + 1)
plt.plot(epochs, loss, 'g', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss {}'.format(dataset_helper.get_dataset_name()))
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.savefig(log_writer.get_plot_path(dataset_helper.get_dataset_name(), "loss"))
plt.clf()

# commented-out (and truncated) in the original source:
# topic_words_in = [sorted(topic_words, key=lambda x: x[1], reverse=True) for topic_words in topic_words_in]
# topic_words_out = [sorted(topic_words, key=lambda x: x[1], reverse=True) for topic_words in topic_words_out]
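# Illustrative sketch (not part of the original script): one way the transposed encoder
# weights and `num_of_important_words` could be turned into per-topic word lists, which is
# what the commented-out topic_words_in / topic_words_out lines above appear to build toward.
# It assumes `tokenizer` is the keras Tokenizer that produced the bag-of-words input, so its
# word_index can be inverted; that name is an assumption, not taken from the original code.
reverse_word_map = {index: word for word, index in tokenizer.word_index.items()}
topic_words_in = []
for topic_row in weight_in:  # one row per latent unit ("topic") after the transpose above
    word_scores = sorted(
        ((reverse_word_map.get(i, '?'), weight) for i, weight in enumerate(topic_row)),
        key=lambda x: x[1], reverse=True)
    topic_words_in.append(word_scores[:num_of_important_words])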
models_to_test = ['lstm', 'dense', 'embedding', 'bidi']
"""datasets_helper.next_dataset()
space = create_base_params('lstm', datasets_helper)
smpl = sample(space)
print(sample(space))"""

# run a TPE hyper-parameter search for every network type on every selected dataset
for model in models_to_test:
    while datasets_helper.next_dataset():
        space = create_base_params(model, datasets_helper, results_saver)
        best = fmin(optimize_model,
                    space=space,
                    algo=tpe.suggest,
                    max_evals=30,
                    max_queue_len=1,
                    verbose=False)
        results_saver.add_log(
            'Best params for network type {} and dataset {} are: {}\n{}'.format(
                model, datasets_helper.get_dataset_name(), best,
                space_eval(space, best)))
        results_saver.write_any(
            'best_params',
            [model, datasets_helper.get_dataset_name(), space_eval(space, best)],
            'a')
        #results_saver.write_2D_list([[model,datasets_helper.get_dataset_name(),best]],'best_params','a')
    datasets_helper.reset_dataset_counter()

"""best_run, best_model = optim.minimize(model=test,
                                         data=[],
                                         algo=tpe.suggest,
                                         max_evals=5,
                                         trials=Trials())"""
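# Illustrative sketch (not from the original repo): the general shape of a hyperopt search
# space such as the one create_base_params is expected to return, and of an objective like
# optimize_model. The parameter names and the dummy score below are assumptions made only
# so the example is self-contained and runnable.
from hyperopt import hp, fmin, tpe, STATUS_OK

example_space = {
    'batch_size': hp.choice('batch_size', [32, 64, 128]),
    'hidden_size': hp.choice('hidden_size', [64, 128, 256]),
    'dropout': hp.uniform('dropout', 0.0, 0.5),
}

def example_objective(params):
    # hyperopt minimizes the returned 'loss'; a real objective would train the chosen
    # network with `params` and negate its validation accuracy. A dummy score keeps the
    # sketch runnable without any training code.
    dummy_accuracy = 1.0 - params['dropout']
    return {'loss': -dummy_accuracy, 'status': STATUS_OK}

example_best = fmin(example_objective, space=example_space, algo=tpe.suggest, max_evals=10)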