示例#1
0
    # Build the tester and the dataset helper, then restrict the helper to the
    # wanted datasets [0, 2, 3] (presumably dataset indices -- confirm against
    # Dataset_Helper).
    tester = GeneralTester(log_writer, start_time)
    datasets_helper = Dataset_Helper(preprocess=preprocess)
    datasets_helper.set_wanted_datasets([0, 2, 3])
    # One iteration per dataset for as long as next_dataset() returns truthy.
    while datasets_helper.next_dataset():
        # If the current model's parameters contain 'topic_count', overwrite
        # it with the number of topics reported for the current dataset.
        if 'topic_count' in models_params[model]:
            models_params[model][
                'topic_count'] = datasets_helper.get_num_of_topics()
        # Pair each topic name with its position: [(index, name), ...].
        topic_names = [(index, item) for index, item in enumerate(
            datasets_helper.get_dataset_topic_names())]
        tester.set_new_dataset(datasets_helper.get_num_of_topics(),
                               topic_names)
        # Not used in the visible lines; presumably filled further down.
        output_csv = []
        # NOTE(review): the string literal below is disabled code kept as a
        # no-op expression statement; it is never executed.
        """for key,value in test_model.items():
            if not value:
                models_params.pop(key)"""
        # Hand the JSON-serialized parameters of the current model to the
        # log writer under the name "model-settings".
        log_writer.write_any("model-settings",
                             json.dumps(models_params[model]), 'w+', True)
        # NOTE(review): random.seed is called with the literal 5 rather than
        # the `seed` variable; `seed` itself is unused in the visible lines.
        seed = 5
        random.seed(5)

        # Fetch the training split; the log messages bracket the call, so
        # preprocessing presumably happens inside get_dataset -- confirm.
        log_writer.add_log(
            "Starting preprocessing texts of {} for training".format(
                datasets_helper.get_dataset_name()))
        texts_for_train = datasets_helper.get_dataset(DatasetType.TRAIN)
        log_writer.add_log("Preprocessing finished")

        # Same for the test split.
        log_writer.add_log(
            "Starting preprocessing texts of {} for testing".format(
                datasets_helper.get_dataset_name()))
        texts_for_testing = datasets_helper.get_dataset(DatasetType.TEST)
        log_writer.add_log("Preprocessing finished")
                                                baseline=None,
                                                restore_best_weights=False)
                              ])
    # Take the first and third arrays returned by get_weights().
    # NOTE(review): presumably the kernels of the first and second layer
    # (with index 1 being a bias) -- confirm against the model definition.
    weight_in = autoencoder.get_weights()[0]
    weight_out = autoencoder.get_weights()[2]
    #tst = autoencoder.get_weights()
    # Empty array; not used in the visible lines.
    blob = np.array([])

    # Swap the axes of the first weight array.
    weight_in = weight_in.transpose()
    #combined_weight = np.dot(weight_in.transpose(), weight_out)
    # Not used in the visible lines; presumably the number of top words kept
    # per topic further down -- confirm.
    num_of_important_words = 20

    # Log writer for this run; the file description is test_name followed by
    # an empty suffix.
    log_writer = LogWriter(log_file_desc='{}{}'.format(test_name, ""),
                           result_desc="NeuralTopicModel")

    # Store the model's JSON representation under the name 'model'.
    log_writer.write_any('model', autoencoder.to_json(), 'w+', True)
    # Per-epoch training and validation loss recorded in the history object.
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    # 1-based epoch numbers for the x axis.
    epochs = range(1, len(loss) + 1)
    # Plot both loss curves ('g' and 'b' are the line format strings).
    plt.plot(epochs, loss, 'g', label='Training loss')
    plt.plot(epochs, val_loss, 'b', label='Validation loss')
    plt.title('Training and validation loss {}'.format(
        dataset_helper.get_dataset_name()))
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    # Save the figure to the path the log writer provides for this dataset's
    # "loss" plot, then clear the current figure.
    plt.savefig(
        log_writer.get_plot_path(dataset_helper.get_dataset_name(), "loss"))
    plt.clf()
    """topic_words_in = [sorted(topic_words,key=lambda x: x[1],reverse=True) for topic_words in topic_words_in]
    topic_words_out = [sorted(topic_words,key=lambda x: x[1],reverse=True) for topic_words in topic_words_out]
示例#3
0
# Network types whose hyperparameters are searched, one after another.
models_to_test = ['lstm', 'dense', 'embedding', 'bidi']
# NOTE(review): the string literal below is disabled debugging code kept as a
# no-op expression statement; it is never executed.
"""datasets_helper.next_dataset()
space = create_base_params('lstm',datasets_helper)
smpl = sample(space)
print(sample(space))"""
for model in models_to_test:
    # One search of up to 30 evaluations per (network type, dataset) pair.
    while datasets_helper.next_dataset():
        space = create_base_params(model, datasets_helper, results_saver)
        best = fmin(optimize_model,
                    space=space,
                    algo=tpe.suggest,
                    max_evals=30,
                    max_queue_len=1,
                    verbose=False)
        # Resolve the dataset name and the concrete parameter values once and
        # reuse them, so the logged and the persisted record are guaranteed to
        # agree. space_eval turns fmin's raw assignment (e.g. indices for
        # choice nodes) into the actual values from the search space.
        dataset_name = datasets_helper.get_dataset_name()
        best_params = space_eval(space, best)
        results_saver.add_log(
            'Best params for network type {} and dataset {} are: {}\n{}'.
            format(model, dataset_name, best, best_params))
        results_saver.write_any('best_params',
                                [model, dataset_name, best_params], 'a')
        #results_saver.write_2D_list([[model,datasets_helper.get_dataset_name(),best]],'best_params','a')
    # Reset the helper's dataset counter before moving on to the next network
    # type (presumably so the inner loop starts from the first dataset again).
    datasets_helper.reset_dataset_counter()
# NOTE(review): disabled code kept as a no-op string literal.
"""best_run, best_model = optim.minimize(model=test,
                                          data=[],
                                          algo=tpe.suggest,
                                          max_evals=5,
                                          trials=Trials())"""