Example #1
def main(_):
    # Save default params and set scope
    saved_params = FLAGS.__flags  # Note: the parsed flag values are not passed this way when the code is pasted into Colab
    if saved_params['ensemble']:  # uni + bi + tri
        model_name = 'ensemble'
    elif saved_params['ngram'] == 1:
        model_name = 'unigram'
    elif saved_params['ngram'] == 2:
        model_name = 'bigram'
    elif saved_params['ngram'] == 3:
        model_name = 'trigram'
    else:
        assert False, 'Unsupported ngram %d' % saved_params['ngram']
    model_name += '_embedding' if saved_params['embed'] else '_no_embedding'
    saved_params['model_name'] = '%s' % model_name
    saved_params['checkpoint_dir'] += model_name
    pprint.PrettyPrinter().pprint(saved_params)
    # get_data takes the parameter dict and returns [train_set, valid_set, test_set, dictionary],
    # where dictionary == [idx2unigram, unigram2idx, idx2country, country2ethnicity, idx2bigram, idx2trigram]
    saved_dataset = get_data(saved_params)

    validation_writer = open(saved_params['valid_result_path'],
                             'a')  # Append mode; the file is created if it does not exist
    validation_writer.write(model_name + "\n")
    validation_writer.write(
        "[dim_hidden, dim_rnn_cell, learning_rate, lstm_dropout, lstm_layer, hidden_dropout, dim_embed]\n"
    )
    validation_writer.write("combination\ttop1\ttop5\tepoch\n")

    # Run the model
    for _ in range(saved_params['valid_iteration']):
        # Sample a parameter set: non-default parameters are re-initialized with sampled
        # values; returns the updated parameter dict and the sampled combination list
        params, combination = sample_parameters(saved_params.copy())
        dataset = saved_dataset[:]  # Shallow copy so the loop does not alias saved_dataset

        # Initialize embeddings: get_char2vec returns an embedding initializer.
        # dataset[0] is the train set (unigram/bigram/trigram inputs) and dataset[3] is the
        # dictionary returned by get_data(); see get_data() for the exact layout.
        uni_init = get_char2vec(dataset[0][0][:], params['dim_embed_unigram'],
                                dataset[3][0])
        bi_init = get_char2vec(dataset[0][1][:], params['dim_embed_bigram'],
                               dataset[3][4])
        tri_init = get_char2vec(dataset[0][2][:], params['dim_embed_trigram'],
                                dataset[3][5])

        print(model_name, 'Parameter sets: ', end='')
        pprint.PrettyPrinter().pprint(combination)

        rnn_model = RNN(params, [uni_init, bi_init, tri_init])
        top1, top5, ep = experiment(rnn_model, dataset, params)

        validation_writer.write(str(combination) + '\t')
        validation_writer.write(
            str(top1) + '\t' + str(top5) + '\tEp:' + str(ep) + '\n')

    validation_writer.close()
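
A note on sample_parameters, which the loop above relies on: its implementation is not shown here, but based on the comments and the header written to the validation file it draws each tunable hyperparameter and returns the updated parameter dict together with the sampled combination. Below is a minimal, hypothetical sketch of such a function; the PARAM_GRID keys follow the logged header ([dim_hidden, dim_rnn_cell, learning_rate, lstm_dropout, lstm_layer, hidden_dropout, dim_embed]), and the candidate values are placeholders, not taken from the original project.

import random

# Hypothetical random-search sampler; candidate values are placeholders.
PARAM_GRID = {
    'dim_hidden':     [100, 200, 300],
    'dim_rnn_cell':   [100, 200, 300],
    'learning_rate':  [1e-2, 1e-3, 1e-4],
    'lstm_dropout':   [0.3, 0.5, 0.7],
    'lstm_layer':     [1, 2],
    'hidden_dropout': [0.3, 0.5, 0.7],
    'dim_embed':      [50, 100, 200],
}

def sample_parameters(params):
    """Overwrite the tunable parameters with sampled values; return (params, combination)."""
    combination = []
    for key, candidates in PARAM_GRID.items():
        value = random.choice(candidates)
        params[key] = value          # replace the default with the sampled value
        combination.append(value)
    return params, combination
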
Example #2
def main(_):
    # Save default params and set scope
    saved_params = FLAGS.__flags
    if saved_params['ensemble']:
        model_name = 'ensemble'
    elif saved_params['ngram'] == 1:
        model_name = 'unigram'
    elif saved_params['ngram'] == 2:
        model_name = 'bigram'
    elif saved_params['ngram'] == 3:
        model_name = 'trigram'
    else:
        assert False, 'Unsupported ngram %d' % saved_params['ngram']
    model_name += '_embedding' if saved_params['embed'] else '_no_embedding'
    saved_params['model_name'] = '%s' % model_name
    saved_params['checkpoint_dir'] += model_name
    pprint.PrettyPrinter().pprint(saved_params)
    saved_dataset = get_data(saved_params)

    validation_writer = open(saved_params['valid_result_path'], 'a')
    validation_writer.write(model_name + "\n")
    validation_writer.write(
        "[dim_hidden, dim_rnn_cell, learning_rate, lstm_dropout, lstm_layer, hidden_dropout, dim_embed]\n"
    )
    validation_writer.write("combination\ttop1\ttop5\tepoch\n")

    # Run the model
    for _ in range(saved_params['valid_iteration']):
        # Sample parameter sets
        params, combination = sample_parameters(saved_params.copy())
        dataset = saved_dataset[:]

        # Initialize embeddings
        uni_init = get_char2vec(dataset[0][0][:], params['dim_embed_unigram'],
                                dataset[3][0])
        bi_init = get_char2vec(dataset[0][1][:], params['dim_embed_bigram'],
                               dataset[3][4])
        tri_init = get_char2vec(dataset[0][2][:], params['dim_embed_trigram'],
                                dataset[3][5])

        print(model_name, 'Parameter sets: ', end='')
        pprint.PrettyPrinter().pprint(combination)

        rnn_model = RNN(params, [uni_init, bi_init, tri_init])
        top1, top5, ep = experiment(rnn_model, dataset, params)

        validation_writer.write(str(combination) + '\t')
        validation_writer.write(
            str(top1) + '\t' + str(top5) + '\tEp:' + str(ep) + '\n')

    # first project test
    validation_writer.close()
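
The dataset indexing in these examples (dataset[0][i] for the n-gram inputs, dataset[3][i] for the lookup tables) follows the structure described in the comments of Example #1. The helper below is a hypothetical sketch that simply names that assumed layout; it is not part of the original project, and the field order is taken from those comments rather than verified against get_data().

from collections import namedtuple

# Assumed layout: saved_dataset == [train_set, valid_set, test_set, dictionary], where
# train_set[0..2] are the unigram/bigram/trigram inputs and
# dictionary == [idx2unigram, unigram2idx, idx2country, country2ethnicity, idx2bigram, idx2trigram]
Dictionary = namedtuple('Dictionary', [
    'idx2unigram', 'unigram2idx', 'idx2country',
    'country2ethnicity', 'idx2bigram', 'idx2trigram'
])

def unpack_dataset(saved_dataset):
    train_set, valid_set, test_set, dictionary = saved_dataset
    return train_set, valid_set, test_set, Dictionary(*dictionary)

# With this helper, dataset[3][0] is dictionary.idx2unigram (used for uni_init),
# dataset[3][4] is dictionary.idx2bigram (bi_init), and
# dataset[3][5] is dictionary.idx2trigram (tri_init).
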
Example #3
# Run the model
for _ in range(saved_params['valid_iteration']):
    # Sample a parameter set: non-default parameters are re-initialized with sampled values
    params, combination = sample_parameters(saved_params.copy())
    dataset = saved_dataset[:]

    # Initialize embeddings: get_char2vec returns an embedding initializer.
    # dataset[0] is the train set (unigram/bigram/trigram inputs) and dataset[3] is the
    # dictionary returned by get_data(); see get_data() for the exact layout.
    uni_init = get_char2vec(dataset[0][0][:], params['dim_embed_unigram'],
                            dataset[3][0])
    bi_init = get_char2vec(dataset[0][1][:], params['dim_embed_bigram'],
                           dataset[3][4])
    tri_init = get_char2vec(dataset[0][2][:], params['dim_embed_trigram'],
                            dataset[3][5])

    #print(model_name, 'Parameter sets: ', end='')
    pprint.PrettyPrinter().pprint(combination)

    rnn_model = RNN(params, [uni_init, bi_init, tri_init])
    top1, top5, ep = experiment(rnn_model, dataset, params)

    validation_writer.write(str(combination) + '\t')
    validation_writer.write(
        str(top1) + '\t' + str(top5) + '\tEp:' + str(ep) + '\n')
validation_writer.close()

#if __name__ == '__main__':
#tf.app.run()
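
These examples assume TF1-style command-line flags: main(_) is invoked through tf.app.run(), and FLAGS.__flags is used to turn the flags into a plain dict. The sketch below shows a minimal, hypothetical entry point with some of the flags the examples read; the defaults and help strings are placeholders. Note that on TensorFlow 1.5+ FLAGS.__flags maps names to Flag objects rather than plain values, so FLAGS.flag_values_dict() is the safer way to build saved_params.

import tensorflow as tf

flags = tf.app.flags
# Only some of the flags read by main() are listed; defaults are placeholders.
flags.DEFINE_boolean('ensemble', False, 'Use the uni+bi+tri ensemble model')
flags.DEFINE_integer('ngram', 1, 'Character n-gram order (1, 2 or 3)')
flags.DEFINE_boolean('embed', True, 'Use character embeddings')
flags.DEFINE_string('checkpoint_dir', './checkpoints/', 'Checkpoint directory prefix')
flags.DEFINE_string('valid_result_path', './valid_results.txt', 'Validation log file')
flags.DEFINE_integer('valid_iteration', 10, 'Number of random-search iterations')
FLAGS = flags.FLAGS

if __name__ == '__main__':
    tf.app.run()  # parses the flags and calls main(_)
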