Example #1
def pretrain_actorCritic():
    """Pre-train the actor (autoencoder) and then the critic of an actor-critic model.

    Pipeline:
      1. Load sentences — either a pickled preprocessed cache or raw data
         preprocessed on the fly (capped at 5000 sentences).
      2. Train a word2vec model on the sentences.
      3. Tokenize/pad the sentences and one-hot encode the targets.
      4. Build/train/save the Autoencoder ("actor") per the module flags.
      5. Feed the actor's predictions into an ActorCriticAutoEncoder and
         train/save the critic per the module flags.

    Behavior is controlled by the module-level flags
    USE_SAVED_PREPROCESSED_INPUT, LOAD_WEIGHTS, TRAIN_ACTOR, TRAIN_CRITIC
    and SAVE_WEIGHTS, and the PRETRAINING_* path constants.
    """
    if USE_SAVED_PREPROCESSED_INPUT:
        # Pickle files must be opened in binary mode on Python 3; the context
        # manager also guarantees the handle is closed.
        with open(PRETRAINING_PREPROCESSED_INPUT_FILE, 'rb') as cache_file:
            sentences = pickle.load(cache_file)[:5000]
    else:
        print("Loading data")
        sentences = load_data(PRETRAINING_DATA_FILE)
        print("Data loaded")
        # Cap at 5000 sentences to keep pretraining tractable.
        sentences = preprocess_text(sentences)[:5000]
    print("shape of sentences", sentences.shape)

    print("Training w2v model")
    w2v_model = train_w2v_model(sentences)
    print("w2v model trained")

    token_sequences, output_sequences, token_to_index_dic = tokenize_and_pad_sentences(sentences)
    index_to_word_dic = get_index_to_word_dic(token_to_index_dic)
    token_sequences = np.asarray(token_sequences)
    output_sequences = np.asarray(output_sequences)
    print("input shape", token_sequences.shape)

    # Targets are one-hot encoded over the full vocabulary.
    output_sequences = [one_hot(seq, len(token_to_index_dic)) for seq in output_sequences]
    print("Tokenization done. %d sequences" % len(token_sequences), "shape ", token_sequences.shape)
    print("preprocessing done")
    train_x, train_y, val_x, val_y, test_x, test_y  = get_train_val_test_data(token_sequences, output_sequences)

    autoencoder = Autoencoder(w2v_model, token_to_index_dic)

    print("Creating NN model")
    autoencoder.create_nn_model()
    print("NN model created")

    if LOAD_WEIGHTS:
        print("Loading saved weights from %s" % PRETRAINING_ACTOR_WEIGHTS_FILE)
        autoencoder.load_weights(PRETRAINING_ACTOR_WEIGHTS_FILE)

    if TRAIN_ACTOR:
        print("Training actor")
        autoencoder.train(train_x, train_y,  val_x, val_y)

        if SAVE_WEIGHTS:
            print("Saving actor weights")
            autoencoder.save(PRETRAINING_ACTOR_WEIGHTS_FILE)

    print("Predicting using actor")
    output = autoencoder.predict(train_x)
    # Decode each predicted sequence back to words via argmax over the vocab axis.
    for seq in output:
        print(index_to_sentence(index_to_word_dic, [np.argmax(ele) for ele in seq]))

    print("Initializing actorCritic")
    actor_critic = ActorCriticAutoEncoder(w2v_model=w2v_model,
            token_to_index_dic=token_to_index_dic,
            actor=autoencoder.autoencoder)
    print("Creating critic model")
    actor_critic.create_critic_model()
    print("Critic model created")

    # The critic learns to map the actor's (soft) outputs back to the
    # one-hot encoding of the original inputs.
    critic_train_x = output
    critic_train_y = [one_hot(seq, len(token_to_index_dic)) for seq in train_x]
    if TRAIN_CRITIC:
        print("Training critic")
        actor_critic.train_critic(critic_train_x, critic_train_y)
        print("Critic trained")

        if SAVE_WEIGHTS:
            print("Saving critic")
            actor_critic.save_critic(PRETRAINING_CRITIC_MODEL_FILE)
            print("Critic saved")
Example #2
def main_auto():
    """Train/evaluate the autoencoder on cached preprocessed sentences.

    Loads a pre-trained GloVe word2vec model, reads pickled preprocessed
    sentences, tokenizes/pads them, builds the Autoencoder, optionally loads
    weights / trains / saves (per the module flags LOAD_WEIGHTS, TRAIN_ACTOR,
    SAVE_WEIGHTS), then prints the test inputs alongside the decoded
    predictions.
    """
    print("Loading w2v model")
    w2v_model = get_w2v_model("/home/aries/Documents/Learning/DL/autoencoder/data/glove.6B/glove.6B.200d_gensim.txt")
    print("w2v model loaded")

    if USE_SAVED_PREPROCESSED_INPUT:
        # Pickle files must be opened in binary mode on Python 3; the context
        # manager closes the handle (the original leaked it).
        with open(PRETRAINING_PREPROCESSED_INPUT_FILE, 'rb') as cache_file:
            sentences = pickle.load(cache_file)[:5000]
    # NOTE(review): if USE_SAVED_PREPROCESSED_INPUT is false, `sentences` is
    # unbound and the next line raises NameError — the raw-data path was
    # commented out upstream. Confirm the flag is always true for this entry
    # point, or restore the preprocess_text() branch.
    print("shape of sentences", sentences.shape)

    token_sequences, output_sequences, token_to_index_dic = tokenize_and_pad_sentences(sentences)
    index_to_word_dic = get_index_to_word_dic(token_to_index_dic)
    token_sequences = np.asarray(token_sequences)
    output_sequences = np.asarray(output_sequences)
    print("input shape", token_sequences.shape)

    # Targets are one-hot encoded over the full vocabulary.
    output_sequences = [one_hot(seq, len(token_to_index_dic)) for seq in output_sequences]
    print("Tokenization done. %d sequences" % len(token_sequences), "shape ", token_sequences.shape)
    print("preprocessing done")
    train_x, train_y, val_x, val_y, test_x, test_y  = get_train_val_test_data(token_sequences, output_sequences)

    autoencoder = Autoencoder(w2v_model, token_to_index_dic)

    print("Creating NN model")
    autoencoder.create_nn_model()
    print("NN model created")

    if LOAD_WEIGHTS:
        print("Loading saved weights from %s" % PRETRAINING_ACTOR_WEIGHTS_FILE)
        autoencoder.load_weights(PRETRAINING_ACTOR_WEIGHTS_FILE)
    print("Training autoencoder")

    if TRAIN_ACTOR:
        autoencoder.train(train_x, train_y,  val_x, val_y)

    if SAVE_WEIGHTS:
        autoencoder.save(PRETRAINING_ACTOR_WEIGHTS_FILE)

    output = autoencoder.predict(test_x)

    print("Input")
    for seq in test_x:
        print(index_to_sentence(index_to_word_dic, seq))
    print("\n")
    print("########Generated##########")
    print("\n")
    # Decode each predicted sequence back to words via argmax over the vocab axis.
    for seq in output:
        print(index_to_sentence(index_to_word_dic, [np.argmax(ele) for ele in seq]))