Example #1
# numpy, sys, and time are used below; LSTM_Net and getCharDicts are
# helpers from elsewhere in the project.
import sys
import time

import numpy as np


def load_model_parameters_lstm(path, softmax_temperature=1, activation='tanh', stack_height=15):
    npzfile = np.load(path)

    print("Building model from {0} with hidden_dim: {1}, word_dim: {2} and num_layers: {3}".format(
        path, npzfile['hidden_dim'], npzfile['word_dim'], npzfile['num_layers']))
    sys.stdout.flush()

    t1 = time.time()
    model = LSTM_Net(word_dim=npzfile['word_dim'],
                     hidden_dim=npzfile['hidden_dim'],
                     minibatch_dim=npzfile['minibatch_dim'],
                     num_layers=npzfile['num_layers'],
                     activation=activation)

    model.char_to_code_dict, model.code_to_char_dict, _ = getCharDicts()

    # Each parameter's weights are stored in the archive under the parameter's name.
    for param in model.params:
        param.set_value(npzfile[str(param)])

    t2 = time.time()
    print("Building model took {0:.0f} seconds\n".format(t2 - t1))
    sys.stdout.flush()

    return model
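
The loader above implies a matching serializer: an np.savez archive holding the model dimensions plus one array per trainable parameter, keyed by the parameter's name. A minimal sketch of such a helper, assuming Theano-style shared variables with a get_value() accessor; the function name and the word_dim attribute are assumptions, not part of this listing:

def save_model_parameters_lstm(path, model):
    # One entry per parameter, keyed by its name, mirroring the
    # npzfile[str(param)] lookups in the loader above (assumed layout).
    arrays = {str(param): param.get_value() for param in model.params}
    np.savez(path,
             word_dim=model.word_dim,
             hidden_dim=model.hidden_dim,
             minibatch_dim=model.minibatch_dim,
             num_layers=model.num_layers,
             **arrays)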
Example #2
# Relies on the same project helpers (getCharDicts, LSTM_Net, readFile,
# generate_sentence, calculateLoss) and on module-level configuration constants.
def main():
    char_to_code_dict, code_to_char_dict, ALPHABET_LENGTH = getCharDicts()
    print("char_to_code_dict: {0}\ncode_to_char_dict: {1}\n".format(char_to_code_dict, code_to_char_dict))

    print("Compiling model...")
    t1 = time.time()
    if MODEL_FILE is not None:
        model = load_model_parameters_lstm(MODEL_FILE)
    else:
        model = LSTM_Net(word_dim=ALPHABET_LENGTH, hidden_dim=HIDDEN_DIM, minibatch_dim=MINIBATCH_SIZE,
                         bptt_truncate=BPTT_TRUNCATE, num_layers=NUM_LAYERS, optimization=OPTIMIZATION,
                         activation=ACTIVATION, dropout=DROPOUT,
                         l1_rate=L1_REGULARIZATION, l2_rate=L2_REGULARIZATION)
        model.char_to_code_dict = char_to_code_dict
        model.code_to_char_dict = code_to_char_dict
    t2 = time.time()
    print("Finished! Compiling model took: {0:.0f} seconds\n".format(t2 - t1))

    # model.build_pretrain()
    # counter=1
    # for x,y in readFile(DATAFILE, char_to_code_dict):
    #     pretrain_o, pretrain_h_init, pretrain_c_init = model.pretrain_model(x, .1, 1)

    #     if counter%1000==0:
    #         break
    #         # print np.argmax(x, axis=1)
    #         # print np.argmax(pretrain_o, axis=1)

    #     counter+=1
    # #sys.exit()
    # #h_init_pretrain, c_init_pretrain = pretrain(DATAFILE, model)

    losses = []
    counter = 0
    softmax_temp = 1
    while counter < MAX_MINIBATCHES:
        # Reset the recurrent state at the start of each pass over the data.
        h_prev = np.zeros((model.num_layers, model.hidden_dim, model.minibatch_dim)).astype('float32')
        c_prev = np.zeros((model.num_layers, model.hidden_dim, model.minibatch_dim)).astype('float32')
        # h_prev = pretrain_h_init
        # c_prev = pretrain_c_init

        for x, y in readFile(DATAFILE, char_to_code_dict, model.minibatch_dim):

            if counter % EVAL_LOSS_AFTER == 0:
                if counter == 0:
                    print("Dims of one minibatch: {0}".format(x.shape))
                t1 = time.time()

                h_prev, c_prev = model.train_model(x, y, h_prev, c_prev, LEARNING_RATE, softmax_temp)

                t2 = time.time()
                if counter % (EVAL_LOSS_AFTER * 10) == 0:
                    print("One SGD step took: {0:.2f} milliseconds".format((t2 - t1) * 1000.))

                calculateLoss(LOSSFILE, model, counter)
            else:
                h_prev, c_prev = model.train_model(x, y, h_prev, c_prev, LEARNING_RATE, softmax_temp)

            if SAMPLE and counter != 0 and counter % SAMPLE_EVERY == 0:
                # Use a separate loop variable so the sampling temperature does not
                # overwrite the softmax_temp used for training.
                for sample_temp in SOFTMAX_TEMPS:
                    print("\nSampling sentence with softmax {0}".format(sample_temp))
                    for _ in range(SAMPLE_NUMBER):
                        sent = generate_sentence(model, sample_limit=SAMPLE_LIMIT,
                                                 softmax_temp=sample_temp, starting_string=STARTING_STRING)
                        print("".join(sent))
                print()

            counter += 1

    calculateLoss(LOSSFILE, model, counter)
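
Both train_model and generate_sentence take a softmax temperature, which controls how peaked the output character distribution is before sampling. As a standalone illustration of that idea (not code from this project), a temperature-scaled draw from a logit vector can be sketched as:

def sample_with_temperature(logits, temp=1.0):
    # temp < 1 sharpens the distribution toward the argmax;
    # temp > 1 flattens it toward uniform sampling.
    scaled = np.asarray(logits, dtype='float64') / temp
    scaled -= scaled.max()  # subtract the max for numerical stability
    probs = np.exp(scaled) / np.exp(scaled).sum()
    return np.random.choice(len(probs), p=probs)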