def load_model_parameters_lstm(path, softmax_temperature=1, activation='tanh', stack_height=15):
    """Rebuild an LSTM_Net from a saved ``.npz`` archive and restore its weights.

    The archive must contain the entries ``hidden_dim``, ``word_dim``,
    ``minibatch_dim`` and ``num_layers``, plus one array per model parameter
    stored under the key ``str(param)``.

    Args:
        path: Location of the archive, handed to ``np.load``.
        softmax_temperature: Not read by this function; kept so existing
            callers that pass it keep working.
        activation: Forwarded unchanged to the ``LSTM_Net`` constructor.
        stack_height: Not read by this function; kept for backward
            compatibility.

    Returns:
        The freshly built ``LSTM_Net`` with every entry of ``model.params``
        set from the archive and the character dictionaries from
        ``getCharDicts()`` attached.

    Raises:
        KeyError: If the archive lacks one of the expected entries.
    """
    # np.load on an .npz archive keeps the underlying file open until it is
    # closed, so scope it with `with` instead of leaking the handle.
    with np.load(path) as npzfile:
        word_dim = npzfile['word_dim']
        hidden_dim = npzfile['hidden_dim']
        num_layers = npzfile['num_layers']
        minibatch_dim = npzfile['minibatch_dim']

        print("Building model from {0} with hidden_dim: {1}, word_dim: {2} and num_layers: {3}".format(
            path, hidden_dim, word_dim, num_layers))
        sys.stdout.flush()

        t1 = time.time()
        model = LSTM_Net(word_dim=word_dim,
                         hidden_dim=hidden_dim,
                         minibatch_dim=minibatch_dim,
                         num_layers=num_layers,
                         activation=activation)
        model.char_to_code_dict, model.code_to_char_dict, _ = getCharDicts()

        # Each parameter was saved under its string representation.
        for param in model.params:
            param.set_value(npzfile[str(param)])
    t2 = time.time()

    print("Building model took {0:.0f} seconds\n".format(t2 - t1))
    sys.stdout.flush()
    return model
def main():
    """Build (or load) the LSTM model and train it on DATAFILE.

    Reads its configuration from module-level constants (MODEL_FILE,
    HIDDEN_DIM, MINIBATCH_SIZE, MAX_MINIBATCHES, EVAL_LOSS_AFTER, SAMPLE,
    SAMPLE_EVERY, SOFTMAX_TEMPS, ...).

    Training repeatedly iterates over ``readFile(DATAFILE, ...)``. The hidden
    and cell states are reset to zeros at the start of every pass and carried
    from one minibatch to the next within a pass. MAX_MINIBATCHES is only
    checked between passes, so a pass that has started always runs to the
    end of the data.

    Every EVAL_LOSS_AFTER minibatches the loss is recorded through
    ``calculateLoss``; every SAMPLE_EVERY minibatches (when SAMPLE is truthy)
    sentences are generated at each temperature in SOFTMAX_TEMPS. A final
    ``calculateLoss`` call is made once training stops.
    """
    char_to_code_dict, code_to_char_dict, ALPHABET_LENGTH = getCharDicts()
    print("char_to_code_dict: {0}\ncode_to_char_dict: {1}\n".format(char_to_code_dict, code_to_char_dict))

    print("Compiling model...")
    t1 = time.time()
    if MODEL_FILE is not None:
        model = load_model_parameters_lstm(MODEL_FILE)
    else:
        model = LSTM_Net(word_dim=ALPHABET_LENGTH,
                         hidden_dim=HIDDEN_DIM,
                         minibatch_dim=MINIBATCH_SIZE,
                         bptt_truncate=BPTT_TRUNCATE,
                         num_layers=NUM_LAYERS,
                         optimization=OPTIMIZATION,
                         activation=ACTIVATION,
                         dropout=DROPOUT,
                         l1_rate=L1_REGULARIZATION,
                         l2_rate=L2_REGULARIZATION)
    # Make sure the model carries the same dictionaries used to encode the data.
    model.char_to_code_dict = char_to_code_dict
    model.code_to_char_dict = code_to_char_dict
    t2 = time.time()
    print("Finished! Compiling model took: {0:.0f} seconds\n".format(t2 - t1))

    # NOTE: an experimental pretraining pass (model.build_pretrain /
    # model.pretrain_model) that seeded h_prev / c_prev was disabled here;
    # training always starts from zero states.

    counter = 0
    # Temperature used for training. Kept separate from the sampling loop
    # variable so that sampling cannot change how the model is trained.
    train_softmax_temp = 1
    state_shape = (model.num_layers, model.hidden_dim, model.minibatch_dim)

    while counter < MAX_MINIBATCHES:
        h_prev = np.zeros(state_shape).astype('float32')
        c_prev = np.zeros(state_shape).astype('float32')
        counter_at_pass_start = counter

        for x, y in readFile(DATAFILE, char_to_code_dict, model.minibatch_dim):
            is_eval_step = counter % EVAL_LOSS_AFTER == 0
            if is_eval_step and counter == 0:
                print("Dims of one minibatch: {0}".format(x.shape))

            step_start = time.time()
            h_prev, c_prev = model.train_model(x, y, h_prev, c_prev, LEARNING_RATE, train_softmax_temp)
            step_end = time.time()

            if is_eval_step:
                # Timing is reported ten times less often than the loss.
                if counter % (EVAL_LOSS_AFTER * 10) == 0:
                    print("One SGD step took: {0:.2f} milliseconds".format((step_end - step_start) * 1000.))
                calculateLoss(LOSSFILE, model, counter)

            if SAMPLE and (counter != 0) and counter % SAMPLE_EVERY == 0:
                for sample_temp in SOFTMAX_TEMPS:
                    print("\nSampling sentence with softmax {0}".format(sample_temp))
                    for _ in range(SAMPLE_NUMBER):
                        sent = generate_sentence(model,
                                                 sample_limit=SAMPLE_LIMIT,
                                                 softmax_temp=sample_temp,
                                                 starting_string=STARTING_STRING)
                        print("".join(sent))
                        print("")
            counter += 1

        # A pass that produced no minibatch can never advance the counter;
        # stop instead of looping forever.
        if counter == counter_at_pass_start:
            print("No minibatches could be read from {0}; stopping training.".format(DATAFILE))
            break

    calculateLoss(LOSSFILE, model, counter)