def runMoreRnn(path=None, epochs=10, saveResult=True):
    """Restore a saved RNN language model and continue training it for `epochs` more epochs."""
    trainData, validData, testData, wordId = loadWordIdsFromFiles()
    # word ids are integer indices into the vocabulary
    trainData = np.array(trainData, np.int32)
    testData = np.array(testData, np.int32)
    # validData is loaded but not used here
    vocabSz = len(wordId)

    # hyper-parameters come from the checkpoint's info file
    info = loadInfo('rnn', path)
    learnRate = info['learning rate']
    batchSz = info['batch size']
    embedSz = info['embed size']
    rnnSz = info['rnn size']
    winSz = info['win size']

    numWin = (trainData.shape[0] - 1) // (batchSz * winSz)
    # each batch row holds winSz * numWin words of contiguous text
    batchLen = winSz * numWin
    testNumWin = (testData.shape[0] - 1) // (batchSz * winSz)
    testBatchLen = winSz * testNumWin

    # rebuild the same graph the checkpoint was trained with
    inp = tf.placeholder(tf.int32, shape=[batchSz, winSz])
    ans = tf.placeholder(tf.int32, shape=[batchSz, winSz])
    E = tf.Variable(tf.random_normal([vocabSz, embedSz], stddev=0.1))
    embed = tf.nn.embedding_lookup(E, inp)
    rnn = BasicRNNCell(rnnSz, activation='relu')
    initialState = rnn.zero_state(batchSz, tf.float32)
    output, nextState = tf.nn.dynamic_rnn(rnn, embed, initial_state=initialState)
    W = tf.Variable(tf.random_normal([rnnSz, vocabSz], stddev=.1))
    B = tf.Variable(tf.random_normal([vocabSz], stddev=.1))
    # project every time step onto the vocabulary: [batchSz, winSz, rnnSz] x [rnnSz, vocabSz]
    logits = tf.tensordot(output, W, [[2], [0]]) + B
    ents = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=ans)
    loss = tf.reduce_sum(ents)
    train = tf.train.GradientDescentOptimizer(learnRate).minimize(loss)

    # index 0 keeps the perplexities recorded before this resumed run
    trainPerp = np.zeros(epochs + 1, dtype=np.float32)
    trainPerp[0] = info['train perplexity']
    testPerp = np.zeros(epochs + 1, dtype=np.float32)
    testPerp[0] = info['test perplexity']

    with tf.Session() as sess:
        loadSession(sess, 'rnn', path)
        startTime = time.time()
        epoch = 0
        print('epoch:', end=' ')
        while epoch < epochs:
            epoch += 1
            win = 0
            state = sess.run(initialState)
            testState = sess.run(initialState)
            winStart, winEnd = 0, winSz
            while win < numWin:
                inInp = np.array([trainData[i * batchLen + winStart:i * batchLen + winEnd] for i in range(batchSz)])
                inAns = np.array([trainData[i * batchLen + winStart + 1:i * batchLen + winEnd + 1] for i in range(batchSz)])
                # carry the RNN state across windows: feed it through initialState and
                # fetch nextState (feeding and fetching nextState itself would fail and
                # would also restart from the zero state at every window)
                _, state, outLoss = sess.run([train, nextState, loss],
                                             {inp: inInp, ans: inAns, initialState: state})
                trainPerp[epoch] += outLoss
                if win < testNumWin:
                    inInp = np.array([testData[i * testBatchLen + winStart:i * testBatchLen + winEnd] for i in range(batchSz)])
                    inAns = np.array([testData[i * testBatchLen + winStart + 1:i * testBatchLen + winEnd + 1] for i in range(batchSz)])
                    testState, testOutLoss = sess.run([nextState, loss],
                                                      {inp: inInp, ans: inAns, initialState: testState})
                    testPerp[epoch] += testOutLoss
                winStart, winEnd = winEnd, winEnd + winSz
                win += 1
            print(epoch + info['epochs'], end=' ')

        # perplexity = exp(summed cross-entropy / number of predicted words)
        trainPerp[1:] = np.exp(trainPerp[1:] / (trainData.shape[0] // (batchSz * batchLen) * (batchSz * batchLen)))
        testPerp[1:] = np.exp(testPerp[1:] / (testData.shape[0] // (batchSz * testBatchLen) * (batchSz * testBatchLen)))
        print(f'\nelapsed: {time.time() - startTime}')
        print('train perplexity:', trainPerp[-1])
        print('test perplexity:', testPerp[-1])

        info['epochs'] += epochs
        info['train perplexity'] = trainPerp[-1]
        info['test perplexity'] = testPerp[-1]
        if saveResult:
            save(sess, info)
    drawPerplexity(trainPerp, testPerp, info['epochs'] - epochs)
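# The window/batch indexing above is the least obvious part of runMoreRnn: the word
# stream is cut into batchSz contiguous rows of batchLen words each, and every step
# feeds one winSz-wide window from each row, with targets shifted by one word. The
# helper below is a standalone sketch of that layout; makeWindowBatch is hypothetical
# and not used anywhere else in this file.
def makeWindowBatch(data, batchSz, winSz, win):
    numWin = (len(data) - 1) // (batchSz * winSz)   # windows per row
    batchLen = winSz * numWin                       # words per row
    start, end = win * winSz, (win + 1) * winSz
    inp = np.array([data[i * batchLen + start:i * batchLen + end] for i in range(batchSz)])
    ans = np.array([data[i * batchLen + start + 1:i * batchLen + end + 1] for i in range(batchSz)])
    return inp, ans  # both have shape [batchSz, winSz]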
def runMoreBigram(path=None, epochs=10, saveResult=True):
    """Restore a saved bigram language model and continue training it for `epochs` more epochs."""
    trainData, validData, testData, wordId = loadWordIdsFromFiles()
    # word ids are integer indices into the vocabulary
    trainData = np.array(trainData, np.int32)
    testData = np.array(testData, np.int32)
    # validData is loaded but not used here
    vocabSz = len(wordId)

    # hyper-parameters come from the checkpoint's info file
    info = loadInfo('bigram', path)
    batchSz = info['batch size']
    embedSz = info['embed size']
    learnRate = info['learning rate']

    # shapes are left unspecified so the whole test set can be fed in one run
    inp = tf.placeholder(tf.int32)
    ans = tf.placeholder(tf.int32)
    E = tf.Variable(tf.random_normal([vocabSz, embedSz], stddev=0.1))
    embed = tf.nn.embedding_lookup(E, inp)
    dropRate = tf.placeholder(tf.float32)
    embed = tf.nn.dropout(embed, rate=dropRate)
    W = tf.Variable(tf.random_normal([embedSz, vocabSz], stddev=.1))
    B = tf.Variable(tf.random_normal([vocabSz], stddev=.1))
    logits = tf.matmul(embed, W) + B
    ents = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=ans)
    loss = tf.reduce_sum(ents)
    train = tf.train.GradientDescentOptimizer(learnRate).minimize(loss)

    sess = tf.Session()
    loadSession(sess, 'bigram', path)
    startTime = time.time()

    # index 0 keeps the perplexities recorded before this resumed run
    trainPerp = np.zeros(epochs + 1, dtype=np.float32)
    trainPerp[0] = info['train perplexity']
    testPerp = np.zeros(epochs + 1, dtype=np.float32)
    testPerp[0] = info['test perplexity']

    epoch = 0
    print('epoch:', end=' ')
    while epoch < epochs:
        epoch += 1
        # stop one word early so the shifted target slice never runs past the end
        for i in range((trainData.shape[0] - 1) // batchSz):
            inInp = trainData[i * batchSz:(i + 1) * batchSz]
            inAns = trainData[i * batchSz + 1:(i + 1) * batchSz + 1]
            outLoss, _ = sess.run([loss, train],
                                  feed_dict={inp: inInp, ans: inAns, dropRate: 0.5})
            trainPerp[epoch] += outLoss
        # evaluate on the whole test set at once, with dropout disabled
        testPerp[epoch] = sess.run(loss, feed_dict={inp: testData[:-1], ans: testData[1:], dropRate: 0.0})
        print(epoch + info['epochs'], end=' ')

    # perplexity = exp(summed cross-entropy / number of predicted words)
    trainPerp[1:] = np.exp(trainPerp[1:] / (trainData.shape[0] // batchSz * batchSz))
    testPerp[1:] = np.exp(testPerp[1:] / (testData.shape[0] - 1))
    print(f'\nelapsed: {time.time() - startTime}')
    print('train perplexity:', trainPerp[-1])
    print('test perplexity:', testPerp[-1])

    info['epochs'] += epochs
    info['train perplexity'] = trainPerp[-1]
    info['test perplexity'] = testPerp[-1]
    if saveResult:
        save(sess, info)
    sess.close()
    drawPerplexity(trainPerp, testPerp, info['epochs'] - epochs)
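# Usage sketch (assumes checkpoints written by the original training functions exist
# where loadInfo/loadSession expect them; pass `path` explicitly if they do not):
#
#     runMoreBigram(epochs=10)                 # resume the bigram model
#     runMoreRnn(epochs=10, saveResult=False)  # resume the RNN model without saving
#
# Each call adds ops to the default TF1 graph, so if you resume more than one model
# in the same process, call tf.reset_default_graph() between calls.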