window=5, min_count=5) model.save(w2v_fp) wv = model.wv data.filter_words(set(model.wv.vocab.keys())) data.cut_train_and_test(balance=True) ## rnn batch_size = 1024 num_batch_per_epoch = data.get_num_of_batch(batch_size) hidden_size = 100 num_epoch = 100 lr = 0.002 rnn = RNN("gru", hidden_size, embedding_size, lr=lr) rnn.build_graph() sess = tf.Session() sess.run(tf.global_variables_initializer()) for epoch in range(num_epoch): for _ in range(num_batch_per_epoch): batch = data.get_train_batch(batch_size) batch_X = [ np.array([wv[w] for w in tweet.words]) for tweet in batch ] batch_y = [int(tweet.label == "bull") for tweet in batch] _, _ = rnn.train(batch_X, batch_y, sess) ## accuracy check, train, valid, test sets train_X = [ np.array([wv[w] for w in tweet.words]) for tweet in data.train
# Train a GRU-based RNN classifier on tweets ("bull" vs. not-"bull" labels),
# feeding word-index sequences through a trainable embedding lookup table
# (contrast with the word2vec-vector variant elsewhere in this file).
w2i = data.word2index()
data.cut_train_and_test(balance=True)  # balanced train/test split

## rnn hyperparameters
batch_size = 128
num_batch_per_epoch = data.get_num_of_batch(batch_size)
hidden_size = 100
num_epoch = 100
lr = 0.002
embedding_size = 100  ## width of the trainable embedding vectors

rnn = RNN("gru", hidden_size, embedding_size, lr=lr)
# Pass the embedding_size variable instead of a duplicated literal 100,
# so changing the hyperparameter above cannot silently desync the graph.
rnn.build_graph(embedding=True, vocab_size=len(w2i), embedding_size=embedding_size)

# NOTE(review): the session is intentionally left open — the trailing comment
# ("train, valid, test sets") suggests later code keeps using `sess` for
# valid/test accuracy; close it there once evaluation is done.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

for _ in range(num_epoch):  # epoch counter itself is unused
    for _ in range(num_batch_per_epoch):
        batch = data.get_train_batch(batch_size)
        # Inputs are per-tweet word-index sequences; the embedding lookup
        # happens inside the graph (embedding=True).
        batch_X = [np.array(tweet.word_indexes) for tweet in batch]
        batch_y = [int(tweet.label == "bull") for tweet in batch]
        _, _ = rnn.train(batch_X, batch_y, sess, embedding=True)

## accuracy check, train, valid, test sets
train_X = [np.array(tweet.word_indexes) for tweet in data.train]
train_y = [int(tweet.label == "bull") for tweet in data.train]
train_ent, train_acc = rnn.cal_accuracy(train_X, train_y, sess, embedding=True)