def main():
    train_data = MNISTData(dset='train')
    val_data = MNISTData(dset='val')
    train_loader = DataLoader(train_data, batch_size=8)
    val_loader = DataLoader(val_data, batch_size=8)

    loss_func = F.cross_entropy
    Net = MNISTNet()
    optimizer = optim.Adam(Net.parameters(), lr=1e-3)

    Net.train()
    for epoch in range(1):
        for x, y in tqdm(train_loader):
            pred = Net(x)
            loss = loss_func(pred, y)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        print(loss.item())

    Net.eval()
    print("TRAIN ACCURACY")
    print(accuracy(Net, train_loader))
    print("VAL ACCURACY")
    print(accuracy(Net, val_loader))
    torch.save(Net.state_dict(), './model.pt')

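# MNISTData, MNISTNet, and accuracy are defined elsewhere in the project. Purely for
# illustration, a minimal MNISTNet stand-in compatible with the loop above could look
# like this (an assumption, not the project's actual architecture):
import torch.nn as nn

class MNISTNet(nn.Module):
    """Illustrative stand-in: a small fully connected classifier for 28x28 MNIST images."""

    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Flatten(),
            nn.Linear(28 * 28, 128),
            nn.ReLU(),
            nn.Linear(128, 10),  # 10 digit classes; F.cross_entropy expects raw logits
        )

    def forward(self, x):
        return self.net(x)
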
def main():
    train_data = MNISTData(dset='train')
    val_data = MNISTData(dset='val')
    train_loader = DataLoader(train_data, batch_size=8)
    val_loader = DataLoader(val_data, batch_size=8)

    Net = MNISTNet()
    Net.load_state_dict(torch.load('./model.pt'))
    Net.eval()

    print("TRAIN ACCURACY")
    print(accuracy(Net, train_loader))
    print("VAL ACCURACY")
    print(accuracy(Net, val_loader))

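# The accuracy helper used by the two scripts above is not shown here.
# A minimal sketch, assuming the loaders yield (inputs, integer labels) batches
# and the model returns class logits:
import torch

def accuracy(model, loader):
    """Fraction of correctly classified examples over a DataLoader."""
    correct, total = 0, 0
    with torch.no_grad():                   # no gradients needed during evaluation
        for x, y in loader:
            preds = model(x).argmax(dim=1)  # predicted class per example
            correct += (preds == y).sum().item()
            total += y.size(0)
    return correct / total
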
def BSGD(X, y):
    start_time = time.time()
    Y = util.transformY(y)
    W = np.ones([5, X.shape[1]]) * 1.0 / X.shape[1]
    nabla_list = []
    lambdada = 0.05   # L2 regularization strength
    step = 0.001      # learning rate
    iter = 0
    batch_size = 500
    chunk_list = chunks(range(X.shape[0]), batch_size)
    round = int(math.ceil(X.shape[0] / (batch_size + 0.0)))

    while iter < 30000:
        # iteratively update on one mini-batch
        r = chunk_list[iter % round]
        sumover = np.zeros(X[r].shape[0]).reshape([1, X[r].shape[0]])
        for j in xrange(5):
            sumover += np.exp(W[j] * (X[r].transpose()))
        softmax = np.exp(W * (X[r].transpose())) / sumover
        temp = Y[r].T - softmax
        nabla = temp * X[r] - lambdada * W
        # adaptive learning rate
        # step = 10.0 / (1000 + iter)
        W = W + step * nabla
        # if np.sum(nabla * step) < 0.001:
        #     break

        # training-set prediction once per pass over the data
        if iter % round == 0:
            # nabla_list.append(step * np.linalg.norm(nabla))
            Sumover = 0
            for j in xrange(5):
                Sumover += np.exp(W[j] * (X.transpose()))
            distri = np.exp(W * (X.transpose())) / Sumover
            # hard prediction
            t = np.argmax(distri, axis=0)
            t = t + 1
            print eval.accuracy(t, y)
        iter += 1

    print 'time: %ss' % (time.time() - start_time)
    # plt.plot(nabla_list)
    # plt.show()
    return W

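# The chunks helper used by BSGD is not shown; a minimal sketch, assuming it simply
# splits the sample indices into consecutive batches of at most `size` elements:
def chunks(seq, size):
    """Split an index sequence into consecutive slices of at most `size` items."""
    return [seq[i:i + size] for i in xrange(0, len(seq), size)]
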
STEPS = 100
dataset_size = 1111
save_dir = './'   # directory where the trained network is saved
global_step = tf.Variable(0, trainable=False)
log_dir = './log/'

X = tf.placeholder(tf.float32, shape=(None, SIZE, SIZE, 3), name="input_x")
Y = tf.placeholder(tf.float32, shape=(None, SIZE, SIZE, 1), name="input_y")
y_ = PSP_model.y  # final network output (a list)

learning_rate = tf.train.exponential_decay(0.1, global_step, STEPS // 50, 0.9, staircase=True)
tf.summary.scalar('learning_rate', learning_rate)

loss = eval.accuracy(Y, y_)  # the loss should also include L2 regularization
tf.summary.scalar('loss', loss)

train_step = tf.train.AdamOptimizer(learning_rate).minimize(
    loss, global_step=global_step)  # could be improved with an exponential moving average of the weights

saver = tf.train.Saver()
merged = tf.summary.merge_all()

with tf.Session() as sess:
    writer = tf.summary.FileWriter(log_dir, sess.graph)
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    for i in range(STEPS):
        start = (i * batch_size) % dataset_size
        end = min(start + batch_size, dataset_size)
        _, _ = sess.run([train_step, global_step], feed_dict={
            x: X[start:end],
    handle_feat.remove()
    handle_grad.remove()

    atten = grad_cam(grad_block, fmap_block)
    att_loss = criterion(out, targets)
    out, x1, x2, loss_1, loss_2 = model(img, cam=False, att=atten)
    loss = criterion(out, targets)
    # loss = (1 - (1 / (epoch + 1))) * loss + (1 / (epoch + 1)) * att_loss + (1 - (1 / (epoch + 1))) * lambda_ * loss_1.sum()
    loss = 0.4 * loss + 0.6 * att_loss + lambda_ * (loss_1.sum() + loss_2.sum())
else:
    out = model(img)
    loss = criterion(out, targets)

if args.model_type != 'norm' and args.model_type != 'gradcam' and args.model_type != 'atten':
    running_loss_1 += loss_1.sum().item() * targets.size(0)
    # running_loss_2 += loss_2.sum().item() * targets.size(0)

running_loss += loss.item() * targets.size(0)
prec1, prec5 = accuracy(out.data, targets.data, topk=(1, 5))
# _, pred = torch.max(out, 1)               # index of the largest logit, i.e. the predicted label
# num_correct = (pred == targets).sum()
# accuracy = (pred == targets).float().mean()
# running_acc += num_correct.item()
running_acc_1 += prec1.item()
running_acc_5 += prec5.item()

# backward pass
optimizer.zero_grad()
if args.model_type != 'norm' and args.model_type != 'gradcam' and args.model_type != 'atten':
    # compute and add gradient
    x1.retain_grad()
    # x2.retain_grad()
    loss_1.backward(Variable(torch.ones(*loss_1.size()).cuda(0)), retain_graph=True)

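# The top-k accuracy helper used above (prec1, prec5 = accuracy(..., topk=(1, 5))) is not
# shown. A minimal sketch in the style of the standard PyTorch ImageNet example -- an
# assumption, not necessarily this project's own implementation:
import torch

def accuracy(output, target, topk=(1,)):
    """Return top-k precision (in percent) given class logits and integer targets."""
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)  # (batch, maxk) predicted labels
    pred = pred.t()                                                # (maxk, batch)
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
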
def train_joint_conv_net(w2vFile,
                         dataFile,
                         labelStructureFile,
                         cfswitch,
                         filter_hs,
                         n_epochs=1000,
                         batch_size=50,
                         feature_maps=100,
                         hasmlphidden=False,
                         usefscore=False):
    """
    Learn and test the sentence-level Question Classification task in a joint
    fashion, i.e. adding the loss of coarse-label prediction and the loss of
    fine-label prediction together.

    :param w2vFile: path of the word embedding file (a pickle file with a numpy
        array value, produced by the word2vec.py module)
    :param dataFile: the dataset file produced by the process_data.py module
    :param labelStructureFile: a file that describes the label structure of the
        coarse and fine grains; produced by outputlabelstructure() in produce_data.py
    :param cfswitch: 'c' to train/evaluate on coarse labels, 'f' on fine labels
    :param filter_hs: sliding window sizes.
        *** warning *** you cannot just change the window size here if you want
        to use a different window for the experiment. YOU NEED TO RE-PRODUCE A
        NEW DATASET IN process_data.py WITH THE CORRESPONDING WINDOW SIZE.
    :param n_epochs: the number of epochs the training needs to run
    :param batch_size: the size of the mini-batch
    :param feature_maps: how many dimensions you want the abstract sentence
        representation to be
    :param hasmlphidden: whether the MLP classifier gets an extra hidden layer of size 100
    :param usefscore: use F-score instead of accuracy for model selection
    :return: the test-set score at the epoch with the best validation score (final_acc)
    """

    """
    Loading and preparing data
    """
    datasets = load(dataFile)
    clbl_vec, flbl_vec = process_qc.label_structure(labelStructureFile)
    trainDataSetIndex = 0
    testDataSetIndex = 1
    validDataSetIndex = 2
    sentenceIndex = 0
    clblIndex = 1  # coarse label (clbl) index in the dataset structure
    flblIndex = 2  # fine label (flbl) index

    if cfswitch == 'c':
        lblIndex = clblIndex
        label_vec = clbl_vec
    elif cfswitch == 'f':
        lblIndex = flblIndex
        label_vec = flbl_vec
    else:
        print 'wrong arg value in: cfswitch!'
        sys.exit()

    label_size = len(label_vec)

    if hasmlphidden:
        layer_size = [feature_maps * len(filter_hs), 100, label_size]
    else:
        layer_size = [feature_maps * len(filter_hs), label_size]

    # train part
    train_y = shared_store(datasets[trainDataSetIndex][lblIndex])
    train_x = shared_store(datasets[trainDataSetIndex][sentenceIndex])

    # test part
    gold_test_y = datasets[testDataSetIndex][lblIndex]
    test_x = shared_store(datasets[testDataSetIndex][sentenceIndex])

    # valid part
    gold_valid_y = datasets[validDataSetIndex][lblIndex]
    valid_x = shared_store(datasets[validDataSetIndex][sentenceIndex])

    w2v = load(w2vFile)
    img_w = w2v.shape[1]  # the dimension of the word embedding
    img_h = len(datasets[trainDataSetIndex][sentenceIndex][0])  # length of each sentence
    filter_w = img_w  # word embedding dimension

    image_shapes = []
    filter_shapes = []
    for i in xrange(len(filter_hs)):
        image_shapes.append((batch_size, 1, img_h, img_w * filter_hs[i]))
        filter_shapes.append((feature_maps, 1, 1, filter_w * filter_hs[i]))
    pool_size = (img_h, 1)

    train_size = len(datasets[trainDataSetIndex][sentenceIndex])

    print 'number of sentences in training set: ' + str(train_size)
    print 'max sentence length: ' + str(len(datasets[trainDataSetIndex][sentenceIndex][0]))
    print 'train data shape: ' + str(datasets[trainDataSetIndex][sentenceIndex].shape)
    print 'word embedding dim: ' + str(w2v.shape[1])

    """
    Building model in theano language, less comments here.
    You can refer to the Theano web site for more details
    """
    batch_index = T.lvector('hello_batch_index')
    x = T.itensor3('hello_x')
    y = T.ivector('hello_y')

    w2v_shared = theano.shared(value=w2v, name='w2v', borrow=True)

    rng = np.random.RandomState(3435)
    conv_layer_outputs = []
    conv_layers = []
    for i in xrange(len(filter_hs)):
        input = w2v_shared[x.flatten()].reshape(
            (x.shape[0], 1, x.shape[1], x.shape[2] * img_w))[:, :, :, 0:filter_hs[i] * img_w]
        conv_layer = LeNetConvPoolLayer(rng,
                                        input=input,
                                        filter_shape=filter_shapes[i],
                                        poolsize=pool_size,
                                        image_shape=image_shapes[i],
                                        non_linear="relu")
        conv_layers.append(conv_layer)
        conv_layer_outputs.append(conv_layer.output.flatten(2))

    mlp_input = T.concatenate(conv_layer_outputs, 1)

    classifier = MLPDropout(
        rng=rng,
        input=mlp_input,
        layer_sizes=layer_size,  # [feature_maps * len(filter_hs), label_size]
        dropout_rate=0.5,
        activation=Iden)

    params = []
    for conv_layer in conv_layers:
        params += conv_layer.params
    params += classifier.params

    cost = classifier.negative_log_likelihood(y)
    updates = sgd_updates_adadelta(params, cost)

    n_batches = train_x.shape.eval()[0] / batch_size

    train_model = theano.function(
        inputs=[batch_index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_x[batch_index],
            y: train_y[batch_index],
        },
    )

    """
    Building test model
    """
    test_conv_layer_outputs = []
    for i, conv_layer in enumerate(conv_layers):
        test_input = w2v_shared[x.flatten()].reshape(
            (x.shape[0], 1, x.shape[1], x.shape[2] * img_w))[:, :, :, 0:filter_hs[i] * img_w]
        test_conv_layer_outputs.append(
            conv_layer.conv_layer_output(
                test_input,
                (test_x.shape.eval()[0], 1, img_h, img_w * filter_hs[i])).flatten(2))
    test_prediction = classifier.predict(T.concatenate(test_conv_layer_outputs, 1))

    # test on test set
    test_model = theano.function(inputs=[],
                                 outputs=test_prediction,
                                 givens={
                                     x: test_x,
                                 })

    # test on valid set
    valid_model = theano.function(inputs=[],
                                  outputs=test_prediction,
                                  givens={
                                      x: valid_x,
                                  })

    """
    Training part
    """
    print 'training....'
    best_valid_ep = 0
    best_valid_acc = 0.
    best_test_ep = 0
    best_test_acc = 0.
    final_acc = 0.
    epoch = 0
    last_acc = 0.

    # create gold value sequences, required by eval.py
    with open('../exp/goldrs', 'w') as writer:
        for lbl in gold_test_y:
            writer.write(str(lbl) + '\n')

    # training loop
    while epoch < n_epochs:
        epoch += 1
        print '************* epoch ' + str(epoch)
        batch_indexes = range(train_size)
        rng.shuffle(batch_indexes)

        for bchidx in xrange(n_batches):
            random_indexes = batch_indexes[bchidx * batch_size:(bchidx + 1) * batch_size]
            train_cost = train_model(random_indexes)

        test_y_preds = test_model()
        valid_y_preds = valid_model()
        if usefscore:
            test_acc = eval.fscore(gold_test_y, test_y_preds)
            valid_acc = eval.fscore(gold_valid_y, valid_y_preds)
        else:
            test_acc = eval.accuracy(gold_test_y, test_y_preds)
            valid_acc = eval.accuracy(gold_valid_y, valid_y_preds)

        if valid_acc > best_valid_acc:
            best_valid_acc = valid_acc
            best_valid_ep = epoch
            if final_acc < test_acc:
                final_acc = test_acc
                with open('../exp/predictions', 'w') as writer:
                    for lblidx in test_y_preds:
                        writer.write(str(lblidx) + '\n')

        if test_acc > best_test_acc:
            best_test_acc = test_acc
            best_test_ep = epoch

        # output epoch results
        print 'test accuracy is: ' + str(test_acc)
        print 'valid accuracy is: ' + str(valid_acc)
        print 'current best valid prediction accuracy is: ' + str(best_valid_acc) + ' at epoch ' + str(best_valid_ep)
        print 'current best final prediction accuracy is: ' + str(final_acc) + ' at epoch ' + str(best_valid_ep)
        print 'current best test prediction accuracy is: ' + str(best_test_acc) + ' at epoch ' + str(best_test_ep)

        last_acc = test_acc

    # final_acc = last_acc
    return final_acc

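# A hypothetical invocation of train_joint_conv_net; every path and setting below is a
# placeholder for illustration, not the project's actual configuration:
if __name__ == '__main__':
    best_acc = train_joint_conv_net(
        w2vFile='../data/w2v.p',               # placeholder path
        dataFile='../data/qc_dataset.p',       # placeholder path
        labelStructureFile='../data/labels',   # placeholder path
        cfswitch='f',                          # train/evaluate on fine-grained labels
        filter_hs=[1, 2, 3],                   # must match the window sizes used in process_data.py
        n_epochs=25,
        batch_size=50)
    print 'best test accuracy at best-valid epoch: ' + str(best_acc)
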
def train(data, lst_label):
    print "START TRAINING!"
    label = getLabel(lst_label)
    label = sps.csr_matrix(label)
    num_feature = data.shape[1]
    epsilon = 0.0005   # learning rate
    lamda = 0.001      # regularization factor
    num_batch = 2500
    momentum = 0.4
    maxIter = 1500
    W = init(num_feature)
    W = sps.csr_matrix(W)
    prev_W_grad = sps.csr_matrix(W.shape)
    prev_train_error = 10.0
    prior = [
        0.2 / 0.10199, 0.2 / 0.08965, 0.2 / 0.14196, 0.2 / 0.29750,
        0.2 / 0.36689
    ]
    prior_m = np.array(prior)
    prior_m = np.matrix(prior_m)
    prior_m = prior_m.transpose()
    prior_m = np.repeat(prior_m, num_feature, axis=1)
    prior_m = sps.csr_matrix(prior_m)

    for iter in xrange(maxIter):
        shuffled_data, shuffled_label = shuffle(data, label)
        train_error = 0
        batch_size = data.shape[0] / num_batch
        for batch in xrange(num_batch):
            # print "start batch"
            # get batch boundaries
            start = batch * batch_size
            end = (batch + 1) * batch_size if batch < num_batch - 1 else data.shape[0]
            batch_size = batch_size if batch < num_batch - 1 else data.shape[0] - batch * batch_size
            batch_data = shuffled_data[start:end, :]
            batch_label = shuffled_label[start:end, :]

            # print "start calculating prob"
            prob = getProb(W, batch_data)

            # calculate gradient
            # print "start calculating gradient"
            delta = batch_label - prob
            # train_error += delta.multiply(delta).sum()
            train_error += getError(batch_label, prob)
            dW = delta.transpose().dot(batch_data)
            # dW -= lamda * W.multiply(prior_m)
            dW -= lamda * W

            # update weights with momentum
            # print "start updating weights"
            W_grad = momentum * prev_W_grad + epsilon * dW.multiply(prior_m)
            W += W_grad
            prev_W_grad = W_grad

        epsilon *= 0.995
        train_error = train_error / data.shape[0]
        if math.fabs(train_error - prev_train_error) < 1e-8:
            break
        prev_train_error = train_error
        print "train_error:", train_error, " iter: ", iter

    print "training finished!"
    prob = getProb(W, data)
    # hard prediction
    lst_pred_hard = pred_hard_helper(prob)
    # soft prediction
    lst_pred_soft = pred_soft_helper(prob)
    print "accuracy:", eval.accuracy(lst_pred_hard, lst_label), " rmse:", eval.rmse(lst_pred_soft, lst_label)
    # save_model(W, "cf_model")
    return W

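# getProb is not shown; a minimal sketch of a row-wise softmax over the five classes,
# assuming W is a (5, num_feature) CSR matrix and `data` is a (n_samples, num_feature)
# CSR matrix (an assumption, not necessarily the project's implementation):
import numpy as np
import scipy.sparse as sps

def getProb(W, data):
    """Return a sparse (n_samples, 5) matrix of class probabilities."""
    scores = data.dot(W.transpose()).toarray()    # (n_samples, 5) raw class scores
    scores -= scores.max(axis=1, keepdims=True)   # stabilize the exponentials
    expd = np.exp(scores)
    return sps.csr_matrix(expd / expd.sum(axis=1, keepdims=True))
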