def evaluate(test_data, net, filepath=None):
    """Run the network on test_data and return [acc, f1] (clf mode) or [rmse] (regression modes)."""
    net.eval()
    test_data_manager = DataManager(len(test_data), num_epoch=1, batch_size=batch_size)
    num_batch = test_data_manager.num_batch_per_epoch
    pred = []
    truth = []
    logits = []
    for batch in range(num_batch):
        batch_data = test_data_manager.get_batch(test_data)
        # Each sample is (accel, alphanum, special, labels, timestamp, user_id);
        # channel tensors are transposed to [batch, channel, time].
        accel = Variable(
            torch.from_numpy(
                np.transpose(np.asarray([item[0] for item in batch_data]), [0, 2, 1])).cuda())
        alphanum = Variable(
            torch.from_numpy(
                np.transpose(np.asarray([item[1] for item in batch_data]), [0, 2, 1])).cuda())
        special = Variable(
            torch.from_numpy(
                np.transpose(np.asarray([item[2] for item in batch_data]), [0, 2, 1])).cuda())
        labels = [item[3] for item in batch_data]
        label_HDRS = [item[0] for item in labels]
        label_YMRS = [item[1] for item in labels]
        timestamp = Variable(
            torch.from_numpy(np.asarray([item[4] for item in batch_data])).cuda().float())
        users = Variable(
            torch.from_numpy(np.asarray([item[5] for item in batch_data])).cuda())
        outputs = net.forward(accel, alphanum, special, timestamp, users)
        if mode == 'clf':
            # Binary classification: threshold the sigmoid probabilities at 0.5.
            probs = torch.sigmoid(outputs)
            logits += list(probs.data.cpu().numpy())
            pred += list(probs.data.cpu().numpy() > 0.5)
            truth += label_HDRS
        else:
            # Regression: predict the raw HDRS or YMRS score.
            pred += list(outputs.data.cpu().numpy())
            if mode == 'rgs_hdrs':
                truth += label_HDRS
            else:
                truth += label_YMRS
    if mode == 'clf':
        acc = accuracy_score(y_true=truth, y_pred=pred)
        f1 = f1_score(y_true=truth, y_pred=pred)
        if filepath is not None:
            save_log_data([logits, truth], filepath)
        return [acc, f1]
    else:
        rmse = np.sqrt(mean_squared_error(y_true=truth, y_pred=pred))
        return [rmse]
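# `DataManager` is a project-level batching helper that is only used, not defined,
# in this file. The class below is a minimal sketch of the interface assumed by
# evaluate() and main() (num_batch_per_epoch, get_batch); it is an illustration,
# not the project's actual implementation.
class _DataManagerSketch(object):
    def __init__(self, num_sample, num_epoch=1, batch_size=32):
        self.num_sample = num_sample
        self.num_epoch = num_epoch  # kept for interface parity; unused in this sketch
        self.batch_size = batch_size
        self.num_batch_per_epoch = int(np.ceil(num_sample / float(batch_size)))
        self._order = np.random.permutation(num_sample)
        self._cursor = 0

    def get_batch(self, data):
        # Return the next slice of samples, reshuffling indices when an epoch ends.
        if self._cursor >= self.num_sample:
            self._order = np.random.permutation(self.num_sample)
            self._cursor = 0
        idx = self._order[self._cursor:self._cursor + self.batch_size]
        self._cursor += self.batch_size
        return [data[i] for i in idx]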
def main(model='2cnn_rnn_sin_id', pid=0):
    # main() variant for the CNN_RNN_EF2 model, which takes the pre-merged feature
    # sequence plus accelerometer data ("EF" / early-fusion log files).
    if parse_label:
        pl = '_norm'
    else:
        pl = ''
    pl += ('_' + str(pid))
    checkpoint_path = checkpoint_dir + model + '_' + mode + ctrl + pl + '.ckpt'
    if model == 'fillna':
        log_file = result_dir + 'log_ef_long_' + mode + ctrl + pl + '.txt'
    else:
        log_file = result_dir + 'log_ef_short_' + mode + ctrl + pl + '.txt'
    net = CNN_RNN_EF2(use_accel=False)
    if model == 'fillna':
        train_data = train_data_l
        test_data = test_data_l
    else:
        train_data = train_data_s
        test_data = test_data_s
    net.cuda()
    if mode == 'clf':
        criterion = nn.BCEWithLogitsLoss()
    else:
        criterion = nn.MSELoss()
    optimizer = optim.RMSprop(net.parameters(), lr=learning_rate, weight_decay=0.001)
    print('training...')
    best_epoch = 0
    best_acc = 0
    best_f1 = 0
    best_rmse = 1e10
    results = []
    train_data_manager = DataManager(len(train_data), num_epoch=num_epochs, batch_size=batch_size)
    for epoch in range(num_epochs):
        running_loss = 0.0
        num_batch = train_data_manager.num_batch_per_epoch
        net.train()
        for batch in range(num_batch):
            optimizer.zero_grad()
            t0 = time.time()
            batch_data = train_data_manager.get_batch(train_data)
            merged_data = Variable(
                torch.from_numpy(
                    np.transpose(np.asarray([item[0] for item in batch_data]),
                                 [0, 2, 1])).float().cuda())
            accel = Variable(
                torch.from_numpy(
                    np.transpose(np.asarray([item[4] for item in batch_data]),
                                 [0, 2, 1])).float().cuda())
            timestamp = Variable(
                torch.from_numpy(np.asarray([item[2] for item in batch_data])).cuda().float())
            users = Variable(
                torch.from_numpy(np.asarray([item[5] for item in batch_data])).cuda())
            labels = [item[1] for item in batch_data]
            label_HDRS = Variable(
                torch.from_numpy(np.asarray([item[0] for item in labels])).cuda()).float()
            label_YMRS = Variable(
                torch.from_numpy(np.asarray([item[1] for item in labels])).cuda()).float()
            t1 = time.time()
            outputs = net.forward(merged_data, accel, timestamp=timestamp)
            if mode == 'clf' or mode == 'rgs_hdrs':
                loss = criterion(outputs, label_HDRS)
            else:
                loss = criterion(outputs, label_YMRS)
            loss.backward()
            optimizer.step()
            t2 = time.time()
            running_loss += loss.item()
            if batch % 10 == 9:  # print every 10 mini-batches
                #print('[%d, %5d] loss: %.3f, data time: %.3f, train time: %.3f' %
                #      (epoch + 1, batch + 1, running_loss / 10, t1 - t0, t2 - t1))
                running_loss = 0.0
        res = evaluate(test_data, net)
        results.append(res)
        if mode == 'clf':
            # res = [acc, f1]: keep the epoch with the highest accuracy.
            if res[0] > best_acc:
                best_acc = res[0]
                best_f1 = res[1]
                best_epoch = epoch + 1
                #torch.save(net.state_dict(), checkpoint_path)
        else:
            # res = [rmse]: keep the epoch with the lowest RMSE.
            if res[0] < best_rmse:
                best_rmse = res[0]
                best_epoch = epoch + 1
                #torch.save(net.state_dict(), checkpoint_path)
    if mode == 'clf':
        print('best: epoch %d, acc: %f, f1: %f' % (best_epoch, best_acc, best_f1))
    else:
        print('best: epoch %d, rmse: %f' % (best_epoch, best_rmse))
    save_log_data(results, log_file)
    return best_acc if mode == 'clf' else best_rmse
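# `save_log_data` is another helper used above but defined elsewhere in the
# repository. A minimal sketch of what it is assumed to do (the real helper may
# serialize differently, e.g. via pickle):
def _save_log_data_sketch(data, filepath):
    # Write one result entry per line of the log file.
    with open(filepath, 'w') as fout:
        for item in data:
            fout.write(str(item) + '\n')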
def main(model='2cnn_rnn_sin_id', pid=0):
    # main() variant for the models that consume the raw accelerometer,
    # alphanumeric, and special-character channels separately
    # (CNN_RNN variants, CNN_RNN_1, AllConv, RNN).
    if parse_label:
        pl = '_norm'
    else:
        pl = ''
    pl += ('_' + str(pid))
    checkpoint_path = checkpoint_dir + model + '_' + mode + ctrl + pl + '.ckpt'
    if model == 'DeeperMood_24h':
        log_file = './results/log_DM_24h2_' + mode + ctrl + pl + '.txt'
        ab_log = './results/hours_emb_2_' + mode + ctrl + pl + '.txt'
        net = CNN_RNN(use_time='24h2')
    elif model == '2cnn_rnn_sin_id':
        log_file = result_dir + 'log_2cnn_rnn_sin_id_' + mode + ctrl + pl + '.txt'
        ab_log = result_dir + 'alpha_beta_id_' + mode + ctrl + pl + '.txt'
        net = CNN_RNN(use_time='sin_id', num_user=num_users)
    elif model == '2cnn_rnn_sin':
        log_file = result_dir + 'log_2cnn_rnn_sin_' + mode + ctrl + pl + '.txt'
        net = CNN_RNN(use_time='sin', num_user=num_users)
    elif model == '2cnn_rnn':
        log_file = result_dir + 'log_2cnn_rnn_' + mode + ctrl + pl + '.txt'
        net = CNN_RNN(use_time=None)
    elif model == 'cnn_rnn':
        log_file = result_dir + 'log_cnn_rnn_' + mode + ctrl + pl + '.txt'
        net = CNN_RNN_1()
    elif model == 'cnn':
        log_file = result_dir + 'log_cnn_' + mode + ctrl + pl + '.txt'
        net = AllConv()
    elif model == 'cnn2':
        log_file = result_dir + 'log_cnn2_' + mode + ctrl + pl + '.txt'
        net = AllConv(use_special=False)
    else:  # model == 'rnn'
        log_file = result_dir + 'log_rnn_' + mode + ctrl + pl + '.txt'
        net = RNN()
    train_data = train_data_d
    test_data = test_data_d
    net.cuda()
    if mode == 'clf':
        criterion = nn.BCEWithLogitsLoss()
    else:
        criterion = nn.MSELoss()
    optimizer = optim.RMSprop(net.parameters(), lr=learning_rate, weight_decay=0.001)
    print('training...')
    best_epoch = 0
    best_acc = 0
    best_f1 = 0
    best_rmse = 1e10
    results = []
    train_data_manager = DataManager(len(train_data), num_epoch=num_epochs, batch_size=batch_size)
    for epoch in range(num_epochs):
        num_batch = train_data_manager.num_batch_per_epoch
        net.train()
        for batch in range(num_batch):
            optimizer.zero_grad()
            batch_data = train_data_manager.get_batch(train_data)
            accel = Variable(
                torch.from_numpy(
                    np.transpose(np.asarray([item[0] for item in batch_data]),
                                 [0, 2, 1])).cuda())
            alphanum = Variable(
                torch.from_numpy(
                    np.transpose(np.asarray([item[1] for item in batch_data]),
                                 [0, 2, 1])).cuda())
            special = Variable(
                torch.from_numpy(
                    np.transpose(np.asarray([item[2] for item in batch_data]),
                                 [0, 2, 1])).cuda())
            timestamp = Variable(
                torch.from_numpy(np.asarray([item[4] for item in batch_data])).cuda().float())
            users = Variable(
                torch.from_numpy(np.asarray([item[5] for item in batch_data])).cuda())
            labels = [item[3] for item in batch_data]
            label_HDRS = Variable(
                torch.from_numpy(np.asarray([item[0] for item in labels])).cuda()).float()
            label_YMRS = Variable(
                torch.from_numpy(np.asarray([item[1] for item in labels])).cuda()).float()
            outputs = net.forward(accel, alphanum, special, timestamp, users)
            if mode == 'clf' or mode == 'rgs_hdrs':
                loss = criterion(outputs, label_HDRS)
            else:
                loss = criterion(outputs, label_YMRS)
            loss.backward()
            optimizer.step()
        res = evaluate(test_data, net)
        results.append(res)
        if mode == 'clf':
            # res = [acc, f1]: keep the epoch with the highest accuracy.
            if res[0] > best_acc:
                best_acc = res[0]
                best_f1 = res[1]
                best_epoch = epoch + 1
                #torch.save(net.state_dict(), checkpoint_path)
        else:
            # res = [rmse]: keep the epoch with the lowest RMSE.
            if res[0] < best_rmse:
                best_rmse = res[0]
                best_epoch = epoch + 1
                #torch.save(net.state_dict(), checkpoint_path)
    if mode == 'clf':
        print('best: epoch %d, acc: %f, f1: %f' % (best_epoch, best_acc, best_f1))
    else:
        print('best: epoch %d, rmse: %f' % (best_epoch, best_rmse))
    save_log_data(results, log_file)
    if model == '2cnn_rnn_sin_id':
        # Dump the learned per-user time-embedding parameters (alpha, beta, gamma, delta).
        idxs = Variable(torch.from_numpy(np.arange(num_users)).long().cuda())
        alpha = net.alpha(idxs).data
        beta = net.beta(idxs).data
        gamma = net.gamma(idxs).data
        delta = net.delta(idxs).data
        with open(ab_log, 'w') as fout:
            for i in range(num_users):
                fout.write(str(float(alpha[i][0])) + '\t' + str(float(beta[i][0])) + '\t' +
                           str(float(gamma[i][0])) + '\t' + str(float(delta[i][0])) + '\n')
    return best_acc if mode == 'clf' else best_rmse
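# Hypothetical entry point (not part of the original script): run the second
# main() variant once per pid and report the mean of the best per-run scores
# (accuracy in clf mode, RMSE otherwise). Looping pid over range(num_users) is
# an assumption about the evaluation protocol.
if __name__ == '__main__':
    best_scores = []
    for pid in range(num_users):
        best_scores.append(main(model='2cnn_rnn_sin_id', pid=pid))
    print('mean best score over %d runs: %f' % (len(best_scores), float(np.mean(best_scores))))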