def val_acc(val_loader, model, device, flip=False):
    """Model evaluation.

    :param val_loader: validation DataLoader
    :param model: model to evaluate
    :param device: torch device
    :param flip: if True, average predictions over flipped inputs
    :return: float, mean accuracy over batches
    """
    model.eval()
    with torch.no_grad():
        score = []
        for datas, ages, sexs, labels in val_loader:
            datas = datas.to(device)
            ages = ages.to(device)
            sexs = sexs.to(device)
            labels = labels.to(device)
            outputs = model(datas, ages, sexs)
            if flip:
                datas_flip = datas.flip([2])
                # the model takes (datas, ages, sexs); pass all three for the
                # flipped batch as well, otherwise this call fails
                outputs_flip = model(datas_flip, ages, sexs)
                outputs_mean = torch.add(outputs, outputs_flip) / 2
                x = calc_acc(labels, outputs_mean)
            else:
                x = calc_acc(labels, outputs)
            score.append(x)
        test_acc = sum(score) / len(score)
    return test_acc
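# `calc_acc` itself is not shown in this snippet. A minimal sketch consistent
# with the call sites above, assuming `outputs` are class scores of shape
# (N, C) and `labels` are integer class indices; the signature and the argmax
# behaviour are assumptions, not the original implementation:
def calc_acc(labels, outputs):
    """Fraction of samples whose argmax prediction matches the label."""
    preds = outputs.argmax(dim=1)
    return (preds == labels).float().mean().item()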
def train_simultaneously(writer, learner, lr, lr_scheduler, n_epochs,
                         train_loader, valid_loader, test_loader, y_criterion,
                         d_criterion, best_valid_acc, final_test_acc,
                         acc_invariant_curve, weight_decay, n_checks,
                         dataset_name_and_test_key):
    optimizer = optim.RMSprop(learner.parameters(), lr=lr,
                              weight_decay=weight_decay)
    if lr_scheduler:
        learner.set_lr_scheduler(optimizer, n_epochs * len(train_loader), lr)
    for epoch in range(1, n_epochs + 1):
        # ====== train ======
        y_loss_li = []
        y_acc_li = []
        d_loss_li = []
        d_acc_li = []
        d_grad_li = []
        for i, (X, y, d) in enumerate(train_loader):
            optimizer.zero_grad()
            learner.scheduler_step()
            X = X.float().cuda()
            y = y.long().cuda()
            d = d.long().cuda()
            y_pred, d_pred = learner(X)
            y_loss = y_criterion(y_pred, y)
            y_acc = calc_acc(y_pred, y)
            d_loss = d_criterion(d_pred, d)
            d_acc = calc_acc(d_pred, d)
            loss = y_loss + d_loss
            loss.backward()
            # track the mean absolute gradient of the domain discriminator
            d_grad_li.append(
                np.mean([x.grad.abs().mean().item()
                         for x in learner.D.parameters()]))
            optimizer.step()
            y_loss_li.append(y_loss.item())
            y_acc_li.append(y_acc)
            d_loss_li.append(d_loss.item())
            d_acc_li.append(d_acc)
        y_loss_mean = np.mean(y_loss_li)
        y_acc_mean = np.mean(y_acc_li)
        d_loss_mean = np.mean(d_loss_li)
        d_acc_mean = np.mean(d_acc_li)
        d_grad_mean = np.mean(d_grad_li)
        best_valid_acc, final_test_acc = validation_step(
            learner, train_loader, valid_loader, test_loader, y_criterion,
            writer, epoch, y_loss_mean, y_acc_mean, d_loss_mean, d_acc_mean,
            n_epochs, best_valid_acc, final_test_acc, d_grad_mean,
            acc_invariant_curve, n_checks, dataset_name_and_test_key)
    return final_test_acc
def test_calculate_accuracy_from_play(self):
    play = {"count50": "0", "count100": "22", "count300": "680",
            "countmiss": "0", "countkatu": "17", "countgeki": "174"}
    # taken from my current top play
    self.assertEqual(utils.calc_acc(play, mode=0), "97.91")
    self.assertEqual(utils.calc_acc(play, mode=1), "98.43")
    self.assertEqual(utils.calc_acc(play, mode=2), "97.64")
    self.assertEqual(utils.calc_acc(play, mode=3), "97.72")
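# For reference, the osu!standard (mode 0) accuracy formula that the expected
# value "97.91" above is consistent with; a minimal sketch, not the project's
# actual utils.calc_acc (which also handles taiko/catch/mania and returns a
# string rounded to two decimals):
def osu_std_acc(play):
    n300 = int(play["count300"])
    n100 = int(play["count100"])
    n50 = int(play["count50"])
    nmiss = int(play["countmiss"])
    total = n300 + n100 + n50 + nmiss
    acc = (300 * n300 + 100 * n100 + 50 * n50) / (300 * total)
    return "{:.2f}".format(acc * 100)  # "97.91" for the play above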
def validate(self):
    losses = AverageMeter()
    acc = AverageMeter()
    self.model.eval()
    loader = self.val_loader
    with torch.no_grad():
        for batch_step, (images, target) in enumerate(loader):
            # `async=True` is a syntax error on Python 3.7+; use non_blocking
            target = target.cuda(non_blocking=True)
            y_pred = self.model(images)
            loss = self.criterion(y_pred, target)
            batch_acc, _ = calc_acc(y_pred.data, target, topk=(1, 1))
            losses.update(loss.item(), images.size(0))
            acc.update(batch_acc[0], images.size(0))
            if batch_step % self.cfg['print_freq'] == 0:
                print('Val: [{0}/{1}]\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                          batch_step, len(loader), loss=losses, top1=acc))
    print('Val Accuracy %.6f | Loss %.6f' % (acc.avg, losses.avg))
    return acc.avg
def train_epoch(model, optimizer, criterion, train_dataloader, show_interval=100):
    model.train()
    acc_meter, loss_meter, iter_count = 0, 0, 0
    for inputs, targets in train_dataloader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        loss_meter += loss.item()
        acc = utils.calc_acc(targets, torch.sigmoid(outputs))
        acc_meter += acc
        iter_count += 1
        if iter_count != 0 and iter_count % show_interval == 0:
            print("train --- %d, loss: %.3e, f1: %.3f" %
                  (iter_count, loss.item(), acc))
    return loss_meter / iter_count, acc_meter / iter_count
def validate(learner, loader, criterion):
    y_loss_li = []
    y_acc_li = []
    ys = []
    pred_ys = []
    with torch.no_grad():
        for i, (X_batch, y_batch, _) in enumerate(loader):
            X_batch = X_batch.float().cuda()
            y_batch = y_batch.long().cuda()
            y_pred = learner(X_batch, require_domain=False)
            y_loss = criterion(y_pred, y_batch)
            y_acc = calc_acc(y_pred, y_batch)
            y_loss_li.append(y_loss.item())
            y_acc_li.append(y_acc)
            ys.append(y_batch.cpu().numpy())
            pred_ys.append(np.argmax(y_pred.cpu().numpy(), axis=1))
    y = np.concatenate(ys)
    pred_y = np.concatenate(pred_ys)
    cm = metrics.confusion_matrix(y, pred_y)
    # Only the diagonals of these matrices are meaningful: per-class
    # precision, recall, and their harmonic mean (the F-value returned below).
    cm_pre = cm.astype(np.float32) / cm.sum(axis=0) * 100
    cm_rec = cm.astype(np.float32) / cm.sum(axis=1) * 100
    cm_f = (2 * cm_pre * cm_rec) / (cm_pre + cm_rec)
    fvalue = np.diag(cm_f)
    return y_loss_li, y_acc_li, fvalue
def val_epoch(model, criterion, val_dataloader, threshold=0.5):
    model.eval()
    acc_meter, loss_meter, iter_count = 0, 0, 0
    with torch.no_grad():
        for inputs, targets in val_dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss_meter += loss.item()
            acc = utils.calc_acc(targets, torch.sigmoid(outputs))
            acc_meter += acc
            iter_count += 1
    return loss_meter / iter_count, acc_meter / iter_count
def train_epoch(self, loader, epoch):
    losses = AverageMeter()
    acc = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    self.model.train()
    end = time.time()
    for batch_step, (images, target) in enumerate(loader):
        data_time.update(time.time() - end)
        # `async=True` is a syntax error on Python 3.7+; use non_blocking
        target = target.cuda(non_blocking=True)
        y_pred = self.model(images)
        # networks with an auxiliary head return a (main, aux) tuple
        if isinstance(y_pred, tuple):
            loss_lab = self.criterion(y_pred[0], target)
            loss_aux = self.criterion(y_pred[1], target)
            loss = loss_lab + loss_aux
            y_pred = y_pred[0]
        else:
            loss = self.criterion(y_pred, target)
        batch_acc, _ = calc_acc(y_pred.data, target, topk=(1, 1))
        losses.update(loss.item(), images.size(0))
        acc.update(batch_acc[0], images.size(0))
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        batch_time.update(time.time() - end)
        end = time.time()
        if batch_step % self.cfg['print_freq'] == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                      epoch, batch_step, len(loader),
                      batch_time=batch_time, data_time=data_time,
                      loss=losses, top1=acc))
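# A minimal sketch of a top-k `calc_acc` consistent with the
# `calc_acc(y_pred.data, target, topk=(1, 1))` call sites above, modelled on
# the standard ImageNet-example accuracy helper; the two-value return
# (precisions, top-1 predictions) is an assumption from how the result is
# unpacked, not the project's actual implementation:
def calc_acc(output, target, topk=(1,)):
    """Return (list of top-k precisions in percent, top-1 predictions)."""
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res, pred[0]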
nan_happened = False
for j in range(args.num_trials):
    with tf.Session() as sess:
        sess.run(init_op)
        t0 = time.time()
        for epoch in range(args.epochs):
            t = time.time()
            # training step
            sess.run([train_op], feed_dict={training: True})
            # validation step
            [loss_train_np, loss_val_np, Yhat_np] = sess.run(
                [loss_train, loss_val, Yhat], feed_dict={training: False})
            acc_train = utils.calc_acc(Y, Yhat_np, idx_train)
            acc_val = utils.calc_acc(Y, Yhat_np, idx_val)
            acc_test = utils.calc_acc(Y, Yhat_np, idx_test)
            test_accs[d, i, j, epoch, 0] = loss_val_np
            test_accs[d, i, j, epoch, 1] = acc_test
            np.save("egnn_cs_curvatures_5_trials", test_accs)
            if np.isnan(loss_train_np):
                nan_happened = True
                print('NaN loss, stop!')
                break
            print('Epoch=%d, loss=%.4f, acc=%.4f | val: loss=%.4f, acc=%.4f t=%.4f'
                  % (epoch, loss_train_np, acc_train, loss_val_np, acc_val,
                     time.time() - t))
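# A minimal sketch of `utils.calc_acc(Y, Yhat, idx)` as called above, assuming
# Y holds one-hot labels, Yhat holds predicted scores, and idx selects the
# train/val/test nodes (an assumption about this project's utils, typical of
# semi-supervised node-classification code):
import numpy as np

def calc_acc(Y, Yhat, idx):
    """Accuracy of argmax predictions over the rows selected by idx."""
    true = np.argmax(Y[idx], axis=1)
    pred = np.argmax(Yhat[idx], axis=1)
    return np.mean(true == pred)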
import numpy as np

from algos import groupfair, regularizer
from utils import calc_acc, calc_ind_viol, calc_eo_viol

methods = [regularizer.Regularizer, groupfair.Plugin, groupfair.WERM]
params_list = [{
    'rho': np.logspace(-1, 5, 30),
    'T': [2000],
    'lr': [0.001],
    'nlayers': [1],
    'fairness': ['EO']
}, {
    'B': [50],
    'nu': np.logspace(-4, 0, 20),
    'T': [10000],
    'lr': [0.01],
    'fairness': ['EO']
}, {
    'B': [50],
    'nu': np.logspace(-4, 0, 20),
    'T': [10000],
    'lr': [0.01],
    'fairness': ['EO']
}]
metrics_list = [[('accuracy', lambda p, x, xp, y: calc_acc(p, y)),
                 ('ind_viol', lambda p, x, xp, y: calc_eo_viol(p, xp, y))],
                [('accuracy', lambda p, x, xp, y: calc_acc(p, y)),
                 ('ind_viol', lambda p, x, xp, y: calc_eo_viol(p, xp, y))],
                [('accuracy', lambda p, x, xp, y: calc_acc(p, y)),
                 ('ind_viol', lambda p, x, xp, y: calc_eo_viol(p, xp, y))]]
def train_alternately(writer, learner, lr, lr_scheduler, n_epochs, num_train_d,
                      train_loader, valid_loader, test_loader, y_criterion,
                      d_criterion, best_valid_acc, final_test_acc,
                      acc_invariant_curve, weight_decay, n_checks,
                      dataset_name_and_test_key):
    y_optimizer = optim.RMSprop(list(learner.E.parameters()) +
                                list(learner.M.parameters()),
                                lr=lr, weight_decay=weight_decay)
    d_optimizer = optim.RMSprop(learner.D.parameters(), lr=lr)
    if lr_scheduler:
        learner.set_lr_scheduler(y_optimizer, n_epochs * len(train_loader), lr)
    for epoch in range(1, n_epochs + 1):
        # ====== train ======
        y_loss_li = []
        y_acc_li = []
        d_loss_li = []
        d_acc_li = []
        d_grad_li = []
        for i in range(len(train_loader)):
            learner.scheduler_step()
            y_optimizer.zero_grad()
            # update Domain Discriminator
            for _ in range(num_train_d):
                d_optimizer.zero_grad()
                # draw a fresh shuffled batch each time
                X, _, d = next(iter(train_loader))
                X = X.float().cuda()
                d = d.long().cuda()
                d_pred = learner(X, freeze_E=True, require_class=False)
                d_loss = d_criterion(d_pred, d)
                d_acc = calc_acc(d_pred, d)
                d_loss.backward()
                d_grad_li.append(
                    np.mean([x.grad.abs().mean().item()
                             for x in learner.D.parameters()]))
                d_optimizer.step()
                d_loss_li.append(d_loss.item())
                d_acc_li.append(d_acc)
            # update Encoder and Classifier
            X, y, d = next(iter(train_loader))
            X = X.float().cuda()
            y = y.long().cuda()
            d = d.long().cuda()
            y_pred, d_pred = learner(X, freeze_E=False)
            y_loss = y_criterion(y_pred, y)
            d_loss = d_criterion(d_pred, d)
            loss = y_loss + d_loss
            loss.backward()
            y_optimizer.step()
            y_acc = calc_acc(y_pred, y)
            y_loss_li.append(y_loss.item())
            y_acc_li.append(y_acc)
        y_loss_mean = np.mean(y_loss_li)
        y_acc_mean = np.mean(y_acc_li)
        d_loss_mean = np.mean(d_loss_li)
        d_acc_mean = np.mean(d_acc_li)
        d_grad_mean = np.mean(d_grad_li)
        best_valid_acc, final_test_acc = validation_step(
            learner, train_loader, valid_loader, test_loader, y_criterion,
            writer, epoch, y_loss_mean, y_acc_mean, d_loss_mean, d_acc_mean,
            n_epochs, best_valid_acc, final_test_acc, d_grad_mean,
            acc_invariant_curve, n_checks, dataset_name_and_test_key)
    return final_test_acc
pruned_model = PrunedNet()
msk = build_mask(model)
optim = torch.optim.Adam(model.parameters())
loss_func = nn.CrossEntropyLoss()
acc_val = 0
for i in range(cfg.EPOCH):
    for img, label in train_data:
        # pred = model(img.cuda()).cpu()
        pred = model(img)
        loss_val = loss_func(pred, label)
        optim.zero_grad()
        loss_val.backward()
        extra_grad(model)
        acc_val = calc_acc(pred, label)
        print(acc_val)
        # start masking gradients only once the model is reasonably accurate
        if acc_val > 0.9:
            update_grad(model, msk)
        optim.step()

model_acc = []
for img, label in test_data:
    # pred = model(img.cuda()).cpu()
    pred = model(img)
    model_acc.append(calc_acc(pred, label))
print('model_acc:', np.mean(model_acc))
# torch.save(msk, './models/msk.pth')
# torch.save(model.state_dict(), cfg.PATH)
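# `build_mask` and `update_grad` are not shown above. A minimal sketch of what
# they could look like for magnitude pruning, assuming `msk` maps parameter
# names to binary tensors; these helpers are assumptions, not the snippet's
# actual implementation:
def build_mask(model, keep_ratio=0.5):
    """1 where a weight's magnitude is above the per-tensor threshold."""
    mask = {}
    for name, p in model.named_parameters():
        k = int((1 - keep_ratio) * p.numel()) + 1
        thresh = p.detach().abs().flatten().kthvalue(k).values
        mask[name] = (p.detach().abs() >= thresh).float()
    return mask

def update_grad(model, msk):
    """Zero the gradients of pruned (masked-out) weights."""
    for name, p in model.named_parameters():
        if p.grad is not None:
            p.grad.mul_(msk[name])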
def train():
    assert max_point == 8192, ("wrong max point; change pointnet_util -> "
                               "fp1 = PointNetFeaturePropagation_PointConv -> in_channel")
    # select device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: {}".format(device))
    # load dataset
    data_loader_train = Scannetloader(scannet_file_root, max_point, n_class, is_train=True)
    data_loader_val = Scannetloader(scannet_file_root, max_point, n_class, is_train=False)
    train_loader = torch.utils.data.DataLoader(data_loader_train, batch_size=batch_size,
                                               shuffle=True, num_workers=n_workers)
    val_loader = torch.utils.data.DataLoader(data_loader_val, batch_size=batch_size,
                                             shuffle=True, num_workers=n_workers)
    # set model running devices
    #model = PointNet2SemSeg(n_class)
    model = PointNet2SemSeg_PointConv(n_class)
    model.to(device)
    #torch.backends.cudnn.benchmark = True
    print("usable gpu nums: {}".format(torch.cuda.device_count()))
    # set optimizer, lr_scheduler, loss function
    optimizer = None
    if optimizer_name == "rmsprop":
        optimizer = torch.optim.RMSprop(model.parameters(), lr=lr, momentum=momentum,
                                        weight_decay=decay_rate)
    elif optimizer_name == "sgd":
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum,
                                    weight_decay=decay_rate)
    elif optimizer_name == "adam":
        optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999), eps=1e-08)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=40, gamma=0.5)
    # criterion
    #criterion = torch.nn.BCEWithLogitsLoss()
    #criterion = F.nll_loss
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1")  # "O1", not "01"
    model = torch.nn.DataParallel(model,
                                  device_ids=list(range(torch.cuda.device_count())))
    # load checkpoints
    last_best_iou = -100.
    load_ckpt_path = os.path.join("checkpoints", load_ckpt_name)
    start_epoch = 0
    if is_load_checkpoints:
        if torch.cuda.is_available():
            checkpoint = torch.load(load_ckpt_path)
        else:
            checkpoint = torch.load(load_ckpt_path, map_location='cpu')
        model.load_state_dict(checkpoint['model_state'])
        last_best_iou = checkpoint['best_iou']
        start_epoch = checkpoint['epoch']
        optimizer.load_state_dict(checkpoint["optimizer"])
        amp.load_state_dict(checkpoint["amp"])
        print('Checkpoint resume success... last iou: {:.4%}, last epoch: {}'.format(
            last_best_iou, start_epoch))
    # train epochs
    best_iou = last_best_iou
    time_epoch = time.time()
    print("training...")
    for epoch in range(start_epoch, max_epoch):
        time_step = time.time()
        lr_scheduler.step(epoch=epoch)
        for step, batch in enumerate(train_loader):
            model.train()
            points = batch[0].to(device)
            targets = batch[1].long().to(device)
            points = points.transpose(2, 1)
            optimizer.zero_grad()
            # points.shape = [batch_size, 9, 8192]
            pred = model(points[:, :3, :], points[:, 3:, :])
            pred = pred.contiguous().view(-1, n_class)
            targets = targets.contiguous().view(-1)
            loss = F.cross_entropy(input=pred, target=targets)  # log_softmax + nll_loss
            # don't use .data here: it sets requires_grad=False and breaks autograd
            cur_loss = float(loss.cpu().detach().numpy())
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            optimizer.step()
            writer.add_scalars('Train_record/Loss per X step', {'loss': cur_loss},
                               step + epoch * len(train_loader))
            cur_time = time.time()
            print("\rtrain | epoch: {}/{} | step: {}/{} | {} s/step | "
                  "time used: {:d}s | current loss: {:.4}".format(
                      epoch, max_epoch, step + 1, len(train_loader),
                      int((cur_time - time_step) / (step + 1)),
                      int(cur_time - time_epoch), cur_loss),
                  end='', flush=True)
        # validation
        print("\nvalidating")
        with torch.no_grad():
            model.eval()
            cat_table = 0
            total_cat_table = np.zeros((2, n_class))  # [0]: acc, [1]: miou
            time_step_val = time.time()
            for step_val, batch_val in enumerate(val_loader):
                points_val = batch_val[0].to(device)
                labels_val = batch_val[1].cpu().data.numpy()
                points_val = points_val.transpose(2, 1)
                # pred_val.shape = [batch_size, n_point, n_class]
                pred_val = model(points_val[:, :3, :], points_val[:, 3:, :])
                pred_val = F.softmax(pred_val, dim=2).cpu().data.numpy()
                pred_val = np.argmax(pred_val, axis=2)
                #visualizer(points_val.transpose(2, 1)[0], pred_val[0], labels_val[0])
                cat_table = calc_iou(pred_val, labels_val)
                total_cat_table += cat_table
                cur_time = time.time()
                print("\rvalue | step: {}/{} | {} s/step | mean iou: {:.4} | "
                      "mean acc: {:.4}".format(
                          step_val + 1, len(val_loader),
                          int((cur_time - time_step_val) / (step_val + 1)),
                          np.mean(cat_table[1]), np.mean(cat_table[0])),
                      end='', flush=True)
            total_cat_table = total_cat_table / len(val_loader)
            # total_cat_table = total_cat_table / (step_val + 1)
            mean_iou = np.mean(total_cat_table[1])
            mean_acc = np.mean(total_cat_table[0])
            # accuracy of the last validation batch only
            print("\ntrue accuracy: {:.3f}".format(calc_acc(pred_val, labels_val)))
            print("accuracy: {:.3f} | mean iou: {:.3f}".format(mean_acc, mean_iou))
            writer.add_scalars('Val_record/accuracy', {'accuracy': mean_acc}, epoch)
            writer.add_scalars('Val_record/mIoU', {'mIoU': mean_iou}, epoch)
            print("class name \tmean accuracy\tmIoU")
            for i in range(n_class):
                print("%-18s %.3f \t%.3f" % (CLASS_LABELS[i],
                                             total_cat_table[0][i],
                                             total_cat_table[1][i]))
            if mean_iou > best_iou:
                best_iou = mean_iou
                state = {
                    "epoch": epoch + 1,
                    "model_state": model.state_dict(),
                    "best_iou": best_iou,
                    "amp": amp.state_dict(),
                    "optimizer": optimizer.state_dict()
                }
                if not os.path.isdir('./checkpoints'):
                    os.mkdir('./checkpoints')
                save_path = os.path.join(
                    './checkpoints',
                    "apex_eph_{}_iou_{:.2%}.ckpt.pth".format(epoch + 1, best_iou))
                torch.save(state, save_path)
                print("checkpoint saved successfully")
    writer.close()
dataset = TwitterDataset('data/train.csv', nb_words, 0.1, 0.1)
print('Done preparing the dataset, serializing')
pickle.dump(dataset, pkl, pickle.HIGHEST_PROTOCOL)
print('Done serializing')

# Fit several models with varying pseudocount parameter
models = dict()
for pseudocount in range(1, 30):
    # Fit the model
    print('Fitting a model with pseudocount={}'.format(pseudocount))
    model = MultinomialNaiveBayes(nb_classes, nb_words, pseudocount)
    model.fit(dataset.train)

    # Evaluate on train set
    preds_train = model.predict(dataset.train['x'])
    acc_train = calc_acc(dataset.train['y'], preds_train)
    print('Train set accuracy: {0:.4f}'.format(acc_train))

    # Evaluate on validation set
    preds_val = model.predict(dataset.val['x'])
    acc_val = calc_acc(dataset.val['y'], preds_val)
    print('Validation set accuracy: {0:.4f}'.format(acc_val))

    # Save the model
    models[model] = acc_val

# Find the best model (best validation set accuracy)
best_model = max(models, key=models.get)
print('Best pseudocount is {}'.format(best_model.pseudocount))

# Evaluate on test set
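# For this NumPy-style snippet, `calc_acc` plausibly reduces to mean label
# agreement; a minimal sketch (an assumption, not the project's code):
import numpy as np

def calc_acc(y_true, y_pred):
    """Fraction of predictions equal to the true labels."""
    return np.mean(np.asarray(y_true) == np.asarray(y_pred))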
def train(writer, train_dataset, valid_dataset, test_dataset, model, batch_size,
          dataset_name, lr, n_iter, lr_scheduler, alpha, num_train_d, num_train_e,
          alpha_scheduler, p_d, weight_decay, n_checks):
    learner = get_learner(train_dataset, dataset_name, alpha)
    y_criterion = nn.NLLLoss()
    d_criterion = nn.NLLLoss()
    train_loader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    valid_loader = data.DataLoader(valid_dataset, batch_size=batch_size, shuffle=True)
    test_loader = data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
    itr_per_epoch = len(train_loader)
    n_epochs = n_iter // itr_per_epoch
    best_valid_acc = 0.
    final_test_acc = 0.
    acc_invariant_curve = {}
    dataset_name_and_test_key = dataset_name + '_' + str(test_dataset.domain_keys[0])
    y_optimizer = optim.RMSprop(list(learner.E.parameters()) +
                                list(learner.M.parameters()),
                                lr=lr, weight_decay=weight_decay)
    d_optimizer = optim.RMSprop(learner.D.parameters(), lr=lr)
    if lr_scheduler:
        learner.set_lr_scheduler(y_optimizer, n_epochs * len(train_loader), lr)
    if alpha_scheduler:
        learner.set_alpha_scheduler(n_epochs * len(train_loader), annealing_func='exp')
    p_d_given_y = mle_for_p_d_given_y(train_dataset).cuda()
    p_d_not_given_y = mle_for_p_d(train_dataset).cuda()
    # ====== train ======
    for epoch in range(1, n_epochs + 1):
        y_loss_li = []
        y_acc_li = []
        d_loss_li = []
        kl_loss_li = []
        d_acc_li = []
        d_grad_li = []
        for i in range(len(train_loader)):
            learner.scheduler_step()
            # update Domain Discriminator
            for _ in range(num_train_d):
                d_optimizer.zero_grad()
                X, _, d = next(iter(train_loader))
                X = X.float().cuda()
                d = d.long().cuda()
                d_pred = learner(X, freeze_E=True, require_class=False)
                d_loss = d_criterion(d_pred, d)
                d_acc = calc_acc(d_pred, d)
                d_loss.backward()
                d_grad_li.append(
                    np.mean([x.grad.abs().mean().item()
                             for x in learner.D.parameters()]))
                d_optimizer.step()
                d_loss_li.append(d_loss.item())
                d_acc_li.append(d_acc)
            # update Encoder and Classifier
            for _ in range(num_train_e):
                y_optimizer.zero_grad()
                X, y, d = next(iter(train_loader))
                X = X.float().cuda()
                y = y.long().cuda()
                y_pred, d_pred = learner(X, use_reverse_layer=False, freeze_E=False)
                d_pred = torch.exp(d_pred)
                y_loss = y_criterion(y_pred, y)
                if p_d == 'dependent_y':
                    d_true = p_d_given_y[y]
                elif p_d == 'independent_y':
                    d_true = p_d_not_given_y.expand_as(d_pred)
                else:
                    raise Exception()
                kl_loss = D_KL(d_true, d_pred)
                loss = y_loss + kl_loss
                loss.backward()
                y_optimizer.step()
                y_acc = calc_acc(y_pred, y)
                y_loss_li.append(y_loss.item())
                kl_loss_li.append(kl_loss.item())
                y_acc_li.append(y_acc)
        y_loss_mean = np.mean(y_loss_li)
        y_acc_mean = np.mean(y_acc_li)
        d_loss_mean = np.mean(d_loss_li)
        d_acc_mean = np.mean(d_acc_li)
        d_grad_mean = np.mean(d_grad_li)
        kl_loss_mean = np.mean(kl_loss_li)
        best_valid_acc, final_test_acc = validation_step(
            learner, train_loader, valid_loader, test_loader, y_criterion,
            writer, epoch, y_loss_mean, y_acc_mean, d_loss_mean, d_acc_mean,
            kl_loss_mean, n_epochs, best_valid_acc, final_test_acc, d_grad_mean,
            acc_invariant_curve, n_checks, dataset_name_and_test_key)
    for d_acc, y_acc in sorted(acc_invariant_curve.items(), key=lambda x: x[0]):
        writer.add_scalar('%s_acc_fair_curve' % dataset_name_and_test_key,
                          y_acc, d_acc * 10000)
    return final_test_acc
import numpy as np

from algos import groupfair, gerfair
from utils import calc_acc, calc_gerry_viol, calc_gerry_viol2

methods = [groupfair.Plugin, groupfair.WERM, gerfair.GerryFair]
params_list = [{
    'B': [10],
    'nu': np.logspace(-4, 0, 20),
    'T': [10000],
    'lr': [1],
    'fairness': ['DP'],
    'gfair': ['gerry'],
    'lambda_update': ['subgradient']
}, {
    'B': [10],
    'nu': np.logspace(-4, 0, 20),
    'T': [10000],
    'lr': [1],
    'fairness': ['DP'],
    'gfair': ['gerry'],
    'lambda_update': ['subgradient']
}, {
    'C': [15],
    'nu': np.logspace(-3, 0, 20),
    'T': [1500]
}]
metrics_list = [[('accuracy', lambda p, x, xp, y: calc_acc(p, y)),
                 ('gerry_viol', lambda p, x, xp, y: calc_gerry_viol(p, xp))],
                [('accuracy', lambda p, x, xp, y: calc_acc(p, y)),
                 ('gerry_viol', lambda p, x, xp, y: calc_gerry_viol(p, xp))],
                [('accuracy', lambda p, x, xp, y: calc_acc(p, y)),
                 ('gerry_viol', lambda p, x, xp, y: calc_gerry_viol2(p, xp))]]
#for sub in ['1005']:
sub = str(sub)
if sub not in ['1024']:
    # load in mask for individual subject
    mask = make_bilat_HPC(sub)
    print("Loaded mask for sub %s" % sub)

    # load in onsets
    fusiform_onsets = load_loc_stim_labels(sub)
    fusiform_TR_onsets = time2TR(fusiform_onsets, TR_per_run_loc)
    DFR_onsets = np.squeeze(load_DFR_stim_labels(sub))
    DFR_TR_onsets = time2TR(DFR_onsets, TR_per_run_DFR)
    all_suj_acc[suj_count, :] = calc_acc(DFR_onsets)

    # shift for HRF
    shift_size = int(HRF_lag / TR)
    fusiform_TR_onsets_shifted = shift_timing(fusiform_TR_onsets, shift_size)
    DFR_TR_onsets_shifted = shift_timing(DFR_TR_onsets, shift_size)

    # load in fMRI data
    masked_fusiform_data = load_masked_loc(sub, mask)
    masked_DFR_data = load_all_masked_DFR_runs(sub, mask, 4)

    # Check dimensionality
def create_reply(text, data, previous_links, mode):
    """
    Text is the text of the reddit submission.
    Data is a two-element list: the first element is user data, the second
    is a list of top-play info json.
    Returns a reddit reply-ready string containing the user's profile, a
    table with relevant stats for that user, and a table with that user's
    top plays.
    """
    sim = None
    cheated_match = re.search(
        r"\(cheated\): https:\/\/osu\.ppy\.sh\/scores\/osu\/(\d+)(\/download)?",
        text)
    original_match = re.search(
        r"\(original\): https:\/\/osu\.ppy\.sh\/scores\/osu\/(\d+)(\/download)?",
        text)
    if cheated_match and original_match:
        cheated_id = int(cheated_match.group(1))
        original_id = int(original_match.group(1))
        cheated = ReplayID(cheated_id)
        original = ReplayID(original_id)
        sim = cg.similarity(cheated, original)
    # can't use ?m=0 to specify a gamemode in userpage urls unfortunately
    modes = ["osu", "taiko", "fruits", "mania"]
    user_data = data[0]
    top_data = data[1]
    # user exists, but hasn't made any plays (ie no pp at all)
    if user_data["pp_raw"] is None:
        reply = "{}'s profile: {}\n\nThis user has not made any plays!".format(
            user_data["username"],
            API_USERS + user_data["user_id"] + "/" + modes[int(mode)])
        return reply
    creation_date = datetime.strptime(user_data["join_date"],
                                      "%Y-%m-%d %H:%M:%S")  # e.g. 2018-04-15 01:44:28
    difference = datetime.utcnow() - creation_date
    pp_raw = round(float(user_data["pp_raw"]))
    reply = (
        "{}'s profile: {}\n\n"
        "| Rank | PP | Playtime | Playcount | Country | Joined |\n"
        ":-:|:-:|:-:|:-:|:-:|:-:\n"
        "| #{:,} | {} | {} hours | {:,} | {} | ~{} days ago|\n\n"
        "| Top Plays | Mods | PP | Accuracy | Date | Replay Download |\n"
        ":-:|:-:|:-:|:-:|:-:|:-:\n".format(
            user_data["username"],
            API_USERS + user_data["user_id"] + "/" + modes[int(mode)],
            int(user_data["pp_rank"]),
            "{:,}".format(pp_raw) if pp_raw != 0 else "0 (inactive)",
            round(int(user_data["total_seconds_played"]) / 60 / 60),  # convert to hours
            int(user_data["playcount"]),
            user_data["country"],
            difference.days))
    for play in top_data[0:LIMIT_TOP_PLAYS]:
        play_data = requests.get(API_BASE + "get_scores?k=" + KEY +
                                 "&b=" + play["beatmap_id"] +
                                 "&u=" + user_data["user_id"] +
                                 "&m=" + mode +
                                 "&mods=" + play["enabled_mods"]).json()[0]
        score_id = play_data["score_id"]
        replay_available = bool(int(play_data["replay_available"]))
        reply += "| [{}]({}) | {} | {:,} | {}% ({}) | {} | {} |\n".format(
            parse_map_data(play["beatmap_id"])["title"],
            "https://osu.ppy.sh/b/{}".format(play["beatmap_id"]),
            calc_mods(play["enabled_mods"]),
            round(float(play["pp"])),
            calc_acc(play, mode),
            parse_play_rank(play["rank"]),
            # "2013-06-22 9:11:16" (api) -> "2013/06/22"
            play["date"].split(" ")[0].replace("-", "/"),
            "[{}]({})".format(
                score_id,
                "https://osu.ppy.sh/scores/{}/{}".format(modes[int(mode)], score_id))
            if replay_available else "Unavailable")
    reply += "\n\n" + previous_links
    # sim can be 0, which is falsey, so compare against None explicitly
    if sim is not None:
        reply += (
            f"\n\nSimilarity of replays [{cheated_id}](https://osu.ppy.sh/scores/osu/{cheated_id}) "
            f"and [{original_id}](https://osu.ppy.sh/scores/osu/{original_id}): {round(sim, 2)}")
    return reply
def train(writer, train_dataset, valid_dataset, test_dataset, model, batch_size,
          dataset_name, lr, n_iter, lr_scheduler, alpha, num_train_d,
          alpha_scheduler, weight_decay, n_checks):
    learner = get_learner(train_dataset, dataset_name, alpha)
    y_criterion = nn.NLLLoss()
    d_criterion = nn.NLLLoss()
    train_loader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    valid_loader = data.DataLoader(valid_dataset, batch_size=batch_size, shuffle=True)
    test_loader = data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
    itr_per_epoch = len(train_loader)
    n_epochs = n_iter // itr_per_epoch
    if alpha_scheduler:
        learner.set_alpha_scheduler(n_epochs * len(train_loader), annealing_func='exp')
    best_valid_acc = 0.
    final_test_acc = 0.
    acc_invariant_curve = {}
    dataset_name_and_test_key = dataset_name + '_' + str(test_dataset.domain_keys[0])
    y_optimizer = optim.RMSprop(list(learner.E.parameters()) +
                                list(learner.M.parameters()),
                                lr=lr, weight_decay=weight_decay)
    d_optimizer = optim.RMSprop(
        list(chain.from_iterable([list(D.parameters()) for D in learner.Ds])) +
        list(learner.D.parameters()), lr=lr)
    if lr_scheduler:
        learner.set_lr_scheduler(y_optimizer, n_epochs * len(train_loader), lr)
    p_y_given_d = mle_for_p_y_given_d(train_dataset).cuda()
    L = train_dataset.get('num_classes')
    for epoch in range(1, n_epochs + 1):
        # ====== train ======
        y_loss_li = []
        y_acc_li = []
        d_loss_li = []
        d_acc_li = []
        d_norm_loss_li = []
        d_norm_acc_li = []
        for i in range(len(train_loader)):
            learner.scheduler_step()
            y_optimizer.zero_grad()
            # update Domain Discriminator
            for _ in range(num_train_d):
                d_optimizer.zero_grad()
                X, y, d = next(iter(train_loader))
                X = X.float().cuda()
                y = y.float().cuda()
                d = d.long().cuda()
                d_preds = learner(X, freeze_E=True, require_class=False)
                d_loss = 0
                d_acc = 0
                # one discriminator per class; mask out samples of other classes
                for i, d_pred in enumerate(d_preds):
                    mask = y == i
                    d_pred_tmp = d_pred * mask.unsqueeze(1).float()
                    d_tmp = d * mask.long()
                    d_loss += d_criterion(d_pred_tmp, d_tmp)
                    _, idxs = d_pred_tmp.max(1)
                    d_acc += ((idxs == d_tmp).float().sum() -
                              (mask.float() - 1).abs().sum()).item() / len(d_tmp)
                d_pred = learner.pred_d_by_D(X, freeze_E=True)
                weight = (1 / p_y_given_d[d, y.long()]) / L
                d_norm_loss = (nn.NLLLoss(reduction='none')(d_pred, d) * weight).mean()
                _, idxs = d_pred.max(1)
                d_norm_acc = ((idxs == d).float().sum() / len(d)).item()
                (d_loss + d_norm_loss).backward()
                d_optimizer.step()
                d_loss_li.append(d_loss.item())
                d_acc_li.append(d_acc)
                d_norm_loss_li.append(d_norm_loss.item())
                d_norm_acc_li.append(d_norm_acc)
            # update Encoder and Classifier
            X, y, d = next(iter(train_loader))
            X = X.float().cuda()
            y = y.long().cuda()
            d = d.long().cuda()
            y_pred, d_preds = learner(X, freeze_E=False)
            y_loss = y_criterion(y_pred, y)
            d_loss = 0
            for i, d_pred in enumerate(d_preds):
                mask = y == i
                d_pred_tmp = d_pred * mask.unsqueeze(1).float()
                d_tmp = d * mask.long()
                d_loss += d_criterion(d_pred_tmp, d_tmp)
            d_pred = learner.pred_d_by_D(X, freeze_E=False)
            weight = (1 / p_y_given_d[d, y]) / L
            d_norm_loss = (nn.NLLLoss(reduction='none')(d_pred, d) * weight).mean()
            loss = y_loss + d_loss + d_norm_loss
            loss.backward()
            y_optimizer.step()
            y_acc = calc_acc(y_pred, y)
            y_loss_li.append(y_loss.item())
            y_acc_li.append(y_acc)
        y_loss_mean = np.mean(y_loss_li)
        y_acc_mean = np.mean(y_acc_li)
        d_loss_mean = np.mean(d_loss_li)
        d_acc_mean = np.mean(d_acc_li)
        d_norm_loss_mean = np.mean(d_norm_loss_li)
        d_norm_acc_mean = np.mean(d_norm_acc_li)
        best_valid_acc, final_test_acc = validation_step(
            learner, train_loader, valid_loader, test_loader, y_criterion,
            writer, epoch, y_loss_mean, y_acc_mean, d_loss_mean, d_acc_mean,
            d_norm_loss_mean, d_norm_acc_mean, n_epochs, best_valid_acc,
            final_test_acc, acc_invariant_curve, n_checks,
            dataset_name_and_test_key)
    for d_acc, y_acc in sorted(acc_invariant_curve.items(), key=lambda x: x[0]):
        writer.add_scalar('%s_acc_fair_curve' % dataset_name_and_test_key,
                          y_acc, d_acc * 10000)
    return final_test_acc