def __init__(self, config):
    """Wire up the full training stack from a configuration mapping.

    Builds the model (optionally restoring a checkpoint), the train /
    validation / test data loaders, the loss, the AdamW optimizer, a
    TensorBoard writer, a plateau LR scheduler, and the early-stopping
    settings. Keys in ``config`` are dash-separated strings.
    """
    run_title = config["run-title"]

    # Model on the requested device, optionally resumed from disk.
    self.device = config["device"]
    self.model = ConvNet(num_classes=config["num-classes"])
    self.model.to(self.device)
    if config["resume"]:
        print("> Loading Checkpoint")
        self.model.load_state_dict(T.load(config["load-path"]))

    # Data pipelines.
    self.train_loader, self.val_loader = get_train_valid_loader(
        config["data-path"],
        config["num-classes"],
        config["batch-size"],
        config["val-batch-size"],
        config["augment"],
        config["seed"],
        config["valid-size"],
        config["shuffle"],
        config["num-workers"],
    )
    self.test_loader = get_test_loader(
        config["data-path"],
        config["num-classes"],
        config["batch-size"],
        config["shuffle"],
        config["num-workers"],
        config["pin-memory"],
    )

    # Loss and optimizer.
    self.criterion = nn.CrossEntropyLoss()
    self.optim = T.optim.AdamW(
        self.model.parameters(),
        lr=config["lr-init"],
        weight_decay=config["weight-decay"],
    )

    # Logging and learning-rate scheduling.
    self.writer = SummaryWriter(log_dir=os.path.join("logs", run_title))
    self.reduce_lr = T.optim.lr_scheduler.ReduceLROnPlateau(
        self.optim,
        factor=config["lr-factor"],
        patience=config["lr-patience"],
        min_lr=config["lr-min"],
    )

    # Early stopping and checkpoint destination.
    self.stopping_patience = config["stopping-patience"]
    self.stopping_delta = config["stopping-delta"]
    self.filepath = os.path.join(
        config["save-path"], run_title, run_title + ".pt")
# Data loaders for SVHN: optionally hold out a validation split.
# NOTE(review): valid_len=60000 matches MNIST's train-set size — confirm it is
# the intended split size for SVHN in get_train_valid_loader.
if args.valid:
    valid_len = 60000
else:
    valid_len = 0
# Extra DataLoader kwargs only when CUDA is in use (pinned memory speeds up
# host-to-device copies).
kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
train_loader, valid_loader = get_train_valid_loader(
    data_dir='./data.svhn',
    batch_size=args.batch_size,
    augment=True,
    random_seed=args.seed,
    valid_len=valid_len,
    shuffle=True,
    show_sample=False,
    **kwargs)
test_loader = get_test_loader(
    data_dir='./data.svhn',
    batch_size=args.batch_size,
    shuffle=True,
    **kwargs)

# Optionally resume a (pruned) preresnet from a checkpoint file; `cfg` is the
# per-layer channel configuration stored alongside the weights.
last_prec1 = 0
model = None
cfg = None
if args.model:
    if os.path.isfile(args.model):
        # NOTE(review): torch.load without map_location fails on CPU-only
        # hosts when the checkpoint was saved from GPU — confirm deployment.
        checkpoint = torch.load(args.model)
        cfg = checkpoint['cfg']
        model = preresnet(dataset=args.dataset, depth=args.depth, cfg=cfg)
        # print(cfg)
        # print(model)
        # print(checkpoint['state_dict'])
        model.load_state_dict(checkpoint['state_dict'])
        last_prec1 = checkpoint['best_prec1']
# NOTE(review): this `return model` is the tail of a function whose `def`
# line lies before this chunk; kept verbatim, it cannot be reconstructed here.
    return model

def cifar100(n_channel, pretrained=None):
    # Build a VGG-style CIFAR-100 convnet. In `cfg`, 'M' denotes max-pooling
    # and a (channels, padding) tuple overrides the default padding.
    cfg = [
        n_channel, n_channel, 'M',
        2*n_channel, 2*n_channel, 'M',
        4*n_channel, 4*n_channel, 'M',
        (8*n_channel, 0), 'M',
    ]
    layers = make_layers(cfg, batch_norm=True)
    model = CIFAR(layers, n_channel=8*n_channel, num_classes=100)
    if pretrained is not None:
        # model_zoo may hand back either a module or a raw state dict.
        m = model_zoo.load_url(model_urls['cifar100'])
        state_dict = m.state_dict() if isinstance(m, nn.Module) else m
        assert isinstance(state_dict, (dict, OrderedDict)), type(state_dict)
        model.load_state_dict(state_dict)
    return model

if __name__ == "__main__":
    # Evaluate the pretrained model on the test split and print accuracy.
    device = 'cuda'
    model = cifar100(128, pretrained=True).to(device)
    model.eval()
    test_loader = get_test_loader("./data", num_classes=100, batch_size=32)
    total, correct = 0, 0
    # NOTE(review): the loop runs without torch.no_grad(); gradients are not
    # needed for evaluation — consider wrapping to save memory.
    for data in test_loader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = model(images)
        _, predicted = T.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    acc = correct / total
    print(f"Testing Accuracy: {acc*100:.4f}%")
from models import UNet, contour_SEResUNet

# CLI for running segmentation inference over the test set.
parser = argparse.ArgumentParser(description='Kaggle Cdiscounts Training')
parser.add_argument('--gpu', default=1, type=int, help='which gpu to run')
parser.add_argument('--batch_size', default=16, type=int, help='size of batches')
parser.add_argument('--img_size', default=448, type=int, help='height and width of images to use')
args = parser.parse_args()

# Load the trained contour model and switch to eval mode for inference.
net = contour_SEResUNet().cuda()
net.load_state_dict(torch.load('../models-pytorch/best_SEResUNet_Contour_flips_l1_lamb0.5.pth'))
net.eval()
test_loader = get_test_loader(imsize=args.img_size)

# Run-length encoding stolen from https://www.kaggle.com/rakhlin/fast-run-length-encoding-python
def rle_encoding(x):
    # Return the run-length encoding of binary mask `x` as a flat list of
    # alternating (1-based start position, run length) values, scanning in
    # column-major order (hence the transpose before flatten).
    dots = np.where(x.T.flatten() == 1)[0]
    run_lengths = []
    prev = -2
    for b in dots:
        if (b>prev+1): run_lengths.extend((b + 1, 0))  # gap -> open a new run
        run_lengths[-1] += 1  # extend the current run
        prev = b
    return run_lengths

def prob_to_rles(x):
    # watershed instance generation
    # NOTE(review): definition continues past this chunk — incomplete here.
    #x = np.where(x > 0.5, 1, 0)
# NOTE(review): tail of a top-k accuracy helper whose `def` line is outside
# this chunk; `topk`, `correct`, and `batch_size` are bound above it.
    res = []
    for k in topk:
        # correct[:k] holds the boolean hits among the top-k predictions.
        correct_k = correct[:k].view(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))  # as a percentage
    return res

#%%
# Build train/validation/test loaders from the CLI arguments.
train_loader, val_loader, train_dataset = utils.get_train_valid_loader(
    args.train_dir, batch_size=args.batch_size, crop_size=args.crop_size,
    augment=args.augment, random_seed=111, shuffle=True,
    valid_size=args.valid_size, filtering=args.filtering,
    num_channels=args.num_channels, l2_loss=args.l2_loss,
    same_crop=args.same_crop, num_workers=args.num_workers)
test_loader, test_dataset = utils.get_test_loader(
    TEST_DIR, batch_size=args.batch_size, crop_size=args.crop_size,
    filtering=args.filtering, num_channels=args.num_channels,
    l2_loss=args.l2_loss, num_workers=args.num_workers)
#val_loader, val_dataset = utils.get_val_loader(VAL_DIR, batch_size=args.batch_size, crop_size=args.crop_size, filtering=args.filtering, num_channels=args.num_channels, l2_loss=args.l2_loss)
print(train_dataset.classes)
print(args)

# Model selection: either a custom ResNet18 or a torchvision backbone wrapped
# for fine-tuning with a 10-class head.
if args.arch.startswith('my'):
    model = myresnet.ResNet18()
else:
    #original_model = models.resnet101(pretrained=True)
    original_model = globals()[args.arch](pretrained=args.pretrained)
    if args.finetune:
        # Freeze the backbone; only the new head gets trained.
        for param in original_model.parameters():
            param.requires_grad = False
    # NOTE(review): nesting of the finetune branch and this wrapper under
    # `else` is inferred from the mangled source — confirm against original.
    model = utils.FineTuneModel(original_model, args.arch, 10, num_channels=args.num_channels)
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import time
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import pylab
from tensorboardX import SummaryWriter
import torchvision.utils as vutils
import utils
import models
import params
import train, test

# Domain-adaptation setup: MNIST is the source domain, MNIST-M the target.
src_train_dataloader = utils.get_train_loader('MNIST')
src_test_dataloader = utils.get_test_loader('MNIST')
tgt_train_dataloader = utils.get_train_loader('MNIST_M')
tgt_test_dataloader = utils.get_test_loader('MNIST_M')

# Shared feature extractor with one classifier head per domain.
common_net = models.Extractor()
src_net = models.Classifier()
tgt_net = models.Classifier()

# Grab one batch from each domain; the first 4 images are for visualization.
src_dataiter = iter(src_train_dataloader)
tgt_dataiter = iter(tgt_train_dataloader)
src_imgs, src_labels = next(src_dataiter)
tgt_imgs, tgt_labels = next(tgt_dataiter)
src_imgs_show = src_imgs[:4]
tgt_imgs_show = tgt_imgs[:4]
# Evaluate incremental-learning checkpoints: for each class batch (iteration)
# and epoch, measure accuracy on the newly introduced classes and on every
# previously seen class batch.
with open(os.path.join(root_path, 'setting.json')) as f:
    SETTINGS = json.load(f)

# Checkpoint filename template, filled per net/iteration/epoch/type below.
model_ckp = os.path.join(root_path, '{net}-{idx}-{epoch}-{type}.pth')
net = get_network(net=SETTINGS['NET'],
                  num_classes=SETTINGS['STEP_CLASSES'],
                  input_channels=3)

if not os.path.exists(eval_path):
    os.makedirs(eval_path)
os.makedirs(eval_path+'/OldAccuracy', exist_ok=True)
os.makedirs(eval_path+'/NewAccuracy', exist_ok=True)
os.makedirs(eval_path+'/AvgIncrementalAccuracy', exist_ok=True)

incremental_accuracy = []
for iteration, sequence in enumerate(SETTINGS['TRAINING_BATCHES']):
    # Loader restricted to the classes introduced at this iteration.
    test_loader = get_test_loader(dataset=SETTINGS['DATASET'],
                                  accepted_class_labels=sequence,
                                  num_workers=0)
    cum_old_accuracies = [0]  # seeded so the list is non-empty at iteration 0
    new_accuracies = []
    for epoch in range(SETTINGS['EPOCH']):
        # Restore the end-of-epoch checkpoint for this iteration.
        net.load_state_dict(torch.load(model_ckp.format(net=SETTINGS['NET'],
                                                        idx=iteration,
                                                        epoch=epoch,
                                                        type='end')))
        # label_correction shifts global labels into this head's range.
        current_acc = evaluate(net, test_loader,
                               label_correction=iteration*SETTINGS['STEP_CLASSES'])
        old_accuracies = []
        for old_iteration in range(iteration):
            old_sequence = SETTINGS['TRAINING_BATCHES'][old_iteration]
            old_test_loader = get_test_loader(dataset=SETTINGS['DATASET'],
                                              accepted_class_labels=old_sequence,
                                              num_workers=0)
            acc = evaluate(net, old_test_loader,
                           label_correction=old_iteration*SETTINGS['STEP_CLASSES'])
            old_accuracies.append(acc.cpu().numpy())
        new_accuracies.append(current_acc.cpu().numpy())
        if iteration > 0:
            cum_old_accuracies.append(np.mean(np.asarray(old_accuracies)))
from transformers import BertModel, BertConfig, BertTokenizer, BertTokenizerFast, AdamW, get_linear_schedule_with_warmup ###BERT model instead of the Extractor # create the BERTConfig, BERTTokenizer, and BERTModel model_name = "bert-base-uncased" config = BertConfig.from_pretrained(model_name, output_hidden_states=True, return_dict=True) tokenizer = BertTokenizerFast.from_pretrained(model_name, do_lower_case=True) bert = BertModel.from_pretrained(model_name, config=config) src_train_dataloader = utils.get_train_loader( '/content/drive/My Drive/Data_summarization/pytorch_DAN/data/books.csv', tokenizer) src_test_dataloader = utils.get_test_loader( '/content/drive/My Drive/Data_summarization/pytorch_DAN/data/books.csv', tokenizer) tgt_train_dataloader = utils.get_train_loader( '/content/drive/My Drive/Data_summarization/pytorch_DAN/data/dvd.csv', tokenizer) tgt_test_dataloader = utils.get_test_loader( '/content/drive/My Drive/Data_summarization/pytorch_DAN/data/dvd.csv', tokenizer) common_net = bert src_net = models.BertForSequenceClassification(config, common_net) tgt_net = models.BertForSequenceClassification(config, common_net) src_dataiter = iter(src_train_dataloader) tgt_dataiter = iter(tgt_train_dataloader)
def main():
    """Run DARTS-style architecture search on CUDA.

    Each epoch: train the weights and architecture parameters, then run
    validation and test inference, logging per-phase timings and tracking
    the best validation accuracy.

    Returns:
        (total_train_time, total_valid_time, total_test_time) in seconds.
    """
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    fix_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion,
                    space=args.search_space)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    train_queue, train_sampler, valid_queue = utils.get_train_validation_loader(
        args)
    test_queue = utils.get_test_loader(args)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)
    architect = Architect(model, args)

    best_acc = 0
    total_train_time, total_valid_time, total_test_time = 0, 0, 0
    for epoch in range(args.epochs):
        # NOTE(review): get_lr() is deprecated in newer torch in favor of
        # get_last_lr() — confirm the pinned torch version before changing.
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)
        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        #print(F.softmax(model.alphas_normal, dim=-1))
        #print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        architect.alpha_forward = 0
        architect.alpha_backward = 0
        start_time = time.time()
        train_acc, train_obj, alphas_time, forward_time, backward_time = \
            train(train_queue, valid_queue, model, architect, criterion,
                  optimizer, lr, epoch)
        logging.info('train_acc %f', train_acc)
        end_time = time.time()
        search_time = end_time - start_time
        total_train_time += search_time
        logging.info("train time %f", end_time - start_time)
        logging.info("alphas_time %f ", alphas_time)
        logging.info("forward_time %f", forward_time)
        logging.info("backward_time %f", backward_time)
        logging.info("alpha_forward %f", architect.alpha_forward)
        logging.info("alpha_backward %f", architect.alpha_backward)
        logging.info('train_acc %f', train_acc)

        # validation
        # if args.epochs-epoch<=1:
        #     valid_acc, valid_obj = infer(valid_queue, model, criterion)
        #     logging.info('valid_acc %f', valid_acc)
        #     utils.save(model, os.path.join(args.save, 'weights.pt'))
        start_time2 = time.time()
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        end_time2 = time.time()
        valid_time = end_time2 - start_time2
        total_valid_time += valid_time
        logging.info("inference time %f", end_time2 - start_time2)
        logging.info('valid_acc %f', valid_acc)

        # test
        start = time.time()
        test_acc, test_obj = infer(test_queue, model, criterion)
        end = time.time()
        test_time = end - start
        total_test_time += test_time
        logging.info("inference time %f", end - start)
        logging.info('test_acc %f, test_obj %f', test_acc, test_obj)

        # update learning rate
        scheduler.step()

        # Track the best validation accuracy seen so far.
        is_best = valid_acc > best_acc
        best_acc = max(valid_acc, best_acc)
        if is_best:
            logging.info(
                'best valid_acc: {} at epoch: {}, test_acc: {}'.format(
                    best_acc, epoch, test_acc))
            logging.info('Current best genotype = {}'.format(model.genotype()))
    return total_train_time, total_valid_time, total_test_time
def main():
    """Architecture-search variant with TensorBoard scalar logging.

    Trains and evaluates each epoch, logs lr / train / valid / test metrics
    via ``log_value``, and saves the best model (by validation accuracy) to
    ``args.save/best_weights.pt``.
    """
    root = logging.getLogger()
    if not torch.cuda.is_available():
        root.info('no gpu device available')
        sys.exit(1)

    # Fix seed
    utils.fix_seed(args.seed)
    root.info('gpu device = %d' % args.gpu)
    root.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion,
                    args.greedy, args.l2)
    model = model.cuda()
    root.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Data loading code
    train_queue, train_sampler, valid_queue = utils.get_train_validation_loader(
        args)
    test_queue = utils.get_test_loader(args)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)
    architect = Architect(model, args)

    best_acc = 0
    for epoch in range(args.epochs):
        lr = scheduler.get_lr()[0]
        log_value("lr", lr, epoch)
        root.info('epoch %d lr %e', epoch, lr)
        genotype = model.genotype()
        root.info('genotype = %s', genotype)

        # training
        architect.alpha_forward = 0
        architect.alpha_backward = 0
        start_time = time.time()
        train_acc, train_obj, alphas_time, forward_time, backward_time = \
            train(train_queue, valid_queue, model, architect, criterion,
                  optimizer, lr, epoch)
        end_time = time.time()
        root.info("train time %f", end_time - start_time)
        root.info("alphas_time %f ", alphas_time)
        root.info("forward_time %f", forward_time)
        root.info("backward_time %f", backward_time)
        root.info("alpha_forward %f", architect.alpha_forward)
        root.info("alpha_backward %f", architect.alpha_backward)
        log_value('train_acc', train_acc, epoch)
        root.info('train_acc %f', train_acc)

        # validation
        start_time2 = time.time()
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        end_time2 = time.time()
        root.info("inference time %f", end_time2 - start_time2)
        log_value('valid_acc', valid_acc, epoch)
        root.info('valid_acc %f', valid_acc)

        # test
        start = time.time()
        test_acc, test_obj = infer(test_queue, model, criterion)
        end = time.time()
        root.info("inference time %f", end - start)
        log_value('test_acc', test_acc, epoch)
        root.info('test_acc %f, test_obj %f', test_acc, test_obj)

        # update learning rate
        scheduler.step()

        # Track and persist the best model by validation accuracy.
        is_best = valid_acc > best_acc
        best_acc = max(valid_acc, best_acc)
        if is_best:
            root.info('best valid_acc: {} at epoch: {}, test_acc: {}'.format(
                best_acc, epoch, test_acc))
            root.info('Current best genotype = {}'.format(model.genotype()))
            # NOTE(review): nesting of this save under `is_best` is inferred
            # from the mangled source — confirm against the original file.
            utils.save(model, os.path.join(args.save, 'best_weights.pt'))
def main():
    """Incremental-learning training loop.

    Splits the label space into fixed-size class batches, trains the network
    on each batch in turn with a cross-entropy loss and (optionally) a
    log-norm "triangle" regularizer, and saves per-epoch checkpoints plus
    diagnostic plots (gradient magnitudes, losses, feature embeddings).
    """
    # Partition class ids into consecutive groups of STEP_CLASSES.
    classes = [i for i in range(SETTINGS['NUM_CLASSES'])]
    training_batches = [
        classes[i:i + SETTINGS['STEP_CLASSES']]
        for i in range(0, len(classes), SETTINGS['STEP_CLASSES'])
    ]
    SETTINGS['TRAINING_BATCHES'] = training_batches

    checkpoint_path = os.path.join(
        SETTINGS['CHECKPOINT_ROOT'], SETTINGS['DATASET'],
        'StepClasses-{}'.format(str(SETTINGS['STEP_CLASSES'])),
        'BufferSamples-{}'.format(str(SETTINGS['K_SHOT'])), SETTINGS['NET'],
        SETTINGS['TIME_NOW'])
    if not os.path.exists(checkpoint_path):
        Path(checkpoint_path).mkdir(parents=True, exist_ok=True)
    model_ckp_path = os.path.join(checkpoint_path,
                                  '{net}-{idx}-{epoch}-{type}.pth')
    save_setting(SETTINGS, checkpoint_path)

    net = get_network(net=SETTINGS['NET'],
                      num_classes=SETTINGS['STEP_CLASSES'],
                      input_channels=3)
    norm_alpha_loss = torch.nn.MSELoss()
    norm_triangle_loss = torch.nn.MSELoss()
    ce_criterion = torch.nn.CrossEntropyLoss()
    # Fixed zero inputs (used by the disabled L_Zero loss kept below).
    zero_img = torch.zeros(size=[1, 3, 32, 32])
    zero_label = torch.zeros(size=[1, 512])

    old_classes = []
    for iteration, training_sequence in enumerate(training_batches):
        # Create the per-iteration plot directories on first touch.
        if not os.path.exists(
                os.path.join(checkpoint_path, 'Plots', str(iteration))):
            base_path = os.path.join(checkpoint_path, 'Plots', str(iteration))
            base_gradients_path = os.path.join(base_path, 'Gradients')
            g_zero_path = os.path.join(base_gradients_path, 'L_Zero')
            g_alpha_path = os.path.join(base_gradients_path, 'L_Alpha')
            g_triangle_path = os.path.join(base_gradients_path, 'L_Triangle')
            loss_path = os.path.join(base_path, 'LossPlots')
            embedding_path = os.path.join(base_path, 'EmbeddingPlots')
            Path(loss_path).mkdir(parents=True, exist_ok=True)
            Path(embedding_path).mkdir(parents=True, exist_ok=True)
            Path(g_zero_path).mkdir(parents=True, exist_ok=True)
            Path(g_alpha_path).mkdir(parents=True, exist_ok=True)
            Path(g_triangle_path).mkdir(parents=True, exist_ok=True)

        training_loader = get_train_loader(
            SETTINGS['DATASET'],
            accepted_class_labels=training_sequence,
            norm_lambda=SETTINGS['NORM_LAMBDA'],
            batch_size=SETTINGS['BATCH_SIZE'])
        old_classes.extend(training_sequence)
        # Test on everything seen so far, with a larger batch size.
        test_loader = get_test_loader(SETTINGS['DATASET'],
                                      accepted_class_labels=old_classes,
                                      batch_size=5 * SETTINGS['BATCH_SIZE'])

        # The first iteration has its own epoch count and learning rate.
        if iteration == 0:
            EPOCH = SETTINGS['STARTING_EPOCH']
            lr = SETTINGS['STARTING_LEARNING_RATE']
        else:
            EPOCH = SETTINGS['OTHER_EPOCHS']
            lr = SETTINGS['OTHER_LEARNING_RATE']
        # Separate optimizers per loss term (all over the same parameters).
        ce_optimizer = optim.SGD(params=net.parameters(), lr=lr, momentum=0.9)
        triangle_optimizer = optim.SGD(params=net.parameters(), lr=lr,
                                       momentum=0.9)
        zero_optimizer = optim.SGD(params=net.parameters(), lr=lr,
                                   momentum=0.9)

        for epoch in range(EPOCH):
            print('Processing iteration: {}\nEpoch:{}'.format(
                iteration, epoch))
            for batch_idx, data in enumerate(training_loader):
                # Loader yields a mixup-style 7-tuple; labels are shifted into
                # the current head's [0, STEP_CLASSES) range.
                x, y, alpha, x2, y2, x_alpha, x_convex = data
                y = y - iteration * SETTINGS['STEP_CLASSES']
                if mode == MODE.SUPER_DEBUG:
                    print('---INPUT SHAPES---')
                    print(x.shape, y.shape, alpha.shape, x2.shape, y2.shape,
                          x_alpha.shape, x_convex.shape)

                # Reference features computed without gradients.
                net.eval()
                with torch.no_grad():
                    _, x2_features = net(x2.cuda())
                    _, alpha_x_features = net(x_alpha.cuda())
                alpha_sq = torch.unsqueeze(alpha, dim=1)

                if 'CE' in SETTINGS['LOSSES']:
                    net.train()
                    net.zero_grad()
                    preds, x_features = net(x.cuda())
                    l_ce = ce_criterion(preds, y.cuda())
                    l_ce.backward(retain_graph=True)
                    ce_gradients = get_gradient_magnitudes(net)
                    plot_gradients(ce_gradients, g_alpha_path,
                                   '{}--{}'.format(epoch, batch_idx))
                    del ce_gradients
                    ce_optimizer.step()
                else:
                    l_ce = DummyLoss()
                # Disabled alpha-norm loss kept below for reference.
                """net.train()
                net.zero_grad()
                _, x_features = net(x.cuda())
                x_norm = torch.unsqueeze(torch.norm(x_features, p=2, dim=1), dim=1)
                alpha_sq = torch.unsqueeze(alpha, dim=1)
                alpha_x_norm = torch.unsqueeze(torch.norm(alpha_x_features, p=2, dim=1), dim=1)
                # print(alpha_sq.shape, x_norm.shape, alpha_x_norm.shape)
                l_a = norm_alpha_loss(x_norm*alpha_sq.cuda(), alpha_x_norm)
                l_a.backward(retain_graph=True)
                alpha_gradients = get_gradient_magnitudes(net)
                plot_gradients(alpha_gradients, g_alpha_path, '{}--{}'.format(epoch, batch_idx))
                del alpha_gradients
                ce_optimizer.step()"""

                if 'TRIANGLE' in SETTINGS['LOSSES']:
                    net.train()
                    net.zero_grad()
                    _, cvx_features = net(x_convex.cuda())
                    # Penalize deviation (in log space) of the convex-combo
                    # feature norm from the convex combo of individual norms.
                    l_t = norm_triangle_loss(
                        torch.log(
                            torch.unsqueeze(torch.norm(cvx_features, p=2,
                                                       dim=1), dim=1)),
                        torch.log(
                            alpha_sq.cuda() * torch.unsqueeze(
                                torch.norm(x_features, p=2, dim=1), dim=1) +
                            (1 - alpha_sq.cuda()) * torch.unsqueeze(
                                torch.norm(x2_features, p=2, dim=1), dim=1)))
                    l_t.backward()
                    triangle_gradients = get_gradient_magnitudes(net)
                    plot_gradients(triangle_gradients, g_triangle_path,
                                   '{}--{}'.format(epoch, batch_idx))
                    del triangle_gradients
                    triangle_optimizer.step()
                else:
                    l_t = DummyLoss()
                # Disabled zero-input loss kept below for reference.
                """net.zero_grad()
                _, zero_features = net(zero_img.cuda())
                l_z = zero_loss(zero_features)/SETTINGS['BATCH_SIZE']
                l_z.backward()
                zero_gradients = get_gradient_magnitudes(net)
                plot_gradients(zero_gradients, g_zero_path, '{}--{}'.format(epoch, batch_idx))
                del zero_gradients
                zero_optimizer.step()"""
                plot_norm_losses(l_ce.item(), l_t.item(), 0,
                                 path=loss_path,
                                 fid='Epoch:{}--BatchNo:{}'.format(
                                     epoch, batch_idx))

            train_acc = evaluate(net, training_loader,
                                 label_correction=iteration *
                                 SETTINGS['STEP_CLASSES'])
            print('Training accuracy: {}'.format(train_acc))

            # Collect test-set features for an embedding plot.
            test_features = None
            test_labels = None
            for data in test_loader:
                x_test, y_test, _, _, _, _, _ = data
                net.eval()
                with torch.no_grad():
                    _, x_test_features = net(x_test.cuda())
                if test_features is None:
                    test_features = x_test_features.cpu()
                    test_labels = y_test.cpu()
                else:
                    test_features = torch.cat(
                        [test_features, x_test_features.cpu()], dim=0)
                    test_labels = torch.cat(
                        [test_labels, y_test.cpu()], dim=0)
            plot_embedding(test_features.numpy(), test_labels.numpy(),
                           num_classes=len(old_classes),
                           filepath=embedding_path,
                           filename='Epoch:{}'.format(epoch))

            # Save the end-of-epoch checkpoint for this iteration.
            torch.save(
                net.state_dict(),
                model_ckp_path.format(net=SETTINGS['NET'], idx=iteration,
                                      epoch=epoch, type='end'))