def get_model(depth, width_factor, dropout, in_channels=3, labels=10):
    model = WideResNet(depth, width_factor, dropout, in_channels=in_channels, labels=labels)
    # Keep the constructor arguments alongside the model so a checkpoint
    # can rebuild the exact same architecture later.
    model_cfg = {
        'depth': depth,
        'width_factor': width_factor,
        'dropout': dropout,
        'in_channels': in_channels,
        'labels': labels,
    }
    return model.to(DEVICE), model_cfg
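# Hedged sketch (not from the original source): the cfg dict returned by
# get_model pairs naturally with the checkpoint layout that Predictor.load
# below expects, i.e. a dict with the keys 'cfg' and 'model_state_dict'.
# A minimal save helper under that assumption:
def save_checkpoint(model, model_cfg, fn='model.dat'):
    torch.save({
        'cfg': model_cfg,                         # constructor kwargs for WideResNet
        'model_state_dict': model.state_dict(),   # learned weights
    }, fn)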
class Predictor(object):
    def __init__(self, batch_size, fn):
        self.model = None
        self.load(fn)
        dataset = ImageLoader(batch_size)
        self.test_loader = dataset.test

    def load(self, fn='model.dat'):
        # Rebuild the model from the saved constructor config, then restore weights.
        cp = torch.load(fn, map_location=DEVICE)
        cfg = cp['cfg']
        self.model = WideResNet(**cfg).to(DEVICE)
        self.model.load_state_dict(cp['model_state_dict'])

    def inference(self):
        self.model.eval()  # disable dropout, use running batch-norm statistics
        results = []
        with torch.no_grad():  # no gradients needed at inference time
            for inputs, targets in self.test_loader:
                outputs = self.model(inputs.to(DEVICE))
                results += torch.argmax(outputs, 1).cpu().tolist()
        return results
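# Hedged usage sketch: the batch size and checkpoint filename are
# illustrative values, not taken from the original source.
predictor = Predictor(batch_size=128, fn='model.dat')
predictions = predictor.inference()  # list of predicted class indices over the test set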
type=float, help="L2 weight decay.") parser.add_argument("--width_factor", default=8, type=int, help="How many times wider compared to normal ResNet.") args = parser.parse_args() initialize(args, seed=42) device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") dataset = Cifar(args.batch_size, args.threads) log = Log(log_each=10) model = WideResNet(args.depth, args.width_factor, args.dropout, in_channels=3, labels=10).to(device) base_optimizer = torch.optim.SGD optimizer = SAM(model.parameters(), base_optimizer, rho=args.rho, lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) scheduler = StepLR(optimizer, args.learning_rate, args.epochs) for epoch in range(args.epochs): model.train() log.train(len_dataset=len(dataset.train))
parser.add_argument("--train_size", default=50000, type=int, help="How many training samples to use.") args = parser.parse_args() print(args) initialize(args, seed=42) # device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") dataset = Cifar(args.batch_size, args.threads, args.train_size) log = Log(log_each=10) # model = WideResNet(args.depth, args.width_factor, args.dropout, in_channels=3, labels=10).to(device) model = WideResNet(args.depth, args.width_factor, args.dropout, in_channels=3, labels=10) model = nn.DataParallel(model).cuda() base_optimizer = torch.optim.SGD optimizer = SAM(model.parameters(), base_optimizer, rho=args.rho, lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) # scheduler = StepLR(optimizer, args.learning_rate, args.epochs) scheduler = PolyLR(optimizer, args.learning_rate, args.epochs) test_class_accuracies = np.zeros((10), dtype=float)
                    default=0.0005, type=float, help="L2 weight decay.")
parser.add_argument("--width_factor", default=8, type=int, help="How many times wider compared to normal ResNet.")
args = parser.parse_args()

initialize(args, seed=42)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

dataset = Cifar100(args.batch_size, args.threads)
model = WideResNet(args.depth, args.width_factor, args.dropout, in_channels=3, labels=10).to(device)

PATH = './trained_models/sam_net_250.pth'
model.load_state_dict(torch.load(PATH))
model.eval()  # evaluation only: disable dropout and batch-norm updates

predict_all = np.array([])
correct_all = np.array([], dtype=bool)
targets_all = np.array([], dtype=int)

with torch.no_grad():
    for batch in dataset.train:
        inputs, targets = (b.to(device) for b in batch)
        # Replace every target with a uniformly random label in [0, 9].
        # Iterate over the actual batch size rather than a hard-coded 128,
        # so the last (possibly smaller) batch is handled correctly.
        rands = torch.clone(targets)
        for i in range(rands.size(0)):
            rands[i] = randint(0, 9)
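        # Hedged sketch of how the loop plausibly continues, accumulating into
        # the arrays declared above; the exact bookkeeping (in particular
        # comparing predictions against the randomized labels) is an
        # assumption, not taken from the original source.
        outputs = model(inputs)
        predictions = torch.argmax(outputs, 1)
        predict_all = np.concatenate([predict_all, predictions.cpu().numpy()])
        correct_all = np.concatenate([correct_all, (predictions == rands).cpu().numpy()])
        targets_all = np.concatenate([targets_all, targets.cpu().numpy()])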
type=float, help="L2 weight decay.") parser.add_argument("--width_factor", default=8, type=int, help="How many times wider compared to normal ResNet.") args = parser.parse_args() initialize(args, seed=42) device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") dataset = Cifar(args.batch_size, args.threads) log = Log(log_each=10) model = WideResNet(args.depth, args.width_factor, args.dropout, in_channels=3, labels=10).to(device) if args.optimizer == "ada_hessian": optimizer = AdaHessian(model.parameters(), lr=args.learning_rate, weight_decay=args.weight_decay, update_each=args.update_each, average_conv_kernel=args.average_conv_kernel) else: optimizer = SGD(model.parameters(), lr=args.learning_rate, momentum=0.9, nesterov=True, weight_decay=args.weight_decay)
parser.add_argument("--epochs", default=200, type=int, help="Total number of epochs.") parser.add_argument("--label_smoothing", default=0.1, type=float, help="Use 0.0 for no label smoothing.") parser.add_argument("--learning_rate", default=0.1, type=float, help="Base learning rate at the start of the training.") parser.add_argument("--momentum", default=0.9, type=float, help="SGD Momentum.") parser.add_argument("--threads", default=2, type=int, help="Number of CPU threads for dataloaders.") parser.add_argument("--rho", default=2.0, type=int, help="Rho parameter for SAM.") parser.add_argument("--weight_decay", default=0.0005, type=float, help="L2 weight decay.") parser.add_argument("--width_factor", default=8, type=int, help="How many times wider compared to normal ResNet.") args = parser.parse_args() initialize(args, seed=42) device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") dataset = Cifar(args.batch_size, args.threads) log = Log(log_each=10) model = WideResNet(args.depth, args.width_factor, args.dropout, in_channels=3, labels=10).to(device) base_optimizer = torch.optim.SGD optimizer = SAM(model.parameters(), base_optimizer, rho=args.rho, adaptive=args.adaptive, lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) scheduler = StepLR(optimizer, args.learning_rate, args.epochs) for epoch in range(args.epochs): model.train() log.train(len_dataset=len(dataset.train)) for batch in dataset.train: inputs, targets = (b.to(device) for b in batch) # first forward-backward step enable_running_stats(model) predictions = model(inputs)
parser.add_argument("--epochs", default=200, type=int, help="Total number of epochs.") parser.add_argument("--label_smoothing", default=0.1, type=float, help="Use 0.0 for no label smoothing.") parser.add_argument("--learning_rate", default=0.01, type=float, help="Base learning rate at the start of the training.") parser.add_argument("--optimizer", default="ada_hessian", type=str, help="Type of optimizer, supported values are {'ada_hessian', SGD'}.") parser.add_argument("--threads", default=2, type=int, help="Number of CPU threads for dataloaders.") parser.add_argument("--update_each", default=1, type=int, help="Delayed hessian update.") parser.add_argument("--weight_decay", default=0.0005, type=float, help="L2 weight decay.") parser.add_argument("--width_factor", default=8, type=int, help="How many times wider compared to normal ResNet.") args = parser.parse_args() initialize(args, seed=42) device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") dataset = Cifar(args.batch_size, args.threads) log = Log(log_each=10) model = WideResNet(args.depth, args.width_factor, args.dropout, in_channels=3, labels=10).to(device) if args.optimizer == "ada_hessian": optimizer = AdaHessian(model.parameters(), lr=args.learning_rate, weight_decay=args.weight_decay, update_each=args.update_each) else: optimizer = SGD(model.parameters(), lr=args.learning_rate, momentum=0.9, nesterov=True, weight_decay=args.weight_decay) scheduler = StepLR(optimizer, args.learning_rate, args.epochs) for epoch in range(args.epochs): model.train() log.train(len_dataset=len(dataset.train)) for i, (inputs, labels) in enumerate(dataset.train): optimizer.zero_grad() outputs = model(inputs.to(device))