def main():
    """Load a ConvNet snapshot and classify one random CIFAR10 image.

    Restores network and optimizer state from a fixed snapshot file, picks a
    random image from the dataset, displays it with matplotlib, and prints
    the predicted class.
    """
    net = ConvNet()
    net.cuda()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)  # SGD with momentum
    classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog',
               'frog', 'horse', 'ship', 'truck')

    # Load snapshot (state dicts for both the net and the optimizer).
    snapshot = torch.load('./convnet_CIFAR10_snapshot/snapshot2_12000')
    net.load_state_dict(snapshot['state_dict'])
    optimizer.load_state_dict(snapshot['optimizer'])

    data_loader = load_data('./CIFAR10_data')
    image_num = random.randint(0, len(data_loader.dataset) - 1)
    sample = data_loader.dataset[image_num][0]   # image tensor, presumably CHW — confirm with load_data
    label = data_loader.dataset[image_num][1]    # class index into `classes`

    # BUG FIX: Tensor.numpy() takes no arguments — the original
    # `.numpy(2, 0, 1)` raised a TypeError at runtime.  Convert the CHW
    # tensor to an HWC array via np.transpose so imshow gets the layout it
    # expects; the old `np.rot90(img, k=3)` hack (which compensated for a
    # reversed-axis transpose) is no longer needed.
    img = np.transpose(sample.numpy(), (1, 2, 0))
    img = img / 2 + 0.5  # unnormalize
    print(classes[label])
    plt.imshow(img)
    plt.title('image: %d, %s' % (image_num, str(classes[label])))
    plt.axis('off')
    # plt.show()

    # The network expects a 4D batch: (batch, channel, height, width).
    # unsqueeze(0) replaces the manual zero-filled FloatTensor copy and the
    # local variable no longer shadows the builtin `input`.
    batch = sample.unsqueeze(0)
    output = net(Variable(batch.cuda()))
    print(output)
    _, predicted = torch.max(output.data, 1)
    # view(-1)[0] works whether torch.max keeps the reduced dim (old
    # PyTorch: shape (1, 1)) or squeezes it (shape (1,)), unlike the
    # original `predicted[0][0]`.
    print('Predicted: %s' % classes[int(predicted.view(-1)[0])])
visdom_log['test_loss'].append(loss_test) return plot, visdom_log if __name__ == "__main__": logs = [] colors = [] trace_names = [] start_t = time.time() print("\n\n > Teacher training ... ") colors.append('orange') trace_names.extend(['Teacher Train', 'Teacher Test']) model = ConvNet(net_dataset=CIFAR10) model.cuda() plot, log_base = run_training(model, 'Teacher_', args.epochs) logs.append(log_base) # wider student training print("\n\n > Wider Student training ... ") colors.append('blue') trace_names.extend(['Wider Net2Net Train', 'Wider Net2Net Test']) model_ = ConvNet(net_dataset=CIFAR10) model_ = copy.deepcopy(model) del model model = model_ model.wider(operation='net2net', widening_factor=2) print model plot, log_net2net = run_training(model, 'Wider_student_', args.epochs,
def main():
    """Train and evaluate a ConvNet on CIFAR10, optionally resuming from a snapshot.

    Loads the data, builds the network/criterion/optimizer, restores a
    snapshot when available, then runs the train/test loop for the
    remaining epochs, checkpointing every `snapshot_interval` batches.
    """
    batch_size = 50
    train_loader, test_loader = load_data('./CIFAR10_data', batch_size)

    # --- configuration ----------------------------------------------------
    use_cuda = True
    snapshot_folder = './convnet_CIFAR10_snapshot'
    training = True
    snapshot_interval = 100  # batch interval
    epochs = 100
    load_snapshot = True
    snapshot_file = 'snapshot_45_1000'

    print('Finish loading data')
    classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog',
               'frog', 'horse', 'ship', 'truck')

    show_random_image = False
    if show_random_image:
        # randomly show few pictures
        for i in range(3):
            random_pick = random.randint(0, len(train_loader.dataset) - 1)
            show_image(train_loader, random_pick, classes)

    # defining neural network
    net = ConvNet()
    if use_cuda:
        net.cuda()

    # defining loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)  # SGD with momentum

    start_epoch = 1
    if load_snapshot:
        # os.path.join instead of manual '/' string concatenation.
        resume_path = os.path.join(snapshot_folder, snapshot_file)
        if os.path.isfile(resume_path):
            print("=> Loading snapshot '%s'" % (resume_path))
            snapshot = torch.load(resume_path)
            start_epoch = snapshot['epoch']
            net.load_state_dict(snapshot['state_dict'])
            optimizer.load_state_dict(snapshot['optimizer'])
            print("Successful load snapshot, net state at epoch %d" % snapshot['epoch'])
            if snapshot['batch_end']:
                # Snapshot was taken at an epoch boundary: resume at the next epoch.
                start_epoch = start_epoch + 1
                print('Snapshot reach batch end, start next epoch')
        else:
            print("No checkpoint found at '{}', training starts from epoch 1".
                  format(resume_path))

    # training
    if training:
        if not os.path.isdir(snapshot_folder):
            os.mkdir(snapshot_folder)
        # Start the range at the resume point instead of looping over every
        # epoch and skipping the already-trained ones with a guard —
        # identical iterations, no no-op passes.
        for epoch in range(start_epoch - 1, epochs):
            # loop over the dataset multiple times, 1 epoch equals to 1 loop over
            print('Training starts at epoch %d' % (epoch + 1))
            train(train_loader, net, epoch, criterion, optimizer, use_cuda,
                  snapshot_folder, snapshot_interval)
            test(test_loader, net, use_cuda)
        print("Finish training")
# data train_loader = torch.utils.data.DataLoader(datasets.MNIST( data_dir, train=True, download=True, transform=transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.1307, ), (0.3081, )) ])), batch_size=1000, shuffle=True) use_cuda = torch.cuda.is_available() model = ConvNet(num_classes=10) if use_cuda: model = model.cuda() optimizer = optim.SGD(model.parameters(), lr=1e-2) criterion = F.nll_loss def train(epoch): print('\nEpoch: %d' % epoch) model.train() train_loss = 0 reg_loss = 0 correct = 0 total = 0 for batch_idx, (inputs, targets) in enumerate(train_loader): if use_cuda: inputs, targets = inputs.cuda(), targets.cuda() inputs, targets_a, targets_b, lam = mixup_data(
colors = [] trace_names = [] if args.plot_name is not None: visdom_live_plot = PlotLearning( './plots/cifar/', 10, plot_name=args.plot_name, env_name=args.env_name) else: visdom_live_plot = None start_time = time.time() print("\n\n > Teacher (Base Network) training ... ") net_type = 'Teacher' colors.append('orange') trace_names.extend(['Teacher Train', 'Teacher Test']) teacher_model = ConvNet(net_dataset=CIFAR10) teacher_model.cuda() optimizer = get_optimizer(teacher_model) scheduler = get_scheduler(optimizer) print teacher_model log_base, win_accuracy, win_loss = start_training( teacher_model, net_type, optimizer, scheduler, visdom_live_plot) logs.append(log_base) save_optimizer_scheduler(optimizer, scheduler, net_type) end_time = time.time() print 'time to train teacher network:', print end_time-start_time # wider student training from Net2Net print("\n\n > Wider Student training (Net2Net)... ") net_type = 'WideNet2Net' colors.append('blue')