def getInput():
    """Prompt the user once and return the validated text, or None.

    Whatever ``validate`` returns is passed through when truthy; any
    falsy validation result (None, empty string, ...) is normalized
    to None, exactly as the original if/return chain did.
    """
    raw = input('Your Input: ')
    return validate(raw) or None
def real_test(val_loader, criterion, model, Emodel):
    """Compare accuracy before and after contracting an ExpandNet.

    Evaluates the ExpandNet (``Emodel``) and the original CompactNet
    (``model``), contracts the ExpandNet weights into a fresh CompactNet
    via ``compute_new_weights.expandnet_contract``, then evaluates the
    contracted model on the same loader.  Relies on the module-level
    ``args``, ``utils`` and ``compute_new_weights`` names.
    """
    print('==> Before contracting')

    print('......ExpandNet TEST............')
    expand_acc = utils.validate(val_loader, Emodel, criterion, args, epoch=0)
    print('ExpandNet test_acc: {:.4f}'.format(expand_acc))

    # NOTE: "ComapctNet" spelling kept verbatim -- it is runtime output.
    print('......Original ComapctNet TEST............')
    compact_acc = utils.validate(val_loader, model, criterion, args, epoch=0)
    print('Original ComapctNet test_acc: {:.4f}'.format(compact_acc))

    print('====> Contracting............')
    contracted = compute_new_weights.expandnet_contract(model, Emodel, args)

    print('======> After contracting')
    print('......Contracted CompactNet TEST............')
    contracted_acc = utils.validate(val_loader, contracted.cuda(args.gpu),
                                    criterion, args, epoch=0)
    print('Contracted CompactNet test_acc: {:.4f}'.format(contracted_acc))
def test(self, test_data, test_labels, K, fwd_mask):
    """Evaluate the model on a held-out split.

    Runs ``self.predict`` on ``test_data`` (with ``fwd_mask``) and scores
    the predicted labels against ``test_labels`` via ``utils.validate``.

    Fix: the original used Python 2 ``print`` *statements*, which are a
    SyntaxError under Python 3.  Single-argument ``print(...)`` calls
    produce identical output under both interpreters.

    Returns:
        (precision, recall, f1) as produced by ``utils.validate``;
        the support value is computed but not returned.
    """
    if self.log_level > 0:
        print("Test model")
    (pred_targets, pred_labels) = self.predict(test_data, fwd_mask)
    (precision, recall, f1, sup) = utils.validate(test_labels, pred_labels, K)
    if self.log_level > 0:
        print(precision)
        print(recall)
        print(f1)
    return precision, recall, f1
outputs = net(images) outputs.detach_() loss = criterion(outputs, labels) loss_sigma += loss.item() _, predicted = torch.max(outputs.data, 1) for j in range(len(labels)): conf_mat[labels[j].numpy(), predicted[j].numpy()] += 1.0 print('valid set Acc:{:.2%}'.format(conf_mat.trace() / conf_mat.sum())) writer.add_scalars('Loss_group', {'valid_loss': loss_sigma / len(valid_data)}, epoch) writer.add_scalars('Accuracy_group', {'valid_acc': conf_mat.trace() / conf_mat.sum()}, epoch) scheduler.step() # 更新学习率 print('Finished Training') # ------------------------------------ step5: 保存模型 并且绘制混淆矩阵图 ------------------------------------ net_save_path = os.path.join(log_dir, 'net_params.pkl') torch.save(net.state_dict(), net_save_path) conf_mat_train, train_acc = validate(net, train_loader, 'train', classes_name) conf_mat_valid, valid_acc = validate(net, valid_loader, 'valid', classes_name) show_confMat(conf_mat_train, classes_name, 'train', log_dir) show_confMat(conf_mat_valid, classes_name, 'valid', log_dir)
def main():
    """Single-process training / evaluation entry point.

    Parses CLI args, builds the model and train/val dataflows, optionally
    evaluates only (``--evaluate``) or resumes from a checkpoint, then runs
    the train/validate loop, logging to stdout, a log file and tensorboard,
    and checkpointing the best top-1 model.
    """
    global args
    parser = arg_parser()
    args = parser.parse_args()
    cudnn.benchmark = True
    num_classes, train_list_name, val_list_name, test_list_name, filename_seperator, image_tmpl, filter_video, label_file = get_dataset_config(
        args.dataset, args.use_lmdb)
    args.num_classes = num_classes
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    # input channels: 3 for RGB; optical flow stacks 2 channels x 5 frames
    if args.modality == 'rgb':
        args.input_channels = 3
    elif args.modality == 'flow':
        args.input_channels = 2 * 5
    model, arch_name = build_model(args)
    mean = model.mean(args.modality)
    std = model.std(args.modality)
    # overwrite mean and std if they are presented in command
    if args.mean is not None:
        if args.modality == 'rgb':
            if len(args.mean) != 3:
                raise ValueError(
                    "When training with rgb, dim of mean must be three.")
        elif args.modality == 'flow':
            if len(args.mean) != 1:
                raise ValueError(
                    "When training with flow, dim of mean must be three.")
        mean = args.mean
    if args.std is not None:
        if args.modality == 'rgb':
            if len(args.std) != 3:
                raise ValueError(
                    "When training with rgb, dim of std must be three.")
        elif args.modality == 'flow':
            if len(args.std) != 1:
                raise ValueError(
                    "When training with flow, dim of std must be three.")
        std = args.std
    model = model.cuda()
    model.eval()
    # dummy input shape used only for the torchsummary pass
    # NOTE(review): channel count is hard-coded to 3 here, unlike
    # main_worker which uses args.input_channels -- flow modality would get
    # the wrong summary shape; confirm intended.
    if args.threed_data:
        dummy_data = (3, args.groups, args.input_size, args.input_size)
    else:
        dummy_data = (3 * args.groups, args.input_size, args.input_size)
    model_summary = torchsummary.summary(model, input_size=dummy_data)
    torch.cuda.empty_cache()
    if args.show_model:
        # print-and-exit mode
        print(model)
        print(model_summary)
        return 0
    model = torch.nn.DataParallel(model).cuda()
    if args.pretrained is not None:
        print("=> using pre-trained model '{}'".format(arch_name))
        checkpoint = torch.load(args.pretrained, map_location='cpu')
        if args.transfer:
            # transfer learning: drop final fc weights so the new head can
            # be trained for the target dataset
            new_dict = {}
            for k, v in checkpoint['state_dict'].items():
                # TODO: a better approach:
                if k.replace("module.", "").startswith("fc"):
                    continue
                new_dict[k] = v
        else:
            new_dict = checkpoint['state_dict']
        model.load_state_dict(new_dict, strict=False)
    else:
        print("=> creating model '{}'".format(arch_name))
    # define loss function (criterion) and optimizer
    train_criterion = nn.CrossEntropyLoss().cuda()
    val_criterion = nn.CrossEntropyLoss().cuda()
    # Data loading code
    video_data_cls = VideoDataSetLMDB if args.use_lmdb else VideoDataSet
    val_list = os.path.join(args.datadir, val_list_name)
    val_augmentor = get_augmentor(False, args.input_size, mean, std,
                                  args.disable_scaleup,
                                  threed_data=args.threed_data,
                                  version=args.augmentor_ver,
                                  scale_range=args.scale_range)
    val_dataset = video_data_cls(args.datadir, val_list, args.groups,
                                 args.frames_per_group,
                                 num_clips=args.num_clips,
                                 modality=args.modality,
                                 image_tmpl=image_tmpl,
                                 dense_sampling=args.dense_sampling,
                                 transform=val_augmentor, is_train=False,
                                 test_mode=False,
                                 seperator=filename_seperator,
                                 filter_video=filter_video)
    val_loader = build_dataflow(val_dataset, is_train=False,
                                batch_size=args.batch_size,
                                workers=args.workers)
    log_folder = os.path.join(args.logdir, arch_name)
    if not os.path.exists(log_folder):
        os.makedirs(log_folder)
    if args.evaluate:
        # evaluation-only mode: score the validation set once and exit
        logfile = open(os.path.join(log_folder, 'evaluate_log.log'), 'a')
        flops, params = extract_total_flops_params(model_summary)
        print(model_summary)
        val_top1, val_top5, val_losses, val_speed = validate(
            val_loader, model, val_criterion)
        # same line goes to stdout and to the evaluate log
        print(
            'Val@{}: \tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch\tFlops: {}\tParams: {}'
            .format(args.input_size, val_losses, val_top1, val_top5,
                    val_speed * 1000.0, flops, params), flush=True)
        print(
            'Val@{}: \tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch\tFlops: {}\tParams: {}'
            .format(args.input_size, val_losses, val_top1, val_top5,
                    val_speed * 1000.0, flops, params), flush=True,
            file=logfile)
        return
    train_list = os.path.join(args.datadir, train_list_name)
    train_augmentor = get_augmentor(True, args.input_size, mean, std,
                                    threed_data=args.threed_data,
                                    version=args.augmentor_ver,
                                    scale_range=args.scale_range)
    train_dataset = video_data_cls(args.datadir, train_list, args.groups,
                                   args.frames_per_group,
                                   num_clips=args.num_clips,
                                   modality=args.modality,
                                   image_tmpl=image_tmpl,
                                   dense_sampling=args.dense_sampling,
                                   transform=train_augmentor, is_train=True,
                                   test_mode=False,
                                   seperator=filename_seperator,
                                   filter_video=filter_video)
    train_loader = build_dataflow(train_dataset, is_train=True,
                                  batch_size=args.batch_size,
                                  workers=args.workers)
    sgd_polices = model.parameters()
    optimizer = torch.optim.SGD(sgd_polices, args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=args.nesterov)
    # learning-rate schedule selected by CLI flag; all decays use gamma=0.1
    if args.lr_scheduler == 'step':
        scheduler = lr_scheduler.StepLR(optimizer, args.lr_steps[0], gamma=0.1)
    elif args.lr_scheduler == 'multisteps':
        scheduler = lr_scheduler.MultiStepLR(optimizer, args.lr_steps,
                                             gamma=0.1)
    elif args.lr_scheduler == 'cosine':
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer, args.epochs,
                                                   eta_min=0)
    elif args.lr_scheduler == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min',
                                                   verbose=True)
    best_top1 = 0.0
    tensorboard_logger.configure(os.path.join(log_folder))
    # optionally resume from a checkpoint
    if args.resume:
        logfile = open(os.path.join(log_folder, 'log.log'), 'a')
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_top1 = checkpoint['best_top1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            try:
                scheduler.load_state_dict(checkpoint['scheduler'])
            # NOTE(review): bare except silently drops scheduler state
            # (e.g. old checkpoints without a 'scheduler' key)
            except:
                pass
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            raise ValueError("Checkpoint is not found: {}".format(args.resume))
    else:
        # fresh run: rotate any previous log file before truncating
        if os.path.exists(os.path.join(log_folder, 'log.log')):
            shutil.copyfile(
                os.path.join(log_folder, 'log.log'),
                os.path.join(log_folder,
                             'log.log.{}'.format(int(time.time()))))
        logfile = open(os.path.join(log_folder, 'log.log'), 'w')
    command = " ".join(sys.argv)
    print(command, flush=True)
    print(args, flush=True)
    print(model, flush=True)
    print(model_summary, flush=True)
    print(command, file=logfile, flush=True)
    print(args, file=logfile, flush=True)
    if args.resume == '':
        print(model, file=logfile, flush=True)
        print(model_summary, flush=True, file=logfile)
    for epoch in range(args.start_epoch, args.epochs):
        if args.lr_scheduler == 'plateau':
            # NOTE(review): on the first loop iteration val_losses has not
            # been assigned yet in this function, so the plateau scheduler
            # path raises UnboundLocalError -- confirm and fix ordering
            # (the distributed main_worker steps *after* validate()).
            scheduler.step(val_losses, epoch)
        else:
            scheduler.step(epoch)
        try:
            # get_lr get all lrs for every layer of current epoch, assume the lr for all layers are identical
            lr = scheduler.optimizer.param_groups[0]['lr']
        except:  # NOTE(review): bare except; lr is only used for logging below
            lr = None
        # set current learning rate
        # train for one epoch
        train_top1, train_top5, train_losses, train_speed, speed_data_loader, train_steps = \
            train(train_loader, model, train_criterion, optimizer, epoch + 1,
                  display=args.print_freq,
                  label_smoothing=args.label_smoothing,
                  clip_gradient=args.clip_gradient)
        # identical train summary to the log file and to stdout
        print(
            'Train: [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch\tData loading: {:.2f} ms/batch'
            .format(epoch + 1, args.epochs, train_losses, train_top1,
                    train_top5, train_speed * 1000.0,
                    speed_data_loader * 1000.0), file=logfile, flush=True)
        print(
            'Train: [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch\tData loading: {:.2f} ms/batch'
            .format(epoch + 1, args.epochs, train_losses, train_top1,
                    train_top5, train_speed * 1000.0,
                    speed_data_loader * 1000.0), flush=True)
        # evaluate on validation set
        val_top1, val_top5, val_losses, val_speed = validate(
            val_loader, model, val_criterion)
        print(
            'Val : [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch'
            .format(epoch + 1, args.epochs, val_losses, val_top1, val_top5,
                    val_speed * 1000.0), file=logfile, flush=True)
        print(
            'Val : [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch'
            .format(epoch + 1, args.epochs, val_losses, val_top1, val_top5,
                    val_speed * 1000.0), flush=True)
        # remember best prec@1 and save checkpoint
        is_best = val_top1 > best_top1
        best_top1 = max(val_top1, best_top1)
        save_dict = {
            'epoch': epoch + 1,
            'arch': arch_name,
            'state_dict': model.state_dict(),
            'best_top1': best_top1,
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict()
        }
        save_checkpoint(save_dict, is_best, filepath=log_folder)
        # tensorboard scalars, logged once per epoch
        if lr is not None:
            tensorboard_logger.log_value('learning-rate', lr, epoch + 1)
        tensorboard_logger.log_value('val-top1', val_top1, epoch + 1)
        tensorboard_logger.log_value('val-loss', val_losses, epoch + 1)
        tensorboard_logger.log_value('train-top1', train_top1, epoch + 1)
        tensorboard_logger.log_value('train-loss', train_losses, epoch + 1)
        tensorboard_logger.log_value('best-val-top1', best_top1, epoch + 1)
    logfile.close()
correct += (predicted == labels).squeeze().sum().cpu().numpy() loss_sigma += loss.item() # 每10个iteration 打印一次训练信息,loss为10个iteration的平均 if i % 10 == 9: loss_avg = loss_sigma / 10 loss_sigma = 0.0 print( "Testing: Iteration[{:0>3}/{:0>3}] Loss: {:.4f} Acc:{:.2%}".format( i + 1, len(test_dataloader), loss_avg, correct / total)) # 记录训练loss writer.add_scalars('Loss_group', {'test_loss': loss_avg}, i) # 记录learning rate #writer.add_scalar('learning rate', scheduler.get_lr()[0], epoch) # 记录Accuracy writer.add_scalars('Accuracy_group', {'test_acc': correct / total}, i) # ------------------------------------ step5: 保存模型 并且绘制混淆矩阵图 ------------------------------------ net_save_path = os.path.join(log_dir, 'net_params.pkl') torch.save(net.state_dict(), net_save_path) conf_mat_train, train_acc = validate(net, train_dataloader, 'train', classes_name) conf_mat_valid, valid_acc = validate(net, val_dataloader, 'valid', classes_name) conf_mat_test, test_acc = validate(net, test_dataloader, 'test', classes_name) show_confMat(conf_mat_train, classes_name, 'train', log_dir) show_confMat(conf_mat_valid, classes_name, 'valid', log_dir) show_confMat(conf_mat_test, classes_name, 'test', log_dir)
def main_worker(gpu, ngpus_per_node, args):
    """Per-process training worker (single-GPU, DataParallel or DDP).

    Args:
        gpu: local GPU index for this process (or None for CPU/DataParallel).
        ngpus_per_node: GPUs on this node; used to derive the global rank.
        args: parsed CLI namespace; mutated in place (gpu, rank, batch_size,
            workers, num_classes, input_channels, start_epoch).

    File/console output (log file, tensorboard, checkpoints) is performed
    only on rank 0; dist.barrier() keeps ranks in step when distributed.
    """
    cudnn.benchmark = args.cudnn_benchmark
    args.gpu = gpu
    num_classes, train_list_name, val_list_name, test_list_name, filename_seperator, image_tmpl, filter_video, label_file = get_dataset_config(
        args.dataset, args.use_lmdb)
    args.num_classes = num_classes
    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))
    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    # input channels: 3 for RGB; optical flow stacks 2 channels x 5 frames
    if args.modality == 'rgb':
        args.input_channels = 3
    elif args.modality == 'flow':
        args.input_channels = 2 * 5
    model, arch_name = build_model(args)
    mean = model.mean(args.modality)
    std = model.std(args.modality)
    # overwrite mean and std if they are presented in command
    if args.mean is not None:
        if args.modality == 'rgb':
            if len(args.mean) != 3:
                raise ValueError(
                    "When training with rgb, dim of mean must be three.")
        elif args.modality == 'flow':
            if len(args.mean) != 1:
                raise ValueError(
                    "When training with flow, dim of mean must be three.")
        mean = args.mean
    if args.std is not None:
        if args.modality == 'rgb':
            if len(args.std) != 3:
                raise ValueError(
                    "When training with rgb, dim of std must be three.")
        elif args.modality == 'flow':
            if len(args.std) != 1:
                raise ValueError(
                    "When training with flow, dim of std must be three.")
        std = args.std
    model = model.cuda(args.gpu)
    model.eval()
    # dummy input shape used only for the torchsummary pass (rank 0 only)
    if args.threed_data:
        dummy_data = (args.input_channels, args.groups, args.input_size,
                      args.input_size)
    else:
        dummy_data = (args.input_channels * args.groups, args.input_size,
                      args.input_size)
    if args.rank == 0:
        model_summary = torchsummary.summary(model, input_size=dummy_data)
        torch.cuda.empty_cache()
    if args.show_model and args.rank == 0:
        # print-and-exit mode
        print(model)
        print(model_summary)
        return 0
    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            # the batch size should be divided by number of nodes as well
            args.batch_size = int(args.batch_size / args.world_size)
            args.workers = int(args.workers / ngpus_per_node)
            if args.sync_bn:
                process_group = torch.distributed.new_group(
                    list(range(args.world_size)))
                model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(
                    model, process_group)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        # assign rank to 0
        model = torch.nn.DataParallel(model).cuda()
        args.rank = 0
    if args.pretrained is not None:
        if args.rank == 0:
            print("=> using pre-trained model '{}'".format(arch_name))
        # map to CPU when no GPU is pinned, otherwise straight to this GPU
        if args.gpu is None:
            checkpoint = torch.load(args.pretrained, map_location='cpu')
        else:
            checkpoint = torch.load(args.pretrained,
                                    map_location='cuda:{}'.format(args.gpu))
        if args.transfer:
            # transfer learning: drop final fc weights so the new head can
            # be trained for the target dataset
            new_dict = {}
            for k, v in checkpoint['state_dict'].items():
                # TODO: a better approach:
                if k.replace("module.", "").startswith("fc"):
                    continue
                new_dict[k] = v
        else:
            new_dict = checkpoint['state_dict']
        model.load_state_dict(new_dict, strict=False)
        del checkpoint  # dereference seems crucial
        torch.cuda.empty_cache()
    else:
        if args.rank == 0:
            print("=> creating model '{}'".format(arch_name))
    # define loss function (criterion) and optimizer
    train_criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    val_criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    # Data loading code
    val_list = os.path.join(args.datadir, val_list_name)
    val_augmentor = get_augmentor(
        False,
        args.input_size,
        scale_range=args.scale_range,
        mean=mean,
        std=std,
        disable_scaleup=args.disable_scaleup,
        threed_data=args.threed_data,
        is_flow=True if args.modality == 'flow' else False,
        version=args.augmentor_ver)
    video_data_cls = VideoDataSetLMDB if args.use_lmdb else VideoDataSet
    val_dataset = video_data_cls(args.datadir, val_list, args.groups,
                                 args.frames_per_group,
                                 num_clips=args.num_clips,
                                 modality=args.modality,
                                 image_tmpl=image_tmpl,
                                 dense_sampling=args.dense_sampling,
                                 transform=val_augmentor, is_train=False,
                                 test_mode=False,
                                 seperator=filename_seperator,
                                 filter_video=filter_video)
    val_loader = build_dataflow(val_dataset, is_train=False,
                                batch_size=args.batch_size,
                                workers=args.workers,
                                is_distributed=args.distributed)
    log_folder = os.path.join(args.logdir, arch_name)
    if args.rank == 0:
        if not os.path.exists(log_folder):
            os.makedirs(log_folder)
    if args.evaluate:
        # evaluation-only mode: all ranks validate, rank 0 reports and logs
        val_top1, val_top5, val_losses, val_speed = validate(val_loader,
                                                             model,
                                                             val_criterion,
                                                             gpu_id=args.gpu)
        if args.rank == 0:
            logfile = open(os.path.join(log_folder, 'evaluate_log.log'), 'a')
            flops, params = extract_total_flops_params(model_summary)
            print(
                'Val@{}: \tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch\tFlops: {}\tParams: {}'
                .format(args.input_size, val_losses, val_top1, val_top5,
                        val_speed * 1000.0, flops, params), flush=True)
            print(
                'Val@{}: \tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch\tFlops: {}\tParams: {}'
                .format(args.input_size, val_losses, val_top1, val_top5,
                        val_speed * 1000.0, flops, params), flush=True,
                file=logfile)
        return
    train_list = os.path.join(args.datadir, train_list_name)
    train_augmentor = get_augmentor(
        True,
        args.input_size,
        scale_range=args.scale_range,
        mean=mean,
        std=std,
        disable_scaleup=args.disable_scaleup,
        threed_data=args.threed_data,
        is_flow=True if args.modality == 'flow' else False,
        version=args.augmentor_ver)
    train_dataset = video_data_cls(args.datadir, train_list, args.groups,
                                   args.frames_per_group,
                                   num_clips=args.num_clips,
                                   modality=args.modality,
                                   image_tmpl=image_tmpl,
                                   dense_sampling=args.dense_sampling,
                                   transform=train_augmentor, is_train=True,
                                   test_mode=False,
                                   seperator=filename_seperator,
                                   filter_video=filter_video)
    train_loader = build_dataflow(train_dataset, is_train=True,
                                  batch_size=args.batch_size,
                                  workers=args.workers,
                                  is_distributed=args.distributed)
    sgd_polices = model.parameters()
    optimizer = torch.optim.SGD(sgd_polices, args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=args.nesterov)
    # learning-rate schedule selected by CLI flag; all decays use gamma=0.1
    if args.lr_scheduler == 'step':
        scheduler = lr_scheduler.StepLR(optimizer, args.lr_steps[0], gamma=0.1)
    elif args.lr_scheduler == 'multisteps':
        scheduler = lr_scheduler.MultiStepLR(optimizer, args.lr_steps,
                                             gamma=0.1)
    elif args.lr_scheduler == 'cosine':
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer, args.epochs,
                                                   eta_min=0)
    elif args.lr_scheduler == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min',
                                                   verbose=True)
    best_top1 = 0.0
    # optionally resume from a checkpoint
    if args.resume:
        if args.rank == 0:
            logfile = open(os.path.join(log_folder, 'log.log'), 'a')
        if os.path.isfile(args.resume):
            if args.rank == 0:
                print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume, map_location='cpu')
            else:
                checkpoint = torch.load(args.resume,
                                        map_location='cuda:{}'.format(
                                            args.gpu))
            args.start_epoch = checkpoint['epoch']
            # TODO: handle distributed version
            best_top1 = checkpoint['best_top1']
            if args.gpu is not None:
                # best_top1 may have been saved as a tensor; move it to
                # this process's device
                if not isinstance(best_top1, float):
                    best_top1 = best_top1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            try:
                scheduler.load_state_dict(checkpoint['scheduler'])
            # NOTE(review): bare except silently drops scheduler state
            except:
                pass
            if args.rank == 0:
                print("=> loaded checkpoint '{}' (epoch {})".format(
                    args.resume, checkpoint['epoch']))
            del checkpoint  # dereference seems crucial
            torch.cuda.empty_cache()
        else:
            raise ValueError("Checkpoint is not found: {}".format(args.resume))
    else:
        # fresh run: rank 0 rotates any previous log file before truncating
        if os.path.exists(os.path.join(log_folder,
                                       'log.log')) and args.rank == 0:
            shutil.copyfile(
                os.path.join(log_folder, 'log.log'),
                os.path.join(log_folder,
                             'log.log.{}'.format(int(time.time()))))
        if args.rank == 0:
            logfile = open(os.path.join(log_folder, 'log.log'), 'w')
    if args.rank == 0:
        command = " ".join(sys.argv)
        tensorboard_logger.configure(os.path.join(log_folder))
        print(command, flush=True)
        print(args, flush=True)
        print(model, flush=True)
        print(command, file=logfile, flush=True)
        print(model_summary, flush=True)
        print(args, file=logfile, flush=True)
    if args.resume == '' and args.rank == 0:
        print(model, file=logfile, flush=True)
        print(model_summary, flush=True, file=logfile)
    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch
        train_top1, train_top5, train_losses, train_speed, speed_data_loader, train_steps = \
            train(train_loader, model, train_criterion, optimizer, epoch + 1,
                  display=args.print_freq,
                  label_smoothing=args.label_smoothing,
                  clip_gradient=args.clip_gradient, gpu_id=args.gpu,
                  rank=args.rank)
        if args.distributed:
            dist.barrier()
        # evaluate on validation set
        val_top1, val_top5, val_losses, val_speed = validate(val_loader,
                                                             model,
                                                             val_criterion,
                                                             gpu_id=args.gpu)
        # update current learning rate (plateau steps on the val loss)
        if args.lr_scheduler == 'plateau':
            scheduler.step(val_losses)
        else:
            scheduler.step(epoch + 1)
        if args.distributed:
            dist.barrier()
        # only logging at rank 0
        if args.rank == 0:
            print(
                'Train: [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch\tData loading: {:.2f} ms/batch'
                .format(epoch + 1, args.epochs, train_losses, train_top1,
                        train_top5, train_speed * 1000.0,
                        speed_data_loader * 1000.0), file=logfile, flush=True)
            print(
                'Train: [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch\tData loading: {:.2f} ms/batch'
                .format(epoch + 1, args.epochs, train_losses, train_top1,
                        train_top5, train_speed * 1000.0,
                        speed_data_loader * 1000.0), flush=True)
            print(
                'Val : [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch'
                .format(epoch + 1, args.epochs, val_losses, val_top1,
                        val_top5, val_speed * 1000.0), file=logfile,
                flush=True)
            print(
                'Val : [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch'
                .format(epoch + 1, args.epochs, val_losses, val_top1,
                        val_top5, val_speed * 1000.0), flush=True)
            # remember best prec@1 and save checkpoint
            is_best = val_top1 > best_top1
            best_top1 = max(val_top1, best_top1)
            save_dict = {
                'epoch': epoch + 1,
                'arch': arch_name,
                'state_dict': model.state_dict(),
                'best_top1': best_top1,
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict()
            }
            save_checkpoint(save_dict, is_best, filepath=log_folder)
            try:
                # get_lr get all lrs for every layer of current epoch, assume the lr for all layers are identical
                lr = scheduler.optimizer.param_groups[0]['lr']
            except Exception as e:
                lr = None
            # tensorboard scalars, logged once per epoch
            if lr is not None:
                tensorboard_logger.log_value('learning-rate', lr, epoch + 1)
            tensorboard_logger.log_value('val-top1', val_top1, epoch + 1)
            tensorboard_logger.log_value('val-loss', val_losses, epoch + 1)
            tensorboard_logger.log_value('train-top1', train_top1, epoch + 1)
            tensorboard_logger.log_value('train-loss', train_losses, epoch + 1)
            tensorboard_logger.log_value('best-val-top1', best_top1, epoch + 1)
        if args.distributed:
            dist.barrier()
    if args.rank == 0:
        logfile.close()
############################################ # Let's start running # ############################################ epoch_steps = int(TRAIN_SAMPLES / FLAGS.batch_size) print('number of steps each epoch: ', epoch_steps) epoch_index = 0 max_steps = FLAGS.epoch_number * epoch_steps ori_time = time.time() next_save_time = FLAGS.save_interval_secs for step in range(max_steps): start_time = time.time() if step % epoch_steps == 0: epoch_index += 1 if epoch_index > 0: sess.run(init_local_val) accuracy_val_value = utils.validate(sess, accuracy_val, FLAGS.batch_size, VAL_SAMPLES) duration = time.time() - start_time duration = float(duration) / 60.0 val_format = 'Time of validation after epoch %02d: %.2f mins, val accuracy: %.4f' print(val_format % (epoch_index - 1, duration, accuracy_val_value)) [_, total_l_value, entropy_l_value, reg_l_value, acc_value] = \ sess.run([train_op, total_loss, cross_entropy, reg_loss, accuracy]) total_duration = time.time() - ori_time total_duration = float(total_duration) assert not np.isnan(total_l_value), 'Model diverged with loss = NaN' if step % FLAGS.log_every_n_steps == 0:
def main():
    """Validate adv -> Rescale -> Validate scaled adv.

    Loads precomputed adversaries, measures their success rate against
    the model, rescales them with every interpolation method at each
    scale, re-measures success, then saves a CSV and plots the results.
    """
    model = init_models()
    preprocessing = dict(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], axis=-3
    )
    dataset_loader, dataset_size = utils.load_dataset(
        dataset_path=DATASET_PATH, dataset_image_len=DATASET_IMAGE_NUM
    )

    # use GPU if available
    if torch.cuda.is_available():
        model = model.cuda()

    fmodel = foolbox.models.PyTorchModel(
        model,
        bounds=(0, 1),
        num_classes=len(CLASS_NAMES),
        preprocessing=preprocessing,
    )

    advs = np.load(ADV_SAVE_PATH)

    # * TASK 1/3: validate original adversaries
    control_group_acc = utils.validate(
        fmodel, dataset_loader, dataset_size, batch_size=BATCH_SIZE, advs=advs
    )

    # * TASK 2/3: resize adversaries
    scales = [0.5, 2]
    methods = [
        "INTER_NEAREST",
        "INTER_LINEAR",
        "INTER_AREA",
        "INTER_CUBIC",
        "INTER_LANCZOS4",
    ]

    # resized_advs[method][scale] -> rescaled adversary batch
    resized_advs = {m: {s: None for s in scales} for m in methods}
    progress = tqdm(total=len(scales) * len(methods), desc="SCL")
    for m in methods:
        for s in scales:
            resized_advs[m][s] = utils.scale_adv(advs, s, m)
            progress.update(1)
    progress.close()

    # * TASK 3/3: validate resized adversaries
    print(
        "{:<19} - success: {}%".format("CONTROL_GROUP ×1", 100 - control_group_acc)
    )

    # success rate keyed by scale, plus a flat mapping for the CSV writer
    control_rate = 100.0 - control_group_acc
    success_data = {1: {"CONTROL_GROUP": control_rate}, 0.5: {}, 2: {}}
    success_data_flatten = {"CONTROL_GROUP ×1": control_rate}

    for s in scales:
        for m in methods:
            acc = utils.validate(
                fmodel,
                dataset_loader,
                dataset_size,
                batch_size=BATCH_SIZE,
                advs=resized_advs[m][s],
                silent=True,
            )
            rate = 100.0 - acc
            success_data[s][m] = rate
            success_data_flatten["{} ×{}".format(m, s)] = rate
            print("{:<14} ×{:<3} - success: {}%".format(m, s, rate))

    save_results_csv(success_data_flatten)

    # %%
    # * Plot results (success rate - advs)
    plot_results(success_data, success_data_flatten)
def updateModuleRule(self, phase, module, method, rule_data):
    """
    Generic method to add new existing rule or update existing one for given module.

    Locks the cluster for the duration of the update, loads the module's
    current rules (at the version recorded in version.json), validates the
    new rule against the module schema, merges it into the rule list, and —
    only when something actually changed — writes the new rule file to S3
    under a fresh timestamped version and bumps version.json.

    Returns:
        (retVal, response): retVal 0 on success, -1 on error or when the
        rule is already present; response is a human-readable message
        (the exception text on failure).
    """
    unlock_cluster = False
    response = "Rule has been added/updated successfully"
    retVal = 0
    try:
        # take the cluster update lock; remember we hold it so the
        # unlock below runs even if anything in this block throws
        self.lock_cluster_for_update()
        unlock_cluster = True
        version_file_path_s3 = utils.get_version_path(self.cluster_id)
        version_data_dict = S3Persistence.get_version_data(
            version_file_path_s3)
        #read the version of module from version.json
        # NOTE(review): the version is always read (and later written)
        # under the access_phase key regardless of the 'phase' argument --
        # confirm whether other phases should use their own key.
        module_version = version_data_dict[module_rule_utils.modules][
            module_rule_utils.access_phase][module]
        existing_rules = utils.get_existing_rules(self.cluster_id, phase,
                                                  module, module_version)
        module_rule = ModuleRuleFactory.buildModule(module, rule_data)
        new_rule = module_rule.build()
        #TODO (navneet) : Handle this better way
        # schema validation is per-module; unknown modules skip validation
        err = None
        if module_rule_utils.access_module == module:
            retVal, err = utils.validate(new_rule, access.schema)
        elif module_rule_utils.ratelimiter_module == module:
            retVal, err = utils.validate(new_rule, ratelimiter.schema)
        if retVal != 0:
            raise Exception(err)
        present, index = module_rule_utils.rule_present(
            existing_rules, new_rule)
        if present == False:
            # brand new rule: append
            existing_rules.append(new_rule)
        else:
            existing_rule = existing_rules[index]
            if module_rule_utils.rule_already_present(
                    existing_rule, new_rule):
                retVal = -1
                #no need to update again since rule is there
                response = "Rule is already present in datastore"
            else:
                # same rule key with changed content: replace
                del existing_rules[index]
                existing_rules.append(new_rule)
        if retVal == 0:
            #update only if new rule or some change to existing rule
            #update the version in version.json for module
            version = utils.get_timestamped_version()
            module_rules_file_new_path_s3 = utils.get_module_s3_path(
                self.cluster_id, phase, module, version)
            s3_util.put_obj_to_json(module_rules_file_new_path_s3,
                                    existing_rules)
            version_data_dict[module_rule_utils.modules][
                module_rule_utils.access_phase][module] = version
            s3_util.put_obj_to_json(version_file_path_s3, version_data_dict)
    except Exception as e:
        # any failure (including schema rejection raised above) is
        # reported to the caller as (-1, message) instead of propagating
        (retVal, response) = (-1, str(e))
    # release the lock outside try/except so it also runs on error paths
    if unlock_cluster is True:
        self.unlock_cluster_for_update()
    return (retVal, response)
def main():
    """Validate -> Attack -> Revalidate.

    Measures the model's clean accuracy, generates adversaries for every
    batch with the configured attack, records perturbation distances,
    optionally saves distances/adversaries, then measures adversarial
    accuracy.

    Fix: the distance summary line labels its middle value "mean" (and the
    section comment says "Evaluate mean distance") but the original passed
    ``np.median(distances)``; it now reports ``distances.mean()`` so the
    printed number matches its label.
    """
    model = init_models(TARGET_MODEL)
    preprocessing = dict(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], axis=-3
    )
    dataset_loader, dataset_size = utils.load_dataset(
        dataset_path=DATASET_PATH,
        dataset_image_len=DATASET_IMAGE_NUM,
        batch_size=BATCH_SIZE,
    )

    # Use GPU if available
    if torch.cuda.is_available():
        model = model.cuda()

    fmodel = foolbox.models.PyTorchModel(
        model,
        bounds=(0, 1),
        num_classes=len(CLASS_NAMES),
        preprocessing=preprocessing,
    )

    # * 1/3: Validate model's base prediction accuracy (about 97%)
    utils.validate(fmodel, dataset_loader, dataset_size, batch_size=BATCH_SIZE)

    # * 2/3: Perform an adversarial attack
    attack = attack_switcher(ATTACK_METHOD, fmodel)

    tic = time.time()
    pbar = tqdm(dataset_loader)
    pbar.set_description("ATT")

    adversaries = []
    distances = []
    for image, label in pbar:
        adv = attack(**attack_params(ATTACK_METHOD, image, label))
        adv_batch = []
        for single_adv, single_img in zip(adv, image.numpy()):
            # If an attack failed, replace adv with original image
            if np.isnan(single_adv).any():
                single_adv = single_img
            perturb = single_adv - single_img
            # Only DeepFool and CW attacks are evaluated with L2 norm
            _lp = norm(
                perturb.flatten(), 2 if ATTACK_METHOD in ["cw", "df"] else np.inf
            )
            # For attacks with minimization approaches (deep fool, cw, hop skip jump),
            # if distance larger than threshold, we consider attack failed
            if _lp > THRESHOLD[BUDGET_LEVEL][ATTACK_METHOD] and ATTACK_METHOD in [
                "df",
                "cw",
                "hsj",
            ]:
                # _lp = 0.0
                single_adv = single_img
            if np.isnan(_lp):
                _lp = 0.0
            distances.append(_lp)
            adv_batch.append(single_adv)
        adversaries.append(adv_batch)
    adversaries = np.array(adversaries)

    # Total attack time
    toc = time.time()
    time_elapsed = toc - tic
    print(
        "Adversaries generated in: {:.2f}m {:.2f}s".format(
            time_elapsed // 60, time_elapsed % 60
        )
    )

    #! Evaluate mean distance for attacks other than HSJA (HSJA is evaluated manually)
    if ATTACK_METHOD not in ["hsj"]:
        distances = np.asarray(distances)
        if SAVE_DIST:
            np.save("dist_{}.npy".format(ATTACK_METHOD), distances)
        # report the mean, as labeled (was np.median)
        print(
            "Distance: min {:.5f}, mean {:.5f}, max {:.5f}".format(
                distances.min(), distances.mean(), distances.max()
            )
        )
        plot_distances(distances)

    # Save generated adversaries
    if SAVE_ADVS:
        if not os.path.exists(ADV_SAVE_PATH):
            os.makedirs(ADV_SAVE_PATH)
        np.save(os.path.join(ADV_SAVE_PATH, ADV_SAVE_NAME), adversaries)

    # * 3/3: Validate model's adversary predictions
    utils.validate(
        fmodel,
        dataset_loader,
        dataset_size,
        batch_size=BATCH_SIZE,
        advs=adversaries,
    )
def import_data(request): data = request.data.get('data') model = request.data.get('model') # models direct fields import_data_structure = { 'Inventory':['part_number','alt_part_number','short_description','condition','product_title',\ 'quantity','unit_price','tag_date', 'hazmat', 'certification',\ 'unit_of_measure', 'stock_location','turn_around_time', 'un_code','hot_sale_item','description',\ ], } # model relational fields import_data_relations = { 'Inventory': { 'product_category': { 'model': 'ProductCategory', 'field': 'name', 'related_name': 'product_category_id', 'additional_data': {} }, 'product_manufacturer': { 'model': 'Manufacturer', 'field': 'name', 'related_name': 'product_manufacturer_id', 'additional_data': {} }, 'supplier': { 'model': 'Supplier', 'field': 'company_name', 'related_name': 'supplier_id', 'additional_data': {} }, }, } boolean_cols = [] integer_cols = [] date_cols = ['tag_date'] yes_no_cols = ['hazmat', 'hot_sale_item'] # try: if model in import_data_structure: # list of columns that are allowed to import allowCols = import_data_structure[model] allowReladedCols = import_data_relations[model] if data: # csv columns input by user inputCols = data.pop(0) data_new = inventory_helper.update_duplicates(data[0:-1]) objects = [] for row in data_new: if any(row): obj = eval(model)() for index, col in enumerate(inputCols): if not utils.validFieldValue(obj, col, row[index]): row[index] = 0 # return Response({'success':False, 'message':'Value of column {} is not valid at row # {}'.format(col.title(), data.index(row)+2)}) # check if column is allowed if col in allowCols: # need to set True or False for integers if col in boolean_cols: row[index] = True if int( row[index]) == 1 else False if col in integer_cols: try: row[index] = int(row[index]) except: row[index] = 0 if col in date_cols: try: row[index] = utils.validate(row[index]) except: row[index] = None # for yes no choices allowed only Yes, No if col in yes_no_cols: v = row[index].title() if v 
in ['Yes', 'No']: row[index] = v else: row[index] = None if col == 'unit_of_measure': v = row[index].upper() if v in ['CM', 'BOX', 'KG']: row[index] = v else: row[index] = None if not row[index] or row[index] == "": row[index] = None setattr(obj, col, row[index]) # lets check if cols belongs to related model than get id for relCol in allowReladedCols: finalRelatedColId = None related_model = globals()[allowReladedCols[relCol] ['model']] kwargs = {} if 'default' in allowReladedCols[ relCol] and allowReladedCols[relCol][ 'default'] is not None: kwargs = { allowReladedCols[relCol]['field']: allowReladedCols[relCol]['default'] } elif relCol in inputCols: # find column index and than value kwargs = { allowReladedCols[relCol]['field']: row[inputCols.index(relCol)] } # check for additional column data - like for manufacuture we need to save their type also so check that additional_data = allowReladedCols[relCol][ 'additional_data'] if additional_data: for ad in additional_data: # check if input has data with this column name if additional_data[ad] in inputCols: # find column index and than value kwargs[ad] = row[inputCols.index( additional_data[ad])] else: # for static data kwargs[ad] = additional_data[ad] queryset = related_model.objects.filter(**kwargs) # check if related item not exist than create new if not queryset.exists(): if 'fetchOnly' not in allowReladedCols[ relCol] and kwargs[allowReladedCols[ relCol]['field']] != '' and kwargs[ allowReladedCols[relCol] ['field']] is not None: related_model(**kwargs).save() finalRelatedColId = queryset.first().id else: finalRelatedColId = queryset.first().id setattr(obj, allowReladedCols[relCol]['related_name'], finalRelatedColId) obj.status = 1 objects.append(obj) model = eval(model) inventory_helper.update_inventory(model, objects) model.objects.bulk_create(objects) return Response({ 'success': True, 'message': 'Record has been imported suscessfully' })