import time

import numpy as np
import torch
import torchnet.meter as tnt

# Repo-local helpers (accuracy, AverageMeter, ProgressMeter, set_bn_eval,
# count_bit, bit_sparse, wrapper, FunctionBitPruningSTE) are assumed to be
# imported elsewhere in this module.


def validate(val_loader, model, criterion, args):
    top1 = tnt.AverageValueMeter()
    losses = tnt.AverageValueMeter()

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):
            if args.gpu is not None:
                input = input.cuda(args.gpu, non_blocking=True)
                target = target.cuda(args.gpu, non_blocking=True)

            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss. AverageValueMeter accumulates
            # sum += value and n += n, so both meters are fed batch totals to
            # make .mean a per-sample average.
            acc1 = accuracy(output, target)
            losses.add(loss.item() * input.size(0), input.size(0))
            top1.add(acc1[0].item() * input.size(0), input.size(0))

            if args.extract_inner_data:
                print('early stop evaluation')
                break

            if i % args.print_freq == 0:
                print('[{}/{}] Loss: {:.4f} Acc: {:.2f}'.format(
                    i, len(val_loader), losses.mean, top1.mean))

    print('acc1: {:.4f}'.format(top1.mean))
    return top1.mean


def train(train_loader, model, criterion, optimizer, epoch, args, writer):
    batch_time = tnt.AverageValueMeter()
    data_time = tnt.AverageValueMeter()
    losses = tnt.AverageValueMeter()
    top1 = tnt.AverageValueMeter()

    # switch to train mode
    model.train()
    if args.freeze_bn:
        model.apply(set_bn_eval)

    end = time.time()
    # batches per epoch come from args.batch_num, supplied explicitly so the
    # same code works for both DALI pipelines and PyTorch loaders
    base_step = epoch * args.batch_num
    for i, data in enumerate(train_loader):
        # measure data loading time
        data_time.add(time.time() - end)

        if args.dali:
            # DALI iterators yield a list of dicts with "data"/"label" keys
            inputs = data[0]["data"]
            targets = data[0]["label"].squeeze().long()
        else:
            inputs = data[0]
            targets = data[1]
        if args.gpu is not None:
            inputs = inputs.cuda(args.gpu, non_blocking=True)
            targets = targets.cuda(args.gpu, non_blocking=True)

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        acc1 = accuracy(outputs, targets)
        losses.add(loss.item(), inputs.size(0))
        top1.add(acc1[0].item(), inputs.size(0))
        writer.add_scalar('train/loss', losses.val, base_step + i)
        writer.add_scalar('train/acc1', top1.val, base_step + i)

        if args.debug:
            # track the learned quantization scales
            for k, v in model.state_dict().items():
                if 'running_scale' in k:
                    if v.shape[0] == 1:
                        writer.add_scalar('train/{}'.format(k), v.item(),
                                          base_step + i)
                    else:
                        writer.add_histogram('train/{}'.format(k), v,
                                             base_step + i)

        # compute gradient and do SGD step; note the order here:
        # backward, then step, then zero_grad for the next iteration
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # measure elapsed time
        batch_time.add(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{}] [{}/{}] Loss: {:.4f} Acc: {:.2f}'.format(
                epoch, i, args.batch_num, losses.val, top1.val))
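

# A minimal sketch of the set_bn_eval helper applied above when
# args.freeze_bn is set; this is an assumption, not the repository's exact
# implementation. The intent is to keep BatchNorm layers in eval mode during
# fine-tuning so their running statistics stay frozen.
def _set_bn_eval_sketch(m):
    if isinstance(m, torch.nn.modules.batchnorm._BatchNorm):
        m.eval()
# usage: model.apply(_set_bn_eval_sketch)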


def train(train_loader, model, criterion, optimizer, epoch, args, writer):
    batch_time = tnt.AverageValueMeter()
    data_time = tnt.AverageValueMeter()
    losses = tnt.AverageValueMeter()
    top1 = tnt.AverageValueMeter()

    # switch to train mode
    model.train()
    if args.freeze_bn:
        model.apply(set_bn_eval)

    end = time.time()
    base_step = epoch * len(train_loader)
    for i, (inputs, targets) in enumerate(train_loader):
        # measure data loading time
        data_time.add(time.time() - end)

        if args.gpu is not None:
            inputs = inputs.cuda(args.gpu, non_blocking=True)
            targets = targets.cuda(args.gpu, non_blocking=True)

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        acc1 = accuracy(outputs, targets)
        losses.add(loss.item(), inputs.size(0))
        top1.add(acc1[0].item(), inputs.size(0))
        writer.add_scalar('train/loss', losses.val, base_step + i)
        writer.add_scalar('train/acc1', top1.val, base_step + i)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.add(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{}] [{}/{}] Loss: {:.4f} Acc: {:.2f}'.format(
                epoch, i, len(train_loader), losses.val, top1.val))
            if args.debug:
                for k, v in model.state_dict().items():
                    if 'weight_int' in k or 'weight_fold_int' in k:
                        # fraction of zero bits, assuming 7 magnitude bits
                        # per weight (sign-magnitude int8)
                        bit_cnt = count_bit(v)
                        bs = 1 - bit_cnt.sum().float() / (v.view(-1).shape[0] * 7)
                        writer.add_scalar('train/bit_sparse/{}'.format(k),
                                          bs, base_step + i)
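

# A minimal sketch of the count_bit helper used for the bit-sparsity logging
# above; this is an assumption for illustration, not the repository's
# implementation. It counts the set bits among the low bit_width magnitude
# bits of each integer weight, which is the quantity the statistic above
# divides by numel * 7 to obtain a sparsity ratio.
def _count_bit_sketch(v, bit_width=7):
    mag = v.long().abs()              # magnitudes of the quantized weights
    bit_cnt = torch.zeros_like(mag)
    for b in range(bit_width):
        bit_cnt += (mag >> b) & 1     # accumulate bit b of every element
    return bit_cnt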


def validate(val_loader, model, criterion, args):
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(val_loader), batch_time, losses, top1, top5,
                             prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    # ============= bit sparsity (disabled) =================
    # total_cnt = 0
    # total_conv_cnt = 0
    # total_weight_cnt = 0
    # total_weight_conv_cnt = 0
    # total_bit_cnt = 0
    # total_bit_conv_cnt = 0
    # for k, v in model.state_dict().items():
    #     if 'weight_int' in k:
    #         cnt_sum = v.view(-1).shape[0]
    #         total_cnt += cnt_sum
    #         total_weight_cnt += (v.float().abs() > 0).sum().float()
    #         bit_cnt = count_bit(v, complement=args.complement)
    #         total_bit_cnt += bit_cnt.sum().float()
    #         if len(v.shape) == 4:
    #             total_conv_cnt += cnt_sum
    #             total_weight_conv_cnt += (v.float().abs() > 0).sum().float()
    #             total_bit_conv_cnt += bit_cnt.sum().float()
    # # two's-complement weights carry 8 bits, sign-magnitude weights 7
    # bit_width = 8 if args.complement else 7
    # total_conv_bs = 1 - total_bit_conv_cnt / total_conv_cnt / bit_width
    # total_bs = 1 - total_bit_cnt / total_cnt / bit_width
    # # alexnet_bp_no_fc quantizes only conv layers, so report conv-only sparsity
    # return_bs = total_conv_bs if 'alexnet_bp_no_fc' in args.arch else total_bs
    # ============= bit sparsity end =================

    if args.extract_inner_data:
        # register hooks that dump intermediate feature maps for debugging
        wrapper.debug_graph_hooks(model)
        print('extract inner feature map and weight')

    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            if args.gpu is not None:
                input = input.cuda(args.gpu, non_blocking=True)
                target = target.cuda(args.gpu, non_blocking=True)

            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(acc1[0], input.size(0))
            top5.update(acc5[0], input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                progress.print(i)

            if args.extract_inner_data:
                # dump every parameter tensor to a text file; weights are
                # re-quantized with the bit-pruning STE first so the dump
                # matches what the quantized forward pass actually used
                for k, v in model.state_dict().items():
                    if 'num_batches_tracked' in k:
                        continue
                    nparray = v.detach().cpu().float().numpy()
                    if 'weight' in k:
                        radix_key = k.replace('weight', 'radix_position')
                        try:
                            radix_position = model.state_dict()[radix_key]
                            v_bp = FunctionBitPruningSTE.apply(v, radix_position)
                            nparray = v_bp.detach().cpu().float().numpy()
                        except KeyError:
                            print('warning: {} does not exist.'.format(radix_key))
                        np_save = nparray.reshape(nparray.shape[0], -1)
                    elif 'bias' in k:
                        np_save = nparray.reshape(1, -1)
                    else:
                        print(k)
                        np_save = nparray.reshape(-1, nparray.shape[-1])
                    np.savetxt('{}.txt'.format(k), np_save,
                               delimiter=' ', fmt='%.8f')
                print('extract inner data done, return')
                break

    # TODO: this should also be done with the ProgressMeter
    print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(top1=top1,
                                                                top5=top5))

    return top1.avg, top5.avg
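

# FunctionBitPruningSTE above is a repo-specific autograd Function. For
# readers unfamiliar with the pattern, here is a generic straight-through
# estimator sketch (an illustration, not the repository's bit-pruning rule):
# the forward quantizes at a given radix position while the backward passes
# the incoming gradient through unchanged.
class _RoundSTESketch(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, radix_position):
        scale = 2.0 ** radix_position   # fixed-point step implied by the radix
        return torch.round(x * scale) / scale

    @staticmethod
    def backward(ctx, grad_output):
        # straight-through: treat d(out)/d(x) as the identity
        return grad_output, None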


def train(train_loader, model, criterion, optimizer, epoch, args, writer):
    # TODO: one-epoch lr config
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(train_loader), batch_time, data_time, losses,
                             top1, top5, prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()
    if args.freeze_bn:
        model.apply(set_bn_eval)

    end = time.time()
    base_step = epoch * len(train_loader)
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.gpu is not None:
            input = input.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)

        # compute output
        output = model(input)
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(acc1[0], input.size(0))
        top5.update(acc5[0], input.size(0))
        writer.add_scalar('train/lr', optimizer.param_groups[0]['lr'],
                          base_step + i)
        writer.add_scalar('train/acc1', top1.avg, base_step + i)
        writer.add_scalar('train/acc5', top5.avg, base_step + i)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.print(i)
            if args.debug:
                for k, v in model.state_dict().items():
                    if 'weight_int' in k:
                        # log the fraction of zero bits in the quantized weights
                        bit_cnt = count_bit(v)
                        bs = bit_sparse(bit_cnt)
                        writer.add_scalar('train/bit_sparse/{}'.format(k),
                                          bs, base_step + i)
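

# A minimal sketch of how the train() and validate() routines above are
# typically wired together. SummaryWriter is the real torch.utils.tensorboard
# API; the attribute args.epochs is an assumption for illustration.
def _training_loop_sketch(train_loader, val_loader, model, criterion,
                          optimizer, args):
    from torch.utils.tensorboard import SummaryWriter

    writer = SummaryWriter()  # logs to ./runs/<timestamp> by default
    best_acc1 = 0.0
    for epoch in range(args.epochs):
        train(train_loader, model, criterion, optimizer, epoch, args, writer)
        acc1, acc5 = validate(val_loader, model, criterion, args)
        writer.add_scalar('val/acc1', acc1, epoch)
        writer.add_scalar('val/acc5', acc5, epoch)
        best_acc1 = max(float(acc1), best_acc1)
    writer.close()
    return best_acc1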