def test(self):
    epoch = self.scheduler.last_epoch + 1
    self.ckp.write_log('\nEvaluation:')
    self.loss.start_log(train=False)
    self.model.eval()

    timer_test = utility.timer()
    timer_test.tic()
    with torch.no_grad():
        for img, label in tqdm(self.loader_test, ncols=80):
            img, label = self.prepare(img, label)
            prediction = self.model(img)
            self.loss(prediction, label, train=False)
    self.loss.end_log(len(self.loader_test.dataset), train=False)

    # Lower is better for all three measures.
    best = self.loss.log_test.min(0)
    for i, measure in enumerate(('Loss', 'Top1 error', 'Top5 error')):
        self.ckp.write_log('{}: {:.3f} (Best: {:.3f} from epoch {})'.format(
            measure, self.loss.log_test[-1, i], best[0][i], best[1][i] + 1))
    if hasattr(self, 'epochs_searching') and self.converging:
        best = self.loss.log_test[:self.epochs_searching, :].min(0)
        self.ckp.write_log('\nBest during searching')
        for i, measure in enumerate(('Loss', 'Top1 error', 'Top5 error')):
            self.ckp.write_log('{}: {:.3f} from epoch {}'.format(
                measure, best[0][i], best[1][i]))
    self.ckp.write_log('Time: {:.2f}s\n'.format(timer_test.toc()), refresh=True)

    is_best = self.loss.log_test[-1, self.args.top] <= best[0][self.args.top]
    self.ckp.save(self, epoch, converging=self.converging, is_best=is_best)
    self.ckp.save_results(epoch, self.model)

    # scheduler.step is moved from the training procedure to the test procedure.
    self.scheduler.step()
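# For reference: the best-tracking above relies on Tensor.min(0) returning a
# (values, indices) pair with per-column minima. A minimal illustration:
import torch

# log_test has one row per evaluated epoch and one column per measure
# (loss, top-1 error, top-5 error); min(0) reduces over the epoch dimension.
log_test = torch.tensor([[1.2, 30.1, 10.5],
                         [0.9, 27.4, 8.9],
                         [1.0, 28.0, 9.2]])
values, indices = log_test.min(0)
print(values)   # tensor([ 0.9000, 27.4000,  8.9000])
print(indices)  # tensor([1, 1, 1]) -> reported as epoch 2 via the +1 offset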
def train(self):
    self.loss.step()
    epoch = self.scheduler.last_epoch + 1
    lr = self.scheduler.get_lr()[0]
    self.ckp.write_log('[Epoch {}]\tLearning rate: {:.2e}'.format(
        epoch, Decimal(lr)))
    self.loss.start_log()
    self.model.train()

    timer_data, timer_model = utility.timer(), utility.timer()
    # Note: the loop variable `lr` (the low-resolution batch) shadows the
    # learning rate read above, which is only used for logging.
    for batch, (lr, hr, _, idx_scale) in enumerate(self.loader_train):
        lr, hr = self.prepare([lr, hr])
        timer_data.hold()
        timer_model.tic()

        self.optimizer.zero_grad()
        sr = self.model(idx_scale, lr)
        loss = self.loss(sr, hr)
        if loss.item() < self.args.skip_threshold * self.error_last:
            loss.backward()
            self.optimizer.step()
        else:
            print('Skip this batch {}! (Loss: {})'.format(
                batch + 1, loss.item()))
        timer_model.hold()

        if (batch + 1) % self.args.print_every == 0:
            self.ckp.write_log('[{}/{}]\t{}\t{:.3f}+{:.3f}s'.format(
                (batch + 1) * self.args.batch_size,
                len(self.loader_train.dataset),
                self.loss.display_loss(batch),
                timer_model.release(),
                timer_data.release()))
        timer_data.tic()

    self.loss.end_log(len(self.loader_train))
    self.error_last = self.loss.log[-1, -1]
    self.scheduler.step()
def test(self):
    epoch = self.scheduler.last_epoch
    self.ckp.write_log('\nEvaluation:')
    self.ckp.add_log(torch.zeros(1, len(self.scale)))
    self.model.eval()

    timer_test = utility.timer()
    with torch.no_grad():
        for idx_scale, scale in enumerate(self.scale):
            eval_acc = 0
            self.loader_test.dataset.set_scale(idx_scale)
            tqdm_test = tqdm(self.loader_test, ncols=80)
            for idx_img, (lr, hr, filename, _) in enumerate(tqdm_test):
                filename = filename[0]
                # A scalar HR tensor means there is no ground truth to evaluate.
                no_eval = (hr.nelement() == 1)
                if not no_eval:
                    lr, hr = self.prepare([lr, hr])
                else:
                    lr = self.prepare([lr])[0]

                sr = self.model(idx_scale, lr)
                sr = utility.quantize(sr, self.args.rgb_range)

                save_list = [sr]
                if not no_eval:
                    eval_acc += utility.calc_psnr(
                        sr, hr, scale, self.args.rgb_range,
                        benchmark=self.loader_test.dataset.benchmark)
                    save_list.extend([lr, hr])

                if self.args.save_results:
                    self.ckp.save_results(filename, save_list, scale)

            self.ckp.log[-1, idx_scale] = eval_acc / len(self.loader_test)
            best = self.ckp.log.max(0)
            self.ckp.write_log(
                '[{} x{}]\tPSNR: {:.3f} (Best: {:.3f} @epoch {})'.format(
                    self.args.data_test, scale,
                    self.ckp.log[-1, idx_scale],
                    best[0][idx_scale],
                    best[1][idx_scale] + 1))

    self.ckp.write_log('Total time: {:.2f}s\n'.format(timer_test.toc()),
                       refresh=True)
    if not self.args.test_only:
        self.ckp.save(self, epoch, is_best=(best[1][0] + 1 == epoch))
def forward(self, x):
    body_list = self.body_list
    timer = utility.timer()
    # Time each block of the body individually and accumulate per-layer totals.
    for i, layer in enumerate(body_list):
        timer.tic()
        x = layer(x)
        self.total_time[i] += timer.toc()
    x = x.view(x.size(0), -1)
    x = self.classifier(x)
    return x
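# utility.timer is not defined in this section. From its call sites
# (tic/toc/hold/release and the .acc attribute read in test() below), it is
# presumably a small stopwatch along these lines; this is an assumed
# reconstruction, not necessarily the repository's actual implementation.
import time

class timer:
    def __init__(self):
        self.acc = 0           # accumulated seconds across hold() calls
        self.t0 = time.time()

    def tic(self):
        # Start (or restart) the stopwatch.
        self.t0 = time.time()

    def toc(self):
        # Elapsed seconds since the last tic().
        return time.time() - self.t0

    def hold(self):
        # Accumulate the current lap into the running total.
        self.acc += self.toc()

    def release(self):
        # Return the accumulated time and reset the accumulator.
        ret = self.acc
        self.acc = 0
        return ret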
def train(self):
    self.loss.step()
    epoch = self.scheduler.last_epoch + 1
    learning_rate = self.scheduler.get_lr()[0]
    idx_scale = self.args.scale
    if not self.converging:
        stage = 'Searching Stage'
    else:
        stage = 'Finetuning Stage (Searching Epoch {})'.format(
            self.epochs_searching)
    self.ckp.write_log('\n[Epoch {}]\tLearning rate: {:.2e}\t{}'.format(
        epoch, Decimal(learning_rate), stage))
    self.loss.start_log()
    self.model.train()

    timer_data, timer_model = utility.timer(), utility.timer()
    for batch, (lr, hr, _) in enumerate(self.loader_train):
        lr, hr = self.prepare([lr, hr])
        timer_data.hold()
        timer_model.tic()

        self.optimizer.zero_grad()
        sr = self.model(idx_scale, lr)
        loss = self.loss(sr, hr)
        if loss.item() < self.args.skip_threshold * self.error_last:
            # Gradient step (Adam)
            loss.backward()
            self.optimizer.step()

            # Proximal step during the searching stage
            if not self.converging:
                self.model.get_model().proximal_operator(learning_rate)
                # Periodically check the compression ratio.
                if (batch + 1) % self.args.compression_check_frequency == 0:
                    # Set the channels of the potentially pruned model.
                    self.model.get_model().set_parameters()
                    # Update the FLOPs and the number of parameters.
                    self.flops_prune = get_flops(self.model.get_model())
                    self.flops_compression_ratio = self.flops_prune / self.flops
                    self.params_prune = get_parameters(self.model.get_model())
                    self.params_compression_ratio = self.params_prune / self.params
                    self.flops_ratio_log.append(self.flops_compression_ratio)
                    self.params_ratio_log.append(self.params_compression_ratio)
                    if self.terminate():
                        break
                if (batch + 1) % 1000 == 0:
                    self.model.get_model().latent_vector_distribution(
                        epoch, batch + 1, self.ckp.dir)
                    self.model.get_model().per_layer_compression_ratio(
                        epoch, batch + 1, self.ckp.dir)
        else:
            print('Skip this batch {}! (Loss: {}) (Threshold: {})'.format(
                batch + 1, loss.item(),
                self.args.skip_threshold * self.error_last))
        timer_model.hold()

        if (batch + 1) % self.args.print_every == 0:
            self.ckp.write_log(
                '[{}/{}]\t{}\t{:.3f}+{:.3f}s'
                '\tFlops Ratio: {:.2f}% = {:.4f} G / {:.4f} G'
                '\tParams Ratio: {:.2f}% = {:.2f} k / {:.2f} k'.format(
                    (batch + 1) * self.args.batch_size,
                    len(self.loader_train.dataset),
                    self.loss.display_loss(batch),
                    timer_model.release(), timer_data.release(),
                    self.flops_compression_ratio * 100,
                    self.flops_prune / 10.**9, self.flops / 10.**9,
                    self.params_compression_ratio * 100,
                    self.params_prune / 10.**3, self.params / 10.**3))
        timer_data.tic()

    self.loss.end_log(len(self.loader_train))
    self.error_last = self.loss.log[-1, -1]
    self.scheduler.step()
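# proximal_operator() is model-specific and not shown here. For an l1-type
# sparsity regularizer it reduces to soft-thresholding of the latent vectors;
# the sketch below is illustrative only (the name soft_threshold and the
# scalar lam are assumptions, not the repository's API).
import torch

def soft_threshold(v, lam):
    # Proximal operator of lam * ||v||_1: shrink magnitudes by lam and
    # zero out entries whose magnitude falls below the threshold.
    return torch.sign(v) * torch.clamp(v.abs() - lam, min=0)

# Entries driven to zero correspond to channels that become prunable
# during the searching stage.
v = torch.tensor([0.8, -0.05, 0.3, -0.6])
print(soft_threshold(v, lam=0.1))  # tensor([ 0.7000, -0.0000,  0.2000, -0.5000])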
def train(self):
    epoch, lr = self.start_epoch()
    # TODO: investigate why self.model.train() is not used directly
    self.model.begin(epoch, self.ckp)
    self.loss.start_log()
    timer_data, timer_model = utility.timer(), utility.timer()
    n_samples = 0
    for batch, (img, label) in enumerate(self.loader_train):
        if (self.args.data_train == 'ImageNet'
                or self.args.model.lower() == 'efficientnet_hh') \
                and not self.converging:
            if self.args.model == 'ResNet_ImageNet_HH' \
                    or self.args.model == 'RegNet_ImageNet_HH':
                divider = 4
            else:
                divider = 2
            print('Divider is {}'.format(divider))
            batch_size = img.shape[0] // divider
            img = img[:batch_size]
            label = label[:batch_size]
        img, label = self.prepare(img, label)
        n_samples += img.size(0)
        timer_data.hold()
        timer_model.tic()

        self.optimizer.zero_grad()
        prediction = self.model(img)
        if (not self.converging and self.args.distillation_stage == 'c') or \
                (self.converging and not self.args.distillation_final):
            loss, _ = self.loss(prediction, label)
        else:
            with torch.no_grad():
                prediction_teacher = self.model_teacher(img)
            # Without intermediate distillation the model returns bare logits;
            # wrap them in lists for uniform indexing below.
            if not self.args.distillation_inter:
                prediction = [prediction]
                prediction_teacher = [prediction_teacher]
            loss, _ = self.loss(prediction[0], label)
            if self.args.distillation_final == 'kd':
                loss_distill_final = distillation(
                    prediction[0], prediction_teacher[0], T=4)
                loss = 0.4 * loss_distill_final + 0.6 * loss
            elif self.args.distillation_final == 'sp':
                loss_distill_final = similarity_preserving(
                    prediction[0], prediction_teacher[0]) * 3000
                loss = loss_distill_final + loss
            if self.args.distillation_inter == 'kd':
                loss_distill_inter = 0
                for p, pt in zip(prediction[1], prediction_teacher[1]):
                    loss_distill_inter += self.loss_mse(p, pt)
                loss_distill_inter = loss_distill_inter / len(
                    prediction[1]) * self.args.distill_beta
                loss = loss_distill_inter + loss
            elif self.args.distillation_inter == 'sp':
                loss_distill_inter = 0
                for p, pt in zip(prediction[1], prediction_teacher[1]):
                    loss_distill_inter += similarity_preserving(p, pt)
                loss_distill_inter = loss_distill_inter / len(
                    prediction[1]) * 3000 * self.args.distill_beta
                loss = loss_distill_inter + loss
            # else: self.args.distillation_inter == '', do nothing here

        # SGD step
        loss.backward()
        self.optimizer.step()

        if not self.converging and self.args.use_prox:
            # Proximal step
            self.model.get_model().proximal_operator(lr)
            if (batch + 1) % self.args.compression_check_frequency == 0:
                self.model.get_model().set_parameters()
                self.flops_prune = get_flops(self.model.get_model())
                self.flops_compression_ratio = self.flops_prune / self.flops
                self.params_prune = get_parameters(self.model.get_model())
                self.params_compression_ratio = self.params_prune / self.params
                self.flops_ratio_log.append(self.flops_compression_ratio)
                self.params_ratio_log.append(self.params_compression_ratio)
                if self.terminate():
                    break
            if (batch + 1) % 300 == 0:
                self.model.get_model().latent_vector_distribution(
                    epoch, batch + 1, self.ckp.dir)
                self.model.get_model().per_layer_compression_ratio(
                    epoch, batch + 1, self.ckp.dir)
        timer_model.hold()

        if (batch + 1) % self.args.print_every == 0:
            s = '{}/{} ({:.0f}%)\tNLL: {:.3f} Top1: {:.2f} / Top5: {:.2f}\t'.format(
                n_samples, len(self.loader_train.dataset),
                100.0 * n_samples / len(self.loader_train.dataset),
                *(self.loss.log_train[-1, :] / n_samples))
            if self.converging or (not self.converging
                                   and self.args.distillation_stage == 's'):
                if self.args.distillation_final:
                    s += 'DFinal: {:.3f} '.format(loss_distill_final)
                if self.args.distillation_inter:
                    s += 'DInter: {:.3f}'.format(loss_distill_inter)
                if self.args.distillation_final or self.args.distillation_inter:
                    s += '\t'
            s += 'Time: {:.1f}+{:.1f}s\t'.format(timer_model.release(),
                                                 timer_data.release())
            if hasattr(self, 'flops_compression_ratio') and hasattr(
                    self, 'params_compression_ratio'):
                s += 'Flops: {:.2f}% = {:.4f} [G] / {:.4f} [G]\t' \
                     'Params: {:.2f}% = {:.2f} [k] / {:.2f} [k]'.format(
                         self.flops_compression_ratio * 100,
                         self.flops_prune / 10. ** 9, self.flops / 10. ** 9,
                         self.params_compression_ratio * 100,
                         self.params_prune / 10. ** 3, self.params / 10. ** 3)
            self.ckp.write_log(s)

        if self.args.summary:
            if (batch + 1) % 50 == 0:
                for name, param in self.model.named_parameters():
                    if name.find('features') >= 0 and name.find('weight') >= 0:
                        self.writer.add_scalar(
                            'data/' + name,
                            param.clone().cpu().data.abs().mean().numpy(),
                            1000 * (epoch - 1) + batch)
                        if param.grad is not None:
                            self.writer.add_scalar(
                                'data/' + name + '_grad',
                                param.grad.clone().cpu().data.abs().mean().numpy(),
                                1000 * (epoch - 1) + batch)
            if (batch + 1) == 500:
                for name, param in self.model.named_parameters():
                    if name.find('features') >= 0 and name.find('weight') >= 0:
                        self.writer.add_histogram(
                            name, param.clone().cpu().data.numpy(),
                            1000 * (epoch - 1) + batch)
                        if param.grad is not None:
                            self.writer.add_histogram(
                                name + '_grad',
                                param.grad.clone().cpu().data.numpy(),
                                1000 * (epoch - 1) + batch)
        timer_data.tic()

        if not self.converging and epoch == self.args.epochs_grad and batch == 1:
            break

    self.model.log(self.ckp)  # TODO: why is this used?
    self.loss.end_log(len(self.loader_train.dataset))
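# similarity_preserving() is not defined in this section. The call pattern
# matches the similarity-preserving KD loss of Tung & Mori (ICCV 2019);
# a sketch under that assumption (the repo's implementation may differ):
import torch
import torch.nn.functional as F

def similarity_preserving(f_student, f_teacher):
    b = f_student.size(0)
    # Flatten each sample's activations to a row vector.
    fs = f_student.view(b, -1)
    ft = f_teacher.view(b, -1)
    # Row-normalized batch similarity (Gram) matrices, shape (b, b).
    gs = F.normalize(fs @ fs.t(), p=2, dim=1)
    gt = F.normalize(ft @ ft.t(), p=2, dim=1)
    # Mean squared Frobenius distance between the two similarity matrices.
    return (gs - gt).pow(2).sum() / (b * b)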
def train(self):
    epoch = self.start_epoch()
    # TODO: investigate why self.model.train() is not used directly
    self.model.begin(epoch, self.ckp)
    self.loss.start_log()
    # modules = self.model.get_model().find_modules()  # TODO: merge this
    timer_data, timer_model = utility.timer(), utility.timer()
    n_samples = 0
    for batch, (img, label) in enumerate(self.loader_train):
        img, label = self.prepare(img, label)
        n_samples += img.size(0)
        timer_data.hold()
        timer_model.tic()

        # Forward pass and loss computation
        self.optimizer.zero_grad()
        prediction = self.model(img)
        loss, _ = self.loss(prediction, label)
        lossp = self.model.get_model().compute_loss(
            batch + 1, epoch, self.converging)
        if not self.converging:
            # Use the projection loss for SGD; PG applies it via the proximal step.
            if self.args.optimizer == 'SGD':
                loss = loss + sum(lossp)
        else:
            # Use the distillation loss during the converging stage.
            if self.args.distillation:
                with torch.no_grad():
                    prediction_teacher = self.model_teacher(img)
                loss_distill = distillation(prediction, prediction_teacher, T=4)
                loss = loss_distill * 0.4 + loss * 0.6

        # Backward pass
        loss.backward()

        # Adjust the learning rate based on the gradients.
        # Applies to ResNet20, 56, 164, and Wide ResNet.
        if not self.converging and self.lr_adjust_flag:
            self.model.get_model().update_grad_ratio()
            self.scheduler.running_grad_ratio = \
                self.model.get_model().running_grad_ratio
            for param_group, lr in zip(self.optimizer.param_groups,
                                       self.scheduler.get_lr()):
                param_group['lr'] = lr

        # Parameter update
        if self.args.optimizer == 'SGD':
            self.optimizer.step()
        elif self.args.optimizer == 'PG':
            # Gradient step
            self.optimizer.step()
            if not self.converging and (batch + 1) % self.args.prox_freq == 0:
                # Anneal the regularization factor.
                reg = reg_anneal(
                    lossp[0], self.args.regularization_factor,
                    self.args.annealing_factor, self.args.annealing_t1,
                    self.args.annealing_t2)
                # Proximal step
                self.model.get_model().proximal_operator(
                    self.scheduler.get_lr()[-1], batch + 1, reg)
        elif self.args.optimizer == 'APG':
            # TODO: still interesting to investigate APG
            self.optimizer.converging = self.converging
            self.optimizer.batch = batch + 1
            self.optimizer.step()
        timer_model.hold()

        if (batch + 1) % self.args.print_every == 0:
            s = '{}/{} ({:.0f}%)\tTotal: {:.3f} / P1: {:.3f}'.format(
                n_samples, len(self.loader_train.dataset),
                100.0 * n_samples / len(self.loader_train.dataset),
                loss, lossp[0])
            if len(lossp) == 2:
                s += ' / P2: {:.3f}'.format(lossp[1])
            if not self.converging:
                if self.lr_adjust_flag:
                    s += ' / rP: {:.3f}'.format(
                        self.model.get_model().running_grad_ratio)
            else:
                if self.args.distillation:
                    s += ' / Dis: {:.3f}'.format(loss_distill)
            s += ' / NLL: {:.3f}\tTop1: {:.2f} / Top5: {:.2f}' \
                 '\tTime: {:.1f}+{:.1f}s'.format(
                     *(self.loss.log_train[-1, :] / n_samples),
                     timer_model.release(), timer_data.release())
            self.ckp.write_log(s)

        if self.args.summary:
            if (batch + 1) % 50 == 0:
                for name, param in self.model.named_parameters():
                    if name.find('features') >= 0 and name.find('weight') >= 0:
                        self.writer.add_scalar(
                            'data/' + name,
                            param.clone().cpu().data.abs().mean().numpy(),
                            1000 * (epoch - 1) + batch)
                        if param.grad is not None:
                            self.writer.add_scalar(
                                'data/' + name + '_grad',
                                param.grad.clone().cpu().data.abs().mean().numpy(),
                                1000 * (epoch - 1) + batch)
            if (batch + 1) == 500:
                for name, param in self.model.named_parameters():
                    if name.find('features') >= 0 and name.find('weight') >= 0:
                        self.writer.add_histogram(
                            name, param.clone().cpu().data.numpy(),
                            1000 * (epoch - 1) + batch)
                        if param.grad is not None:
                            self.writer.add_histogram(
                                name + '_grad',
                                param.grad.clone().cpu().data.numpy(),
                                1000 * (epoch - 1) + batch)
        timer_data.tic()

    self.model.log(self.ckp)
    self.loss.end_log(len(self.loader_train.dataset))
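# distillation(..., T=4) matches Hinton-style knowledge distillation
# (soft targets with temperature). A minimal sketch under that assumption;
# the repository's implementation may differ in weighting or reduction:
import torch.nn.functional as F

def distillation(student_logits, teacher_logits, T=4):
    # KL divergence between temperature-softened distributions, scaled by
    # T^2 so gradient magnitudes stay comparable across temperatures.
    log_p_student = F.log_softmax(student_logits / T, dim=1)
    p_teacher = F.softmax(teacher_logits / T, dim=1)
    return F.kl_div(log_p_student, p_teacher, reduction='batchmean') * T * T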
def train(self):
    epoch, lr = self.start_epoch()
    # TODO: investigate why self.model.train() is not used directly
    self.model.begin(epoch, self.ckp)
    self.loss.start_log()
    timer_data, timer_model = utility.timer(), utility.timer()
    n_samples = 0
    for batch, (img, label) in enumerate(self.loader_train):
        img, label = self.prepare(img, label)
        n_samples += img.size(0)
        timer_data.hold()
        timer_model.tic()

        self.optimizer.zero_grad()
        prediction = self.model(img)
        loss, _ = self.loss(prediction, label)

        # SGD step
        loss.backward()
        self.optimizer.step()

        # Proximal step during the searching stage
        if not self.converging:
            self.model.get_model().proximal_operator(lr)
            if (batch + 1) % self.args.compression_check_frequency == 0:
                self.model.get_model().set_parameters()
                self.flops_prune = get_flops(self.model.get_model())
                self.flops_compression_ratio = self.flops_prune / self.flops
                self.params_prune = get_parameters(self.model.get_model())
                self.params_compression_ratio = self.params_prune / self.params
                self.flops_ratio_log.append(self.flops_compression_ratio)
                self.params_ratio_log.append(self.params_compression_ratio)
                # The terminate() check is performed below, after logging.
            if (batch + 1) % 300 == 0:
                self.model.get_model().latent_vector_distribution(
                    epoch, batch + 1, self.ckp.dir)
                self.model.get_model().per_layer_compression_ratio(
                    epoch, batch + 1, self.ckp.dir)
        timer_model.hold()

        if (batch + 1) % self.args.print_every == 0:
            self.ckp.write_log(
                '{}/{} ({:.0f}%)\t'
                'NLL: {:.3f}\tTop1: {:.2f} / Top5: {:.2f}\t'
                'Time: {:.1f}+{:.1f}s\t'
                'Flops Ratio: {:.2f}% = {:.4f} [G] / {:.4f} [G]\t'
                'Params Ratio: {:.2f}% = {:.2f} [k] / {:.2f} [k]'.format(
                    n_samples, len(self.loader_train.dataset),
                    100.0 * n_samples / len(self.loader_train.dataset),
                    *(self.loss.log_train[-1, :] / n_samples),
                    timer_model.release(), timer_data.release(),
                    self.flops_compression_ratio * 100,
                    self.flops_prune / 10. ** 9, self.flops / 10. ** 9,
                    self.params_compression_ratio * 100,
                    self.params_prune / 10. ** 3, self.params / 10. ** 3))
        if not self.converging and self.terminate():
            break

        if self.args.summary:
            if (batch + 1) % 50 == 0:
                for name, param in self.model.named_parameters():
                    if name.find('features') >= 0 and name.find('weight') >= 0:
                        self.writer.add_scalar(
                            'data/' + name,
                            param.clone().cpu().data.abs().mean().numpy(),
                            1000 * (epoch - 1) + batch)
                        if param.grad is not None:
                            self.writer.add_scalar(
                                'data/' + name + '_grad',
                                param.grad.clone().cpu().data.abs().mean().numpy(),
                                1000 * (epoch - 1) + batch)
            if (batch + 1) == 500:
                for name, param in self.model.named_parameters():
                    if name.find('features') >= 0 and name.find('weight') >= 0:
                        self.writer.add_histogram(
                            name, param.clone().cpu().data.numpy(),
                            1000 * (epoch - 1) + batch)
                        if param.grad is not None:
                            self.writer.add_histogram(
                                name + '_grad',
                                param.grad.clone().cpu().data.numpy(),
                                1000 * (epoch - 1) + batch)
        timer_data.tic()

    self.model.log(self.ckp)  # TODO: why is this used?
    self.loss.end_log(len(self.loader_train.dataset))
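# get_flops() and get_parameters() are imported helpers. A parameter count is
# a one-liner; FLOPs (more precisely, multiply-accumulates) can be estimated
# with forward hooks. Rough sketches under assumed semantics -- input_size is
# a placeholder and the repository's accounting may differ.
import torch
import torch.nn as nn

def get_parameters(model):
    # Total trainable parameters (the actual helper may count all of them).
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

def get_flops(model, input_size=(1, 3, 224, 224)):
    flops = [0]

    def conv_hook(module, inp, out):
        # MACs per output element times the number of output elements.
        kernel_ops = (module.in_channels // module.groups
                      * module.kernel_size[0] * module.kernel_size[1])
        flops[0] += kernel_ops * out.numel()

    def linear_hook(module, inp, out):
        flops[0] += module.in_features * out.numel()

    handles = []
    for m in model.modules():
        if isinstance(m, nn.Conv2d):
            handles.append(m.register_forward_hook(conv_hook))
        elif isinstance(m, nn.Linear):
            handles.append(m.register_forward_hook(linear_hook))
    with torch.no_grad():
        model(torch.zeros(input_size))  # assumes a CPU model taking 2D conv input
    for h in handles:
        h.remove()
    return flops[0]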
def train(self):
    epoch, _ = self.start_epoch()
    # TODO: investigate why self.model.train() is not used directly
    self.model.begin(epoch, self.ckp)
    self.loss.start_log()
    timer_data, timer_model = utility.timer(), utility.timer()
    n_samples = 0
    for batch, (img, label) in enumerate(self.loader_train):
        img, label = self.prepare(img, label)
        n_samples += img.size(0)
        timer_data.hold()
        timer_model.tic()

        self.optimizer.zero_grad()
        prediction = self.model(img)
        loss, _ = self.loss(prediction, label)
        loss.backward()
        self.optimizer.step()
        timer_model.hold()

        if (batch + 1) % self.args.print_every == 0:
            self.ckp.write_log(
                '{}/{} ({:.0f}%)\t'
                'NLL: {:.3f}\t'
                'Top1: {:.2f} / Top5: {:.2f}\t'
                'Time: {:.1f}+{:.1f}s'.format(
                    n_samples, len(self.loader_train.dataset),
                    100.0 * n_samples / len(self.loader_train.dataset),
                    *(self.loss.log_train[-1, :] / n_samples),
                    timer_model.release(), timer_data.release()))

        if self.args.summary:
            if (batch + 1) % 50 == 0:
                for name, param in self.model.named_parameters():
                    if name.find('features') >= 0 and name.find('weight') >= 0:
                        self.writer.add_scalar(
                            'data/' + name,
                            param.clone().cpu().data.abs().mean().numpy(),
                            1000 * (epoch - 1) + batch)
                        # Guard against parameters that have no gradient yet.
                        if param.grad is not None:
                            self.writer.add_scalar(
                                'data/' + name + '_grad',
                                param.grad.clone().cpu().data.abs().mean().numpy(),
                                1000 * (epoch - 1) + batch)
            if (batch + 1) == 500:
                for name, param in self.model.named_parameters():
                    if name.find('features') >= 0 and name.find('weight') >= 0:
                        self.writer.add_histogram(
                            name, param.clone().cpu().data.numpy(),
                            1000 * (epoch - 1) + batch)
                        if param.grad is not None:
                            self.writer.add_histogram(
                                name + '_grad',
                                param.grad.clone().cpu().data.numpy(),
                                1000 * (epoch - 1) + batch)
        timer_data.tic()

    self.model.log(self.ckp)
    self.loss.end_log(len(self.loader_train.dataset))
def train(self):
    epoch = self.start_epoch()
    self.model.begin(epoch, self.ckp)
    self.loss.start_log()
    # modules = self.model.get_model().find_modules()  # TODO: merge this
    timer_data, timer_model = utility.timer(), utility.timer()
    n_samples = 0
    for batch, (img, label) in enumerate(self.loader_train):
        img, label = self.prepare(img, label)
        n_samples += img.size(0)
        timer_data.hold()
        timer_model.tic()

        self.optimizer.zero_grad()
        prediction = self.model(img)
        loss, _ = self.loss(prediction, label)
        if self.args.distillation:
            with torch.no_grad():
                prediction_teacher = self.model_teacher(img)
            loss_distill = distillation(prediction, prediction_teacher, T=4)
            loss = loss_distill * 0.4 + loss * 0.6

        loss.backward()
        self.optimizer.step()
        timer_model.hold()

        if self.args.summary:
            if (batch + 1) % 50 == 0:
                for name, param in self.model.named_parameters():
                    if name.find('features') >= 0 and name.find('weight') >= 0:
                        self.writer.add_scalar(
                            'data/' + name,
                            param.clone().cpu().data.abs().mean().numpy(),
                            1000 * (epoch - 1) + batch)
                        if param.grad is not None:
                            self.writer.add_scalar(
                                'data/' + name + '_grad',
                                param.grad.clone().cpu().data.abs().mean().numpy(),
                                1000 * (epoch - 1) + batch)
            if (batch + 1) == 500:
                for name, param in self.model.named_parameters():
                    if name.find('features') >= 0 and name.find('weight') >= 0:
                        self.writer.add_histogram(
                            name, param.clone().cpu().data.numpy(),
                            1000 * (epoch - 1) + batch)
                        if param.grad is not None:
                            self.writer.add_histogram(
                                name + '_grad',
                                param.grad.clone().cpu().data.numpy(),
                                1000 * (epoch - 1) + batch)
        timer_data.tic()

    self.model.log(self.ckp)
    self.loss.end_log(len(self.loader_train.dataset))
def test(self):
    self.model.get_model().total_time = [0] * len(
        self.model.get_model().body_list)
    epoch = self.scheduler.last_epoch + 1
    self.ckp.write_log('\nEvaluation:')
    self.loss.start_log(train=False)
    self.model.eval()

    timer_test = utility.timer()
    with torch.no_grad():
        for img, label in tqdm(self.loader_test, ncols=80):
            img, label = self.prepare(img, label)
            timer_test.tic()
            prediction = self.model(img)
            timer_test.hold()
            self.loss(prediction, label, train=False)
        current_time = timer_test.acc
        self.loss.end_log(len(self.loader_test.dataset), train=False)

    # Lower is better for all three measures.
    best = self.loss.log_test.min(0)
    for i, measure in enumerate(('Loss', 'Top1 error', 'Top5 error')):
        self.ckp.write_log(
            '{}: {:.3f} (Best: {:.3f} from epoch {})'.format(
                measure, self.loss.log_test[-1, i], best[0][i], best[1][i] + 1))
    if hasattr(self, 'epoch_continue') and self.converging:
        best = self.loss.log_test[:self.epoch_continue, :].min(0)
        self.ckp.write_log('\nBest during searching')
        for i, measure in enumerate(('Loss', 'Top1 error', 'Top5 error')):
            self.ckp.write_log('{}: {:.3f} from epoch {}'.format(
                measure, best[0][i], best[1][i]))
    self.ckp.write_log('Time: {:.2f}s\n'.format(current_time), refresh=True)

    is_best = self.loss.log_test[-1, self.args.top] <= best[0][self.args.top]
    self.ckp.save(self, epoch, converging=self.converging, is_best=is_best)
    # Used by clustering convolutional kernels:
    # self.ckp.save_results(epoch, self.model)

    # scheduler.step is moved from the training procedure to the test procedure.
    self.scheduler.step()

    # Newly added timing statistics below.
    self.model.get_model().timer_test_list.append(
        '{:.3f}'.format(current_time))
    print('Whole-network inference time:')
    print(self.model.get_model().timer_test_list)
    print('Per-layer time:')
    total_time = self.model.get_model().total_time
    for i in range(len(total_time)):
        total_time[i] = float('{:.5f}'.format(total_time[i]))
    print(total_time)
    print('Sum: {:.5f}'.format(sum(total_time)))
    if self.model.get_model().layer_num != -1:
        self.model.get_model().spec_list.append(
            '{:.5f}'.format(total_time[self.model.get_model().layer_num]))
        print('Inference time list for layer {}:'.format(
            self.model.get_model().layer_num))
        print(self.model.get_model().spec_list)
    self.model.get_model().sum_list.append('{:.5f}'.format(sum(total_time)))
    print('Sum list:')
    print(self.model.get_model().sum_list)
    self.model.get_model().top1_err_list.append(
        '{:.3f}'.format(self.loss.log_test[-1, 1]))
    print('Top-1 error list:')
    print(self.model.get_model().top1_err_list)