def train(self):
    """
    Run the training loop for ``self.epochs`` epochs.

    Each epoch steps the LR scheduler, runs one training pass and one
    validation pass, shows the merged logs as the progress-bar description,
    evaluates through ``PolicyKL.test`` on the test loader (and, on every
    10th epoch after epoch 0, on ``self.train_data_generator``), and then
    saves the score network's ``state_dict`` under ``self.args.save_dir``.

    :return: None
    """

    def _render(name, val):
        # Format a single log entry; every entry carries its own trailing comma.
        if name == 'epo':
            return '%s:%d,' % (name, val)
        if name in ('det q', 'sto q'):
            # Only the positions listed in self.nz_post are displayed.
            picked = ','.join(['%0.2f' % val[i] for i in self.nz_post])
            return '%s:[' % name + picked + '],'
        return '%s:%0.3f,' % (name, val)

    progress_bar = tqdm(range(self.epochs))
    for epoch in progress_bar:
        # NOTE(review): the scheduler is stepped before any optimizer step of
        # the epoch; recent PyTorch releases document the opposite order --
        # confirm against the PyTorch version this project targets.
        self.scheduler.step()
        lr_now = get_lr(self.optimizer)
        print('\nEpoch: {}/{}'.format(epoch, self.epochs))
        print('Cur lr: {}'.format(lr_now))

        # Train, then validate; validation entries override training entries
        # that share a key.
        merged_log = dict(self._train_epoch(epoch))
        valid_log, _ = self._valid_epoch(epoch)
        merged_log.update(valid_log)

        pieces = [_render(name, val) for name, val in merged_log.items()]
        progress_bar.set_description(''.join(pieces))

        shared_kwargs = dict(args=self.args,
                             score_net=self.score_net,
                             sdn_model=self.sdn_model,
                             nz_post=self.nz_post,
                             device=self.device)
        PolicyKL.test(data_loader=self.data_loader.test_loader, **shared_kwargs)

        if epoch > 0 and epoch % 10 == 0:
            print('This is the performance on the train dataset:')
            PolicyKL.test(data_loader=self.train_data_generator, **shared_kwargs)

        # The checkpoint is overwritten every epoch; no best-validation-loss
        # comparison is made here despite the file name.
        checkpoint_path = (self.args.save_dir
                           + '/{}_best_val_policy.dump'.format(self.sdn_name))
        torch.save(self.score_net.state_dict(), checkpoint_path)
def cnn_train(model, data, epochs, optimizer, scheduler, device='cpu'):
    """
    Train ``model`` for ``epochs`` epochs and collect per-epoch metrics.

    Every epoch steps the scheduler, runs ``cnn_training_step`` on each batch
    of the selected training loader, and evaluates with ``cnn_test`` on both
    ``data.test_loader`` and the training loader.

    :param model: network to train; its optional ``augment_training``
        attribute selects the augmented training loader.
    :param data: object exposing ``aug_train_loader``, ``train_loader`` and
        ``test_loader``.
    :param epochs: number of epochs to run (epochs are numbered from 1).
    :param optimizer: optimizer handed to ``cnn_training_step``.
    :param scheduler: LR scheduler, stepped once at the start of each epoch.
    :param device: device identifier forwarded to the step/test helpers.
    :return: dict mapping metric names to per-epoch lists.
    """
    metric_names = ('epoch_times', 'test_top1_acc', 'test_top3_acc',
                    'train_top1_acc', 'train_top3_acc', 'lrs')
    metrics = {name: [] for name in metric_names}
    print("cnn training")

    for epoch in range(1, epochs + 1):
        scheduler.step()
        lr_now = af.get_lr(optimizer)

        # A model without the flag is treated as if augmentation were enabled.
        use_augmented = getattr(model, 'augment_training', True)
        loader = data.aug_train_loader if use_augmented else data.train_loader

        started = time.time()
        model.train()
        print('Epoch: {}/{}'.format(epoch, epochs))
        print('Cur lr: {}'.format(lr_now))
        for inputs, labels in loader:
            cnn_training_step(model, optimizer, inputs, labels, device)
        finished = time.time()  # only the training pass is timed

        test_top1, test_top3 = cnn_test(model, data.test_loader, device)
        print('Top1 Test accuracy: {}'.format(test_top1))
        print('Top3 Test accuracy: {}'.format(test_top3))
        metrics['test_top1_acc'].append(test_top1)
        metrics['test_top3_acc'].append(test_top3)

        train_top1, train_top3 = cnn_test(model, loader, device)
        print('Top1 Train accuracy: {}'.format(train_top1))
        print('top3 Train accuracy: {}'.format(train_top3))
        metrics['train_top1_acc'].append(train_top1)
        metrics['train_top3_acc'].append(train_top3)

        elapsed = int(finished - started)
        print('Epoch took {} seconds.'.format(elapsed))
        metrics['epoch_times'].append(elapsed)
        metrics['lrs'].append(lr_now)

    return metrics
def epoch_routine(model, datas, optimizer, scheduler, epoch, epochs, augment, metrics, device):
    """
    Run one SDN training epoch, evaluate the model and record the metrics.

    :param model: network to train; passed to ``calc_coeff``,
        ``sdn_training_step`` and ``sdn_test``.
    :param datas: data holder; ``get_loader(datas, augment)`` supplies the
        training batches and ``aug_valid_loader`` / ``valid_loader`` the
        validation batches.
    :param optimizer: optimizer forwarded to ``sdn_training_step``.
    :param scheduler: LR scheduler, stepped once at the start of the epoch.
    :param epoch: index of the current epoch (scales the loss coefficients).
    :param epochs: total number of epochs (denominator of the ramp).
    :param augment: truthy to validate on ``datas.aug_valid_loader``; also
        forwarded to ``get_loader``.
    :param metrics: dict of lists, appended to in place; needs the keys
        ``valid_top1_acc``, ``valid_top3_acc``, ``train_top1_acc``,
        ``train_top3_acc``, ``epoch_times`` and ``lrs``.
    :param device: device identifier forwarded to the step/test helpers.
    :return: mean of the per-batch losses, or NaN if the training loader
        produced no batches.
    """
    scheduler.step()
    cur_lr = af.get_lr(optimizer)
    print('cur_lr: {}'.format(cur_lr))
    print("scheduler state dict: {}".format(scheduler.state_dict()))

    # Ramp the coefficients linearly with the epoch index, starting from a
    # 0.01 offset and never exceeding the per-output maxima.
    max_coeffs = calc_coeff(model)
    cur_coeffs = 0.01 + epoch * (np.array(max_coeffs) / epochs)
    cur_coeffs = np.minimum(max_coeffs, cur_coeffs)
    print("current coeffs: {}".format(cur_coeffs))

    start_time = time.time()
    model.train()
    losses = []
    for i, batch in enumerate(get_loader(datas, augment)):
        total_loss = sdn_training_step(optimizer, model, cur_coeffs, batch,
                                       device, epoch)
        losses.append(total_loss)
        if i % 100 == 0:
            print("Loss: {}".format(total_loss))

    valid_loader = datas.aug_valid_loader if augment else datas.valid_loader
    top1_valid, top3_valid = sdn_test(model, valid_loader, device)
    # The epoch time covers the training pass plus the validation pass.
    end_time = time.time()
    print('Top1 Valid accuracies: {}'.format(top1_valid))
    print('Top3 Valid accuracies: {}'.format(top3_valid))

    top1_train, top3_train = sdn_test(model, get_loader(datas, augment), device)
    print('Top1 Train accuracies: {}'.format(top1_train))
    print('Top3 Train accuracies: {}'.format(top3_train))

    epoch_time = int(end_time - start_time)
    print('Epoch took {} seconds.'.format(epoch_time))

    metrics['valid_top1_acc'].append(top1_valid)
    metrics['valid_top3_acc'].append(top3_valid)
    metrics['train_top1_acc'].append(top1_train)
    metrics['train_top3_acc'].append(top3_train)
    metrics['epoch_times'].append(epoch_time)
    metrics['lrs'].append(cur_lr)

    # An empty loader used to raise ZeroDivisionError here, after the metrics
    # had already been recorded; report NaN instead.
    mean_loss = sum(losses) / len(losses) if losses else float('nan')
    print("mean loss: {}".format(mean_loss))
    return mean_loss
def sdn_train(model, data, epochs, optimizer, scheduler, device='cpu', num_val_samples=10000):
    """
    Train an SDN (optionally its ``DS`` variant) and collect per-epoch metrics.

    Depending on ``model.ic_only`` and ``model.ds`` each batch is handled by
    one of ``sdn_training_step``, ``sdn_training_step_DS``,
    ``sdn_ic_only_step`` or ``sdn_ic_only_step_DS``. After every epoch the
    model is evaluated on ``data.val_loader`` and on the training loader.

    :param model: network to train; must expose ``augment_training``,
        ``ic_only`` and ``ds``.
    :param data: data holder; ``get_loader(data, augment)`` yields training
        batches and ``data.val_loader`` is used for evaluation.
    :param epochs: number of epochs to run (epochs are numbered from 1).
    :param optimizer: optimizer forwarded to the step helpers.
    :param scheduler: LR scheduler, stepped once at the start of each epoch.
    :param device: device identifier forwarded to the step/test helpers.
    :param num_val_samples: denominator that turns the size of the union of
        the per-layer correct sets into a fraction; defaults to the formerly
        hard-coded 10000.
    :return: dict mapping metric names to per-epoch lists.
    """
    augment = model.augment_training
    metrics = {
        'epoch_times': [],
        'test_top1_acc': [],
        'test_top5_acc': [],
        'train_top1_acc': [],
        'train_top5_acc': [],
        'lrs': [],
        'test_cumulative_acc': [],
    }
    max_coeffs = np.array([0.15, 0.3, 0.45, 0.6, 0.75, 0.9])  # max tau_i --- C_i values

    if model.ic_only:
        print('sdn will be converted from a pre-trained CNN... (The IC-only training)')
    else:
        print('sdn will be trained from scratch...(The SDN training)')

    for epoch in range(1, epochs + 1):
        # NOTE(review): the scheduler is stepped before any optimizer step of
        # the epoch; recent PyTorch releases document the opposite order --
        # confirm against the PyTorch version this project targets.
        scheduler.step()
        cur_lr = af.get_lr(optimizer)
        print('\nEpoch: {}/{}'.format(epoch, epochs))
        print('Cur lr: {}'.format(cur_lr))

        if model.ic_only is False:
            # IC coefficients of the weighted objective for this epoch: a
            # linear ramp from a 0.01 offset, capped at max_coeffs.
            cur_coeffs = 0.01 + epoch * (max_coeffs / epochs)
            cur_coeffs = np.minimum(max_coeffs, cur_coeffs)
            print('Cur coeffs: {}'.format(cur_coeffs))

        start_time = time.time()
        model.train()
        loader = get_loader(data, augment)
        for i, batch in enumerate(loader):
            if model.ic_only is False:
                step = sdn_training_step_DS if model.ds else sdn_training_step
                total_loss = step(optimizer, model, cur_coeffs, batch, device)
            else:
                step = sdn_ic_only_step_DS if model.ds else sdn_ic_only_step
                total_loss = step(optimizer, model, batch, device)

            if i % 100 == 0:
                print('Loss: {} '.format(total_loss))

        top1_test, top5_test = sdn_test(model, data.val_loader, device)
        print('Top1 Test accuracies: {}'.format(top1_test))
        print('Top5 Test accuracies: {}'.format(top5_test))
        end_time = time.time()
        # These two lists were declared in ``metrics`` but never filled.
        metrics['test_top1_acc'].append(top1_test)
        metrics['test_top5_acc'].append(top5_test)

        # Detailed results: union of the per-layer "correct" sets.
        # Presumably each set holds identifiers of the validation samples a
        # given output classified correctly -- verify against
        # sdn_get_detailed_results.
        layer_correct, _, _, _ = sdn_get_detailed_results(
            model, loader=data.val_loader, device=device)
        cum_correct = set().union(*layer_correct.values())
        cumulative_acc = len(cum_correct) / num_val_samples
        print('Cumulative accuracies: {}'.format(cumulative_acc))
        metrics['test_cumulative_acc'].append(cumulative_acc)

        top1_train, top5_train = sdn_test(model, get_loader(data, augment), device)
        print('Top1 Train accuracies: {}'.format(top1_train))
        print('Top5 Train accuracies: {}'.format(top5_train))
        metrics['train_top1_acc'].append(top1_train)
        metrics['train_top5_acc'].append(top5_train)

        epoch_time = int(end_time - start_time)
        metrics['epoch_times'].append(epoch_time)
        print('Epoch took {} seconds.'.format(epoch_time))
        metrics['lrs'].append(cur_lr)

    return metrics
def sdn_train(model, data, epochs, optimizer, scheduler, device='cpu'):
    """
    Train an SDN and collect per-epoch metrics.

    When ``model.ic_only`` is ``False`` each batch goes through
    ``sdn_training_step`` with epoch-dependent coefficients; otherwise it
    goes through ``sdn_ic_only_step``. After every epoch the model is
    evaluated on ``data.test_loader`` and on the training loader.

    :param model: network to train; must expose ``augment_training`` and
        ``ic_only``.
    :param data: data holder; ``get_loader(data, augment)`` yields training
        batches and ``data.test_loader`` is used for evaluation.
    :param epochs: number of epochs to run (epochs are numbered from 1).
    :param optimizer: optimizer forwarded to the step helpers.
    :param scheduler: LR scheduler, stepped once at the start of each epoch.
    :param device: device identifier forwarded to the step/test helpers.
    :return: dict mapping metric names to per-epoch lists.
    """
    augment = model.augment_training
    metric_names = ('epoch_times', 'test_top1_acc', 'test_top5_acc',
                    'train_top1_acc', 'train_top5_acc', 'lrs')
    metrics = {name: [] for name in metric_names}

    # max tau_i --- C_i values
    max_coeffs = np.array([0.15, 0.3, 0.45, 0.6, 0.75, 0.9])

    banner = ('sdn will be converted from a pre-trained CNN... (The IC-only training)'
              if model.ic_only
              else 'sdn will be trained from scratch...(The SDN training)')
    print(banner)

    for epoch in range(1, epochs + 1):
        scheduler.step()
        lr_now = af.get_lr(optimizer)
        print('\nEpoch: {}/{}'.format(epoch, epochs))
        print('Cur lr: {}'.format(lr_now))

        if model.ic_only is False:
            # IC coefficients of the weighted objective for this epoch: a
            # linear ramp from a 0.01 offset, capped at max_coeffs.
            cur_coeffs = np.minimum(max_coeffs,
                                    0.01 + epoch * (max_coeffs / epochs))
            print('Cur coeffs: {}'.format(cur_coeffs))

        started = time.time()
        model.train()
        for batch_idx, batch in enumerate(get_loader(data, augment)):
            if model.ic_only is not False:
                total_loss = sdn_ic_only_step(optimizer, model, batch, device)
            else:
                total_loss = sdn_training_step(optimizer, model, cur_coeffs,
                                               batch, device)
            if batch_idx % 100 == 0:
                print('Loss: {}: '.format(total_loss))

        test_top1, test_top5 = sdn_test(model, data.test_loader, device)
        print('Top1 Test accuracies: {}'.format(test_top1))
        print('Top5 Test accuracies: {}'.format(test_top5))
        finished = time.time()  # timing covers training plus the test pass
        metrics['test_top1_acc'].append(test_top1)
        metrics['test_top5_acc'].append(test_top5)

        train_top1, train_top5 = sdn_test(model, get_loader(data, augment), device)
        print('Top1 Train accuracies: {}'.format(train_top1))
        print('Top5 Train accuracies: {}'.format(train_top5))
        metrics['train_top1_acc'].append(train_top1)
        metrics['train_top5_acc'].append(train_top5)

        elapsed = int(finished - started)
        metrics['epoch_times'].append(elapsed)
        print('Epoch took {} seconds.'.format(elapsed))
        metrics['lrs'].append(lr_now)

    return metrics