def __init__(self, model, loss, metrics, base_model, config, data_loader,
             target_class, valid_data_loader=None, train_logger=None):
    super(FineTuner, self).__init__(model, loss, metrics, None, None, config, train_logger)
    self.config = config
    self.data_loader = data_loader
    self.valid_data_loader = valid_data_loader
    self.do_validation = self.valid_data_loader is not None
    self.log_step = 1 if len(data_loader) < 10 else int(len(data_loader) / 10)

    # load the pretrained base model checkpoint and resume epoch counting after it
    self.logger.info("Loading checkpoint: {} ...".format(base_model))
    checkpoint = torch.load(base_model)
    self.start_epoch = checkpoint['config']['trainer']['epochs'] + 1
    model.load_state_dict(checkpoint['state_dict'])

    # freeze the pretrained layers and swap in a new fully connected head
    # with len(target_class) + 1 outputs
    self.target_class = target_class
    self.model.freeze()
    self.fc_id = model.swap_fc(len(target_class) + 1)

    # setup GPU device if available, move model into configured device
    self.device, device_ids = self._prepare_device(config['n_gpu'])
    self.model = model.to(self.device)
    if len(device_ids) > 1:
        self.model = torch.nn.DataParallel(model, device_ids=device_ids)

    # build optimizer and learning rate scheduler over the trainable (unfrozen) parameters;
    # remove all lines containing lr_scheduler to disable the scheduler
    trainable_params = filter(lambda p: p.requires_grad, self.model.parameters())
    self.optimizer = util.get_instance(torch.optim, 'optimizer', config, trainable_params)
    self.lr_scheduler = util.get_instance(torch.optim.lr_scheduler, 'lr_scheduler', config, self.optimizer)
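# Hedged sketch of the config-driven factory used above (util.get_instance). The real
# implementation lives in util; this is an assumption inferred from how it is called here
# and from the 'type'/'args' config layout visible in build_optimizer below.
def get_instance(module, name, config, *args):
    """Instantiate config[name]['type'] from `module`, passing *args plus config[name]['args']."""
    return getattr(module, config[name]['type'])(*args, **config[name]['args'])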
def build_optimizer(model, config):
    optim_config = config['optimizer']
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = getattr(optim, optim_config['type'])(trainable_params, **optim_config['args'])
    scheduler = get_instance(optim.lr_scheduler, 'scheduler', config, optimizer)
    return optimizer, scheduler
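# Hypothetical config fragment matching build_optimizer's expectations ('type' plus 'args'
# per section); the concrete optimizer/scheduler choices and hyperparameters are illustrative.
example_config = {
    "optimizer": {"type": "Adam", "args": {"lr": 1e-3, "weight_decay": 0.0}},
    "scheduler": {"type": "StepLR", "args": {"step_size": 50, "gamma": 0.1}},
}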
def train_base_model(config):
    cp.print_progress('Training base model')
    train_logger = Logger()

    # setup data_loader instances
    data_loader = util.get_instance(data_loaders, 'data_loader', config)
    valid_data_loader = data_loader.split_validation()

    # build model architecture
    model = util.get_instance(models, 'model', config)

    # get function handles of loss and metrics
    loss_fn = getattr(loss_functions, config['loss'])
    metrics = [getattr(metric_functions, met) for met in config['metrics']]

    # build optimizer and learning rate scheduler;
    # remove all lines containing lr_scheduler to disable the scheduler
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = util.get_instance(torch.optim, 'optimizer', config, trainable_params)
    lr_scheduler = util.get_instance(torch.optim.lr_scheduler, 'lr_scheduler', config, optimizer)

    util.print_setting(data_loader, valid_data_loader, model, loss_fn, metrics, optimizer, lr_scheduler)

    trainer = Trainer(model, loss_fn, metrics, optimizer,
                      resume=None,
                      config=config,
                      data_loader=data_loader,
                      valid_data_loader=valid_data_loader,
                      lr_scheduler=lr_scheduler,
                      train_logger=train_logger)
    cp.print_progress('TRAINER\n', trainer)

    trainer.train()
    cp.print_progress('Training base model completed')
    return os.path.join(trainer.checkpoint_dir, 'model_best.pth')
def _setup_model(self):
    """ Setup model and print summary """
    model = get_instance(
        module_arch,
        'arch',
        global_config,
    )
    # Print out the model architecture and number of parameters
    model.summary()
    self.model = model.to(self.device)
def fine_tune_model(config, base_model):
    target_class = config['target_class']
    cp.print_progress('Fine tune model with', target_class)
    config['data_loader']['args']['target_class'] = target_class
    train_logger = Logger()

    # setup data_loader instances
    data_loader = util.get_instance(data_loaders, 'data_loader', config)
    valid_data_loader = data_loader.split_validation()

    # build model architecture
    model = util.get_instance(models, 'model', config)

    # get function handles of loss and metrics
    loss_fn = getattr(loss_functions, config['loss'])
    metrics = [getattr(metric_functions, met) for met in config['metrics']]

    util.print_setting(data_loader, valid_data_loader, model, loss_fn, metrics, None, None)

    # build the fine-tuner on top of the pretrained base model checkpoint
    trainer = FineTuner(model, loss_fn, metrics,
                        base_model=base_model,
                        config=config,
                        data_loader=data_loader,
                        valid_data_loader=valid_data_loader,
                        train_logger=train_logger,
                        target_class=target_class)
    trainer.train()
    cp.print_progress('Fine tuning is completed for', target_class)
    return os.path.join(trainer.checkpoint_dir, 'model_best.pth')
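# Hedged usage sketch of the two-stage pipeline: train the base model, then fine-tune it on
# the target classes using the best checkpoint path each stage returns. The config file name
# is an assumption; load it however the project's entry point does.
import json

with open('config.json') as f:
    config = json.load(f)

base_checkpoint = train_base_model(config)                   # .../model_best.pth of the base run
tuned_checkpoint = fine_tune_model(config, base_checkpoint)  # fine-tunes on config['target_class']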
def __init__(self, data_path='./data/out.pkl',
             resume='./weights/Music_LSTM_big/0304_041925/model_best.pth'):
    """ Initialize the predict class """
    self.data_path = data_path
    self.dataset = MidiDataset(self.data_path)
    self.resume = resume

    # Load checkpoint (map tensors to CPU when no GPU is available)
    if torch.cuda.is_available():
        checkpoint = torch.load(self.resume)
    else:
        checkpoint = torch.load(self.resume, map_location=lambda storage, loc: storage)
    state_dict = checkpoint['state_dict']
    self.config = checkpoint['config']

    # Load model
    self.model = get_instance(module_model, 'model', self.config)
    self.model.load_state_dict(state_dict)
    self.model.eval()
def load_model(config, base_model, target_class, seed=None):
    # build model architecture
    model = util.get_instance(models, 'model', config)

    # load state dict (map to CPU when no GPU is available)
    if not torch.cuda.is_available():
        checkpoint = torch.load(base_model, map_location='cpu')
    else:
        checkpoint = torch.load(base_model)
    state_dict = checkpoint['state_dict']
    if config['n_gpu'] > 1:
        model = torch.nn.DataParallel(model)
    model.load_state_dict(state_dict)

    # fall back to the config for seed/unknown settings when not passed explicitly
    if seed is None:
        seed = config['data_loader']['args'].get('seed', None)
    unknown = config['data_loader']['args'].get('unknown', False)

    # setup data_loader instances
    data_loader = getattr(data_loaders, config['data_loader']['type'])(
        config['data_loader']['args']['data_dir'],
        batch_size=512,
        shuffle=False,
        validation_split=0.0,
        training=False,
        num_workers=2,
        target_class=target_class,
        unknown=unknown,
        seed=seed)

    # get function handles of loss and metrics
    loss_fn = getattr(loss_functions, config['loss'])
    metrics = [getattr(metric_functions, met) for met in config['metrics']]

    return model, data_loader, loss_fn, metrics
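# Hedged usage sketch: load a fine-tuned checkpoint plus its matching test loader for
# evaluation. Assumes `config` is the same dict used for fine-tuning; the checkpoint path
# below is illustrative only.
model, test_loader, loss_fn, metrics = load_model(
    config, base_model='saved/finetune/model_best.pth', target_class=config['target_class'])
model.eval()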
def _setup_data_loader(self, key='data_loader'):
    return get_instance(module_data, key, global_config)
def main(config, resume):
    # # setup data_loader instances
    # data_loader = get_instance(data_loaders, 'data_loader', config)

    # setup data_loader instances
    data_loader = getattr(data_loaders, config['data_loader']['type'])(
        config['data_loader']['args']['data_dir'],
        batch_size=512,
        shuffle=False,
        validation_split=0.0,
        training=False,
        num_workers=2
    )
    # TODO :: use generic function for printing out model setting
    cp.print_progress('test DATASET\n', data_loader)

    # build model architecture
    model = util.get_instance(models, 'model', config)
    cp.print_progress('MODEL\n', model)

    # get function handles of loss and metrics
    loss_fn = getattr(loss_functions, config['loss'])
    cp.print_progress('LOSS FUNCTION\n', loss_fn.__name__)
    metrics = [getattr(metric_functions, met) for met in config['metrics']]
    cp.print_progress('METRICS\n', [metric.__name__ for metric in metrics])

    # load state dict
    checkpoint = torch.load(resume)
    state_dict = checkpoint['state_dict']
    if config['n_gpu'] > 1:
        model = torch.nn.DataParallel(model)
    model.load_state_dict(state_dict)

    # prepare model for testing
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    model.eval()

    total_loss = 0.0
    total_metrics = torch.zeros(len(metrics))

    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(tqdm(data_loader)):
            data, target = data.to(device), target.to(device)
            output = model(data)

            # computing loss, metrics on test set
            loss = loss_fn(output, target)
            batch_size = data.shape[0]
            total_loss += loss.item() * batch_size
            for i, metric in enumerate(metrics):
                total_metrics[i] += metric(output, target) * batch_size

    n_samples = len(data_loader.sampler)
    log = {'loss': total_loss / n_samples}
    log.update({met.__name__: total_metrics[i].item() / n_samples
                for i, met in enumerate(metrics)})

    test_result_str = 'TEST RESULTS\n'
    for key, val in log.items():
        test_result_str += ('\t' + str(key) + ' : ' + str(val) + '\n')
    cp.print_progress(test_result_str)