def test(self, model_path=''):
    print('[app][test] Test model')
    try:
        print('*** [app][test] Load pre-trained model ' + model_path + ' ***')
        self.model = load_checkpoint(self.model, model_path, self.is_cuda)
    except ValueError as e:
        print('[app][test] Error while loading the model.', e)

    self.save_traintest()

    # print('\n[app][test] Test all')
    # # acc = np.mean(self.accuracies)
    # # acc = self.accuracies
    # graphs = self.data[GRAPH]
    # labels = self.labels
    # self.run_test(graphs, labels)

    graphs = load_pickle(os.path.join(self.odir, 'train'))
    labels = load_pickle(os.path.join(self.odir, 'train_labels'))
    print('\n[app][test] Test on train graphs ({})'.format(len(labels)),
          os.path.join(self.odir, 'train'))
    self.run_test_fold(graphs, labels, fold=300)

    graphs = load_pickle(os.path.join(self.odir, 'test'))
    labels = load_pickle(os.path.join(self.odir, 'test_labels'))
    print('\n[app][test] Test on test graphs ({})'.format(len(labels)),
          os.path.join(self.odir, 'test'))
    self.run_test_fold(graphs, labels, fold=150)
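# The save_pickle/load_pickle helpers used above are not defined in this snippet.
# A minimal sketch of what they are assumed to do (a plain pickle round-trip);
# the project's actual implementations may differ.
import pickle


def save_pickle(obj, path):
    """Serialize `obj` to `path` with pickle."""
    with open(path, 'wb') as f:
        pickle.dump(obj, f)


def load_pickle(path):
    """Load a previously pickled object from `path`."""
    with open(path, 'rb') as f:
        return pickle.load(f)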
def if_resume(self):
    if self.cfg.logger.resume:
        # load checkpoint
        print(f"{datetime.now():%Y-%m-%d %H:%M:%S} - LOADING checkpoint!!!")
        save_dir = self.cfg.directory.load
        checkpoint = load_checkpoint(save_dir, self.device)
        self.model.load_state_dict(checkpoint["model"])
        self.optimizer.load_state_dict(checkpoint["optimizer"])
        self.lr_scheduler.load_state_dict(checkpoint["lr_scheduler"])
        self.epoch = checkpoint["epoch"] + 1
        self.e_loss = checkpoint["e_loss"]
        self.iteration = checkpoint["iteration"] + 1
        self.best = checkpoint["best"]
        print(
            f"{datetime.now():%Y-%m-%d %H:%M:%S} "
            + f"LOADING checkpoint was successful, start from epoch {self.epoch}"
            + f" and loss {self.best}"
        )
    else:
        self.epoch = 1
        self.iteration = 0
        self.best = np.inf
        self.e_loss = []

    self.logger.set_epoch(self.epoch)
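# This load_checkpoint variant is called as load_checkpoint(save_dir, device) and is
# expected to return a dict with "model", "optimizer", "lr_scheduler", "epoch",
# "e_loss", "iteration" and "best" entries. Its implementation is not shown here;
# a minimal sketch assuming the checkpoint was written with torch.save (the file
# name "checkpoint.pt" is a placeholder, not the project's actual layout):
import os
import torch


def load_checkpoint(save_dir, device):
    """Load a training checkpoint dict onto `device`."""
    path = os.path.join(save_dir, "checkpoint.pt")  # hypothetical file name
    return torch.load(path, map_location=device)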
def __init__(self, cfg_dir: str, data_loader: DataLoader, model, labels_definition):
    self.cfg = get_conf(cfg_dir)
    self._labels_definition = labels_definition  # TODO
    self.logger = self.init_logger(self.cfg.logger)
    # self.dataset = CustomDataset(**self.cfg.dataset)
    self.data = data_loader
    # self.val_dataset = CustomDatasetVal(**self.cfg.val_dataset)
    # self.val_data = DataLoader(self.val_dataset, **self.cfg.dataloader)
    # self.logger.log_parameters({"tr_len": len(self.dataset),
    #                             "val_len": len(self.val_dataset)})
    self.model = model
    # self.model._resnet.conv1.apply(init_weights_normal)
    self.device = self.cfg.train_params.device
    self.model = self.model.to(device=self.device)

    if self.cfg.train_params.optimizer.lower() == "adam":
        self.optimizer = optim.Adam(self.model.parameters(), **self.cfg.adam)
    elif self.cfg.train_params.optimizer.lower() == "rmsprop":
        self.optimizer = optim.RMSprop(self.model.parameters(), **self.cfg.rmsprop)
    else:
        raise ValueError(f"Unknown optimizer {self.cfg.train_params.optimizer}")

    self.lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(self.optimizer, T_max=100)
    self.criterion = nn.BCELoss()

    if self.cfg.logger.resume:
        # load checkpoint
        print("Loading checkpoint")
        save_dir = self.cfg.directory.load
        checkpoint = load_checkpoint(save_dir, self.device)
        self.model.load_state_dict(checkpoint["model"])
        self.optimizer.load_state_dict(checkpoint["optimizer"])
        self.lr_scheduler.load_state_dict(checkpoint["lr_scheduler"])
        self.epoch = checkpoint["epoch"]
        self.e_loss = checkpoint["e_loss"]
        self.best = checkpoint["best"]
        print(
            f"{datetime.now():%Y-%m-%d %H:%M:%S} "
            f"Loading checkpoint was successful, start from epoch {self.epoch}"
            f" and loss {self.best}")
    else:
        self.epoch = 1
        self.best = np.inf
        self.e_loss = []

    # initialize the early_stopping object
    self.early_stopping = EarlyStopping(
        patience=self.cfg.train_params.patience,
        verbose=True,
        delta=self.cfg.train_params.early_stopping_delta,
    )

    # stochastic weight averaging
    self.swa_model = AveragedModel(self.model)
    self.swa_scheduler = SWALR(self.optimizer, **self.cfg.SWA)
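# AveragedModel and SWALR come from torch.optim.swa_utils, but the loop that drives
# them is not part of this snippet. A minimal sketch of the usual stochastic weight
# averaging pattern; `swa_start` is an assumed config value, and `trainer` stands in
# for an instance of the class above:
def swa_step(trainer, epoch, swa_start=75):
    """Advance either the SWA schedule or the base LR schedule for one epoch."""
    if epoch >= swa_start:
        trainer.swa_model.update_parameters(trainer.model)  # accumulate running average of weights
        trainer.swa_scheduler.step()                         # step the SWA learning rate
    else:
        trainer.lr_scheduler.step()                          # cosine annealing before SWA kicks in

# After training, BatchNorm statistics of the averaged model are typically refreshed with:
# torch.optim.swa_utils.update_bn(trainer.data, trainer.swa_model, device=trainer.device)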
def test(self, model_path=''):
    print('Test model')
    try:
        print('*** Load pre-trained model ' + model_path + ' ***')
        self.model = load_checkpoint(self.model, model_path)
    except ValueError as e:
        print('Error while loading the model.', e)

    print('\nTest all')
    # acc = np.mean(self.accuracies)
    # acc = self.accuracies
    graphs = self.data[GRAPH]
    labels = self.labels
    self.run_test(graphs, labels)

    print('\nTest on train graphs')
    graphs = load_pickle(os.path.join(self.odir, 'train'))
    labels = load_pickle(os.path.join(self.odir, 'train_labels'))
    self.run_test(graphs, labels)

    print('\nTest on test graphs')
    graphs = load_pickle(os.path.join(self.odir, 'test'))
    labels = load_pickle(os.path.join(self.odir, 'test_labels'))
    self.run_test(graphs, labels)
def load_model_state(self, model_path=''):
    try:
        print('[App][load_model_state] *** Load pre-trained model ' + model_path + ' ***')
        self.model = load_checkpoint(self.model, model_path, self.is_cuda)
    except ValueError as e:
        print('Error while loading the model.', e)
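# A second load_checkpoint variant appears in these methods with the signature
# (model, model_path, is_cuda) and returns the model with restored weights. Its
# implementation is not shown; a minimal sketch, assuming the file stores either a
# plain state_dict or a dict under a "model_state_dict" key (the key name is an
# assumption, not the project's confirmed format):
import torch


def load_checkpoint(model, model_path, is_cuda=False):
    """Restore `model` weights from `model_path` and move it to GPU if requested."""
    map_location = 'cuda' if is_cuda and torch.cuda.is_available() else 'cpu'
    state = torch.load(model_path, map_location=map_location)
    if isinstance(state, dict) and 'model_state_dict' in state:
        state = state['model_state_dict']
    model.load_state_dict(state)
    return model.cuda() if is_cuda else model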
def test_on_data(self, model_path=''):
    print('Test model')
    try:
        print('*** Load pre-trained model ' + model_path + ' ***')
        self.model = load_checkpoint(self.model, model_path)
    except ValueError as e:
        print('Error while loading the model.', e)

    print('\nTest on data')
    # acc = np.mean(self.accuracies)
    # acc = self.accuracies
    graphs = self.data[GRAPH]
    labels = self.labels
    self.run_test(graphs, labels)
def test(self, data, load_path='', mode=NODE_CLASSIFICATION):
    try:
        print('*** Load pre-trained model ***')
        self.model = load_checkpoint(self.model, load_path)
    except ValueError as e:
        print('Error while loading the model.', e)

    if mode == NODE_CLASSIFICATION:
        test_mask = data[TEST_MASK]
        labels = data[LABELS]
        acc, _ = self.model.eval_node_classification(labels, test_mask)
    else:
        acc = np.mean(self.accuracies)

    print("\nTest Accuracy {:.4f}".format(acc))
    return acc
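# eval_node_classification is a method of the model and is not included in this
# snippet. A minimal sketch of the masked evaluation it is assumed to perform,
# written here as a standalone helper over per-node logits; the helper name and
# signature are hypothetical:
import torch
import torch.nn.functional as F


def masked_node_accuracy(logits, labels, mask):
    """Accuracy and cross-entropy loss restricted to the nodes selected by `mask`."""
    with torch.no_grad():
        logits, labels = logits[mask], labels[mask]
        loss = F.cross_entropy(logits, labels)
        acc = (logits.argmax(dim=1) == labels).float().mean().item()
    return acc, loss.item()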
def train(self, save_path='', k_fold=10, train_list_file=None, test_list_file=None):
    if self.pretrained_weight is not None:
        self.model = load_checkpoint(self.model, self.pretrained_weight, self.is_cuda)
    save_dir = save_path.split('/checkpoint')[0]
    loss_fcn = torch.nn.CrossEntropyLoss()

    # initialize graphs
    self.accuracies = np.zeros(k_fold)
    graphs = self.data[GRAPH]  # load all the graphs

    # debug purposes: reshuffle all the data before the splitting
    random_indices = list(range(len(graphs)))
    random.shuffle(random_indices)
    graphs = [graphs[i] for i in random_indices]
    labels = self.labels[random_indices]
    graphs_names = [self.graphs_names[i] for i in random_indices]

    split_train_test = True if train_list_file is None and test_list_file is None else False
    print('[app][train] split_train_test', split_train_test)

    if split_train_test is True:
        print('[app][train] train_list_file', train_list_file)
        print('[app][train] test_list_file', test_list_file)
        #############################
        # Create new train/test set
        # Split train and test
        #############################
        train_size = int(self.TRAIN_SIZE * len(graphs))
        g_train = graphs[:train_size]
        l_train = labels[:train_size]
        n_train = graphs_names[:train_size]
        g_test = graphs[train_size:]
        l_test = labels[train_size:]
        n_test = graphs_names[train_size:]
    else:
        #############################
        # Load train and test graphs from list
        #############################
        train_files = []
        test_files = []
        g_train = []
        l_train = []
        n_train = []
        g_test = []
        l_test = []
        n_test = []
        with open(train_list_file, 'r') as f:
            train_files = [l.strip() for l in f.readlines()]
        with open(test_list_file, 'r') as f:
            test_files = [l.strip() for l in f.readlines()]
        for i in range(len(labels)):
            graph_jsonpath = graphs_names[i]
            # print(graph_jsonpath)
            if graph_jsonpath in train_files:
                g_train.append(graphs[i])
                l_train.append(labels[i])
                n_train.append(graphs_names[i])
            if graph_jsonpath in test_files:
                g_test.append(graphs[i])
                l_test.append(labels[i])
                n_test.append(graphs_names[i])
        l_train = torch.Tensor(l_train).type(torch.LongTensor)
        l_test = torch.Tensor(l_test).type(torch.LongTensor)
        if self.is_cuda is True:
            l_train = l_train.cuda()
            l_test = l_test.cuda()

    print('[app][train] len labels', len(labels))
    print('[app][train] len g_train', len(g_train))
    # print('[app][train] g_train', g_train)
    if not os.path.isdir(self.odir):
        os.makedirs(self.odir)
    save_pickle(g_train, os.path.join(self.odir, 'train'))
    save_pickle(l_train, os.path.join(self.odir, 'train_labels'))
    save_pickle(g_test, os.path.join(self.odir, 'test'))
    save_pickle(l_test, os.path.join(self.odir, 'test_labels'))
    # save graph name list to txt file
    save_txt(n_train, os.path.join(self.odir, 'train_list.txt'))
    save_txt(n_test, os.path.join(self.odir, 'test_list.txt'))

    K = k_fold
    for k in range(K):
        self.model = self.ModelObj(g=self.data_graph[0],
                                   config_params=self.model_config,
                                   n_classes=self.data_nclasses,
                                   n_rels=self.data_nrels,
                                   n_entities=self.data_nentities,
                                   is_cuda=self.is_cuda,
                                   batch_size=1,
                                   model_src_path=self.model_src_path)
        print('*** [app][train] Model layers ***')
        for name, param in self.model.named_parameters():
            if param.requires_grad:
                print('\t', name, param.data.type())
        print('>>> [app][train] self.model.fc.weight.type', self.model.fc.weight.type())

        optimizer = torch.optim.Adam(
            self.model.parameters(),
            lr=self.learning_config['lr'],
            weight_decay=self.learning_config['weight_decay'])

        start = int(len(g_train) / K) * k
        end = int(len(g_train) / K) * (k + 1)
        print('\n\n\n[app][train] Process new k=' + str(k) + ' | ' +
              str(start) + '-' + str(end))

        # training batch
        train_batch_graphs = g_train[:start] + g_train[end:]
        # keep labels aligned with g_train[:start] + g_train[end:]
        train_batch_labels = l_train[list(range(0, start)) + list(range(end, len(g_train)))]
        train_batch_samples = list(map(list, zip(train_batch_graphs, train_batch_labels)))
        train_batches = DataLoader(train_batch_samples,
                                   batch_size=self.learning_config['batch_size'],
                                   shuffle=True,
                                   collate_fn=collate)

        # testing batch
        val_batch_graphs = g_train[start:end]
        val_batch_labels = l_train[start:end]
        # print('[app][train] val_batch_graphs', val_batch_graphs)
        print('[app][train] len val_batch_graphs', len(val_batch_graphs))
        print('[app][train] val_batch_graphs[0].number_of_nodes()', val_batch_graphs[0].number_of_nodes())
        print('[app][train] val_batch_graphs[-1].number_of_nodes()', val_batch_graphs[-1].number_of_nodes())
        val_batch = dgl.batch(val_batch_graphs)

        print('[app][train] train_batches size: ', len(train_batches))
        print('[app][train] train_batch_graphs size: ', len(train_batch_graphs))
        print('[app][train] val_batch_graphs size: ', len(val_batch_graphs))
        print('[app][train] train_batches', train_batches)
        print('[app][train] val_batch_labels', val_batch_labels)

        dur = []
        for epoch in range(self.learning_config['epochs']):
            self.model.train()
            if epoch >= 3:
                t0 = time.time()
            losses = []
            training_accuracies = []
            for iter_idx, (bg, label) in enumerate(train_batches):
                # print('~~~ [app][train] bg', bg)
                logits = self.model(bg)
                if self.learning_config['cuda']:
                    label = label.cuda()
                loss = loss_fcn(logits, label)
                losses.append(loss.item())
                _, indices = torch.max(logits, dim=1)
                # print('~~~~ logits', logits)
                print('\t [app][train] indices', indices)
                # print('\t label', label)
                correct = torch.sum(indices == label)
                training_accuracies.append(correct.item() * 1.0 / len(label))

                optimizer.zero_grad()
                loss.backward(retain_graph=True)
                # loss.backward()
                optimizer.step()

            if epoch >= 3:
                dur.append(time.time() - t0)

            val_acc, val_loss, _ = self.model.eval_graph_classification(val_batch_labels, val_batch)
            print("[app][train] Epoch {:05d} | Time(s) {:.4f} | train_acc {:.4f} | train_loss {:.4f} | val_acc {:.4f} | val_loss {:.4f}"
                  .format(epoch, np.mean(dur) if dur else 0, np.mean(training_accuracies),
                          np.mean(losses), val_acc, val_loss))

            is_better = self.early_stopping(val_loss, self.model, save_path)
            if is_better:
                self.accuracies[k] = val_acc

            if self.early_stopping.early_stop:
                # Print model's state_dict
                # print("*** Model's state_dict:")
                # for param_tensor in self.model.state_dict():
                #     print(param_tensor, "\t", self.model.state_dict()[param_tensor].size())
                # Print optimizer's state_dict
                # print("*** Optimizer's state_dict:")
                # for var_name in optimizer.state_dict():
                #     print(var_name, "\t", optimizer.state_dict()[var_name])
                # Save state dict
                # torch.save(self.model.state_dict(), save_dir + '/model_state.pt')
                # Save model
                # torch.save({
                #     'epoch': epoch,
                #     'model_state_dict': self.model.state_dict(),
                #     'optimizer_state_dict': optimizer.state_dict(),
                #     'val_loss': val_loss,
                # }, save_dir + '/saved')
                print("[app][train] Early stopping")
                break

        self.early_stopping.reset()
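# EarlyStopping is used above as a callable early_stopping(val_loss, model, save_path)
# that returns True when the validation loss improved, exposes an `early_stop` flag,
# and can be reset between folds. Its implementation is not part of this snippet;
# a minimal sketch consistent with that usage (default patience/delta are assumptions):
import torch


class EarlyStopping:
    def __init__(self, patience=10, delta=0.0, verbose=False):
        self.patience = patience
        self.delta = delta
        self.verbose = verbose
        self.reset()

    def reset(self):
        """Clear state so the same object can be reused for the next fold."""
        self.best_loss = float('inf')
        self.counter = 0
        self.early_stop = False

    def __call__(self, val_loss, model, save_path):
        """Return True (and checkpoint the model) when `val_loss` improved."""
        if val_loss < self.best_loss - self.delta:
            self.best_loss = val_loss
            self.counter = 0
            if save_path:
                torch.save(model.state_dict(), save_path)
            return True
        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True
        return False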
def train(self, save_path='', k_fold=10, train_list_file=None, test_list_file=None):
    if self.pretrained_weight is not None:
        self.model = load_checkpoint(self.model, self.pretrained_weight)
    loss_fcn = torch.nn.CrossEntropyLoss()

    # initialize graphs
    self.accuracies = np.zeros(k_fold)
    graphs = self.data[GRAPH]  # load all the graphs

    # debug purposes: reshuffle all the data before the splitting
    random_indices = list(range(len(graphs)))
    random.shuffle(random_indices)
    graphs = [graphs[i] for i in random_indices]
    labels = self.labels[random_indices]
    graphs_names = [self.graphs_names[i] for i in random_indices]

    split_train_test = True if train_list_file is None and test_list_file is None else False
    print('split_train_test', split_train_test)
    print('train_list_file', train_list_file)
    print('test_list_file', test_list_file)

    if split_train_test is True:
        #############################
        # Create new train/test set
        # Split train and test
        #############################
        train_size = int(self.TRAIN_SIZE * len(graphs))
        g_train = graphs[:train_size]
        l_train = labels[:train_size]
        n_train = graphs_names[:train_size]
        g_test = graphs[train_size:]
        l_test = labels[train_size:]
        n_test = graphs_names[train_size:]
    else:
        #############################
        # Load train and test graphs from list
        #############################
        train_files = []
        test_files = []
        g_train = []
        l_train = []
        n_train = []
        g_test = []
        l_test = []
        n_test = []
        with open(train_list_file, 'r') as f:
            train_files = [l.strip() for l in f.readlines()]
        with open(test_list_file, 'r') as f:
            test_files = [l.strip() for l in f.readlines()]
        for i in range(len(labels)):
            graph_jsonpath = graphs_names[i]
            # print(graph_jsonpath)
            if graph_jsonpath in train_files:
                g_train.append(graphs[i])
                l_train.append(labels[i])
                n_train.append(graphs_names[i])
            if graph_jsonpath in test_files:
                g_test.append(graphs[i])
                l_test.append(labels[i])
                n_test.append(graphs_names[i])
        l_train = torch.Tensor(l_train).type(torch.LongTensor)
        l_test = torch.Tensor(l_test).type(torch.LongTensor)
        if self.is_cuda is True:
            l_train = l_train.cuda()
            l_test = l_test.cuda()

    # print('len g_train', len(g_train))
    # print('g_train', g_train)
    if not os.path.isdir(self.odir):
        os.makedirs(self.odir)
    save_pickle(g_train, os.path.join(self.odir, 'train'))
    save_pickle(l_train, os.path.join(self.odir, 'train_labels'))
    save_pickle(g_test, os.path.join(self.odir, 'test'))
    save_pickle(l_test, os.path.join(self.odir, 'test_labels'))
    # save graph name list to txt file
    save_txt(n_train, os.path.join(self.odir, 'train_list.txt'))
    save_txt(n_test, os.path.join(self.odir, 'test_list.txt'))

    K = k_fold
    for k in range(K):  # K-fold cross validation
        # create GNN model
        # self.model = Model(g=self.data[GRAPH],
        #                    config_params=self.model_config,
        #                    n_classes=self.data[N_CLASSES],
        #                    n_rels=self.data[N_RELS] if N_RELS in self.data else None,
        #                    n_entities=self.data[N_ENTITIES] if N_ENTITIES in self.data else None,
        #                    is_cuda=self.learning_config['cuda'],
        #                    seq_dim=self.seq_max_length,
        #                    batch_size=1)

        optimizer = torch.optim.Adam(
            self.model.parameters(),
            lr=self.learning_config['lr'],
            weight_decay=self.learning_config['weight_decay'])

        if self.learning_config['cuda']:
            self.model.cuda()

        start = int(len(g_train) / K) * k
        end = int(len(g_train) / K) * (k + 1)
        print('\n\n\nProcess new k=' + str(k) + ' | ' + str(start) + '-' + str(end))

        # testing batch
        val_batch_graphs = g_train[start:end]
        val_batch_labels = l_train[start:end]
        val_batch = dgl.batch(val_batch_graphs)

        # training batch
        train_batch_graphs = g_train[:start] + g_train[end:]
        # keep labels aligned with g_train[:start] + g_train[end:]
        train_batch_labels = l_train[list(range(0, start)) + list(range(end, len(g_train)))]
        train_batch_samples = list(map(list, zip(train_batch_graphs, train_batch_labels)))
        train_batches = DataLoader(train_batch_samples,
                                   batch_size=self.learning_config['batch_size'],
                                   shuffle=True,
                                   collate_fn=collate)

        print('train_batches size: ', len(train_batches))
        print('train_batch_graphs size: ', len(train_batch_graphs))
        print('val_batch_graphs size: ', len(val_batch_graphs))
        print('train_batches', train_batches)
        print('val_batch_labels', val_batch_labels)

        dur = []
        for epoch in range(self.learning_config['epochs']):
            self.model.train()
            if epoch >= 3:
                t0 = time.time()
            losses = []
            training_accuracies = []
            for iter_idx, (bg, label) in enumerate(train_batches):
                logits = self.model(bg)
                if self.learning_config['cuda']:
                    label = label.cuda()
                loss = loss_fcn(logits, label)
                losses.append(loss.item())
                _, indices = torch.max(logits, dim=1)
                correct = torch.sum(indices == label)
                training_accuracies.append(correct.item() * 1.0 / len(label))

                optimizer.zero_grad()
                loss.backward(retain_graph=True)
                optimizer.step()

            if epoch >= 3:
                dur.append(time.time() - t0)

            val_acc, val_loss, _ = self.model.eval_graph_classification(val_batch_labels, val_batch)
            print("Epoch {:05d} | Time(s) {:.4f} | train_acc {:.4f} | train_loss {:.4f} | val_acc {:.4f} | val_loss {:.4f}"
                  .format(epoch, np.mean(dur) if dur else 0, np.mean(training_accuracies),
                          np.mean(losses), val_acc, val_loss))

            is_better = self.early_stopping(val_loss, self.model, save_path)
            if is_better:
                self.accuracies[k] = val_acc

            if self.early_stopping.early_stop:
                print("Early stopping")
                break

        self.early_stopping.reset()
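# `collate` is passed to the DataLoader in both train() variants but is not defined
# in this snippet. A minimal sketch of the usual DGL collate function for
# (graph, label) samples, batching the graphs with dgl.batch and stacking the
# labels into a LongTensor:
import dgl
import torch


def collate(samples):
    """Merge a list of (graph, label) pairs into one batched graph and a label tensor."""
    graphs, labels = map(list, zip(*samples))
    batched_graph = dgl.batch(graphs)
    return batched_graph, torch.tensor(labels, dtype=torch.long)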
def __init__(self, cfg_dir: str, data_loader_train: DataLoader,
             data_loader_val: DataLoader, encoder, decoder, labels_definition):
    self.cfg = get_conf(cfg_dir)
    self._labels_definition = labels_definition
    self.logger = self.init_logger(self.cfg.logger)
    self.task = decoder
    self.data_train = data_loader_train
    self.data_val = data_loader_val
    self.model = encoder
    self.decoder = decoder.mlp
    self.sig = nn.Sigmoid()
    # self.model._resnet.conv1.apply(init_weights_normal)
    self.device = self.cfg.train_params.device
    self.model = self.model.to(device=self.device)
    self.decoder = self.decoder.to(device=self.device)

    # optimize both encoder and decoder parameters (materialized as a list so the
    # optimizer accepts them)
    params = list(self.model.parameters()) + list(self.decoder.parameters())
    if self.cfg.train_params.optimizer.lower() == "adam":
        self.optimizer = optim.Adam(params, **self.cfg.adam)
    elif self.cfg.train_params.optimizer.lower() == "rmsprop":
        self.optimizer = optim.RMSprop(params, **self.cfg.rmsprop)
    else:
        raise ValueError(f"Unknown optimizer {self.cfg.train_params.optimizer}")

    self.lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(self.optimizer, T_max=100)
    self.criterion_lbl = nn.BCELoss()
    self.criterion_box = nn.MSELoss()

    if self.cfg.logger.resume:
        # load checkpoint
        print("Loading checkpoint")
        save_dir = self.cfg.directory.load
        checkpoint = load_checkpoint(save_dir, self.device)
        self.model.load_state_dict(checkpoint["model"])
        self.optimizer.load_state_dict(checkpoint["optimizer"])
        self.lr_scheduler.load_state_dict(checkpoint["lr_scheduler"])
        self.epoch = checkpoint["epoch"]
        self.e_loss = checkpoint["e_loss"]
        self.best = checkpoint["best"]
        print(
            f"{datetime.now():%Y-%m-%d %H:%M:%S} "
            f"Loading checkpoint was successful, start from epoch {self.epoch}"
            f" and loss {self.best}")
    else:
        self.epoch = 1
        self.best = np.inf
        self.e_loss = []

    # initialize the early_stopping object
    self.early_stopping = EarlyStopping(
        patience=self.cfg.train_params.patience,
        verbose=True,
        delta=self.cfg.train_params.early_stopping_delta,
    )

    # stochastic weight averaging
    self.swa_model = AveragedModel(self.model)
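# The resume branches above expect a checkpoint dict with "model", "optimizer",
# "lr_scheduler", "epoch", "e_loss" and "best" entries. The saving side is not shown
# in this snippet; a minimal sketch of a matching save helper (the function name,
# signature, and file name are assumptions, not the project's actual API):
import os
import torch


def save_checkpoint(trainer, save_dir, name="checkpoint.pt"):
    """Write the state expected by the resume branch to `save_dir/name`."""
    os.makedirs(save_dir, exist_ok=True)
    torch.save(
        {
            "model": trainer.model.state_dict(),
            "optimizer": trainer.optimizer.state_dict(),
            "lr_scheduler": trainer.lr_scheduler.state_dict(),
            "epoch": trainer.epoch,
            "e_loss": trainer.e_loss,
            "best": trainer.best,
        },
        os.path.join(save_dir, name),
    )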