def __init__(self, config):
    """Image/graph dataset whose sample paths and labels come from a CSV file.

    Builds two optional transform pipelines (graph and image) from the
    config, then reads the sample index CSV.

    Args:
        config: configuration exposing ``graph_transform_args``,
            ``image_transform_args`` and ``filepath`` (with ``data`` and
            optional ``indices_csv``).
    """
    super().__init__()
    self.config = config

    def _build_pipeline(transform_args):
        # Instantiate each configured transform; a None "params" entry
        # means the transform takes no keyword arguments.
        steps = [
            configmapper.get_object("transforms", spec["type"])(
                **(dict(spec["params"]) if spec["params"] is not None else {}))
            for spec in transform_args
        ]
        return transforms.Compose(steps) if steps else None

    self.graph_transform = _build_pipeline(config.graph_transform_args)
    self.image_transform = _build_pipeline(config.image_transform_args)

    # Prefer a pre-filtered indices CSV when one is configured; otherwise
    # read the full data CSV. (is not None, not != None)
    if config.filepath.indices_csv is not None:
        data_path = config.filepath.indices_csv
    else:
        data_path = config.filepath.data
    self.dir_path = config.filepath.data
    self.data = pd.read_csv(data_path)
    self.image_paths = np.array(self.data["path"])
    # NOTE(review): labels are taken from the "img_id" column — confirm
    # that column really holds class labels in the CSV schema.
    self.labels = np.array(self.data["img_id"])
def __init__(self, config):
    """Wrap torch-geometric's MNISTSuperpixels dataset.

    Optional ``transform_args`` / ``pre_transform_args`` config sections are
    turned into Compose pipelines; missing sections yield ``None``.

    Args:
        config: configuration with ``load_dataset_args.path`` and ``split``.
    """
    self.config = config

    def _compose_from(attr_name):
        # Build a Compose pipeline from an optional config section.
        steps = []
        if hasattr(config, attr_name):
            for spec in getattr(config, attr_name):
                params = dict(spec["params"]) if spec["params"] is not None else {}
                steps.append(
                    configmapper.get_object("transforms", spec["type"])(**params))
        return transforms.Compose(steps) if steps else None

    self.transform = _compose_from("transform_args")
    self.pre_transform = _compose_from("pre_transform_args")
    self.dataset = datasets.MNISTSuperpixels(
        root=config.load_dataset_args.path,
        train=self.config.split == "train",
        transform=self.transform,
        pre_transform=self.pre_transform,
    )
def __init__(self, config):
    """Dataset driven by a CSV of relative sample paths.

    Builds graph/image transform pipelines from config, reads the CSV, and
    rewrites each "path" entry to be absolute relative to the CSV's own
    directory.

    Args:
        config: configuration with ``graph_transform_args``,
            ``image_transform_args`` and ``data_paths_csv``.
    """
    super().__init__()
    self.config = config

    def _build_pipeline(transform_args):
        # Instantiate each configured transform; None params mean no kwargs.
        steps = [
            configmapper.get_object("transforms", spec["type"])(
                **(dict(spec["params"]) if spec["params"] is not None else {}))
            for spec in transform_args
        ]
        return transforms.Compose(steps) if steps else None

    self.graph_transform = _build_pipeline(config.graph_transform_args)
    self.image_transform = _build_pipeline(config.image_transform_args)

    self.data_paths_df = pd.read_csv(config.data_paths_csv)
    # Resolve sample paths relative to the CSV's directory. os.path.dirname
    # replaces the hand-rolled '"/".join(path.split("/")[:-1])'.
    csv_dir = os.path.dirname(config.data_paths_csv)
    self.data_paths_df["path"] = self.data_paths_df["path"].apply(
        lambda rel_path: os.path.join(csv_dir, rel_path))
def preprocess(self, model_config, data_config):
    """Instantiate the datasets and the model described by the configs.

    Args:
        model_config: model section of the configuration.
        data_config: data section of the configuration.

    Returns:
        tuple: ``(model, train_dataset, val_dataset)``.
    """
    # Resolve the registered classes once, then instantiate.
    dataset_cls = configmapper.get_object("datasets", data_config.main.name)
    model_cls = configmapper.get_object("models", model_config.name)

    train_dataset = dataset_cls(data_config.train, self.tokenizer)
    val_dataset = dataset_cls(data_config.val, self.tokenizer)
    model = model_cls(self.embeddings, **model_config.params.as_dict())
    return model, train_dataset, val_dataset
def __init__(self, config):
    """Two-branch classifier: CNN and GNN logits are concatenated and
    projected to ``config.num_classes`` by a final linear layer."""
    super(Projection, self).__init__()
    cnn_cfg = config.cnn_config
    gnn_cfg = config.gnn_config
    self.cnn = configmapper.get_object("models", cnn_cfg.name)(cnn_cfg)
    self.gcn = configmapper.get_object("models", gnn_cfg.name)(gnn_cfg)
    # The linear layer consumes both branches' logits side by side.
    combined_dim = cnn_cfg.num_classes + gnn_cfg.num_classes
    self.linear_layer = Linear(combined_dim, config.num_classes)
    self.loss_fn = CrossEntropyLoss()
def __init__(self, config):
    """Load raw MNIST idx-format files (images + labels) into numpy arrays.

    Image file layout (idx3-ubyte): 4-byte magic, 4-byte big-endian image
    count, two 4-byte dimension fields, then ``count*28*28`` uint8 pixels.
    Label file layout (idx1-ubyte): 8-byte header, then uint8 labels.

    Args:
        config: configuration with ``graph_transform_args``,
            ``image_transform_args`` and ``filepath`` (``image``, ``labels``,
            optional ``indices_csv``).
    """
    super().__init__()
    self.config = config

    def _build_pipeline(transform_args):
        # Instantiate configured transforms; None params mean no kwargs.
        steps = [
            configmapper.get_object("transforms", spec["type"])(
                **(dict(spec["params"]) if spec["params"] is not None else {}))
            for spec in transform_args
        ]
        return transforms.Compose(steps) if steps else None

    self.graph_transform = _build_pipeline(config.graph_transform_args)
    self.image_transform = _build_pipeline(config.image_transform_args)

    with open(config.filepath.image, "rb") as f:
        # First 16 bytes contain metadata; keep only the big-endian count.
        _ = f.read(4)
        size = struct.unpack(">I", f.read(4))[0]
        _ = f.read(8)  # row/column dims; 28x28 is assumed below
        self.images = np.frombuffer(f.read(), dtype=np.uint8).reshape(size, 28, 28)

    with open(config.filepath.labels, "rb") as f:
        # First 8 bytes contain metadata.
        _ = f.read(8)
        self.labels = np.frombuffer(f.read(), dtype=np.uint8)

    # Optionally keep only a configured subset of sample indices.
    if config.filepath.indices_csv is not None:
        filtered_indices = list(pd.read_csv(config.filepath.indices_csv)["index"])
        self.images = np.take(self.images, filtered_indices, axis=0)
        self.labels = np.take(self.labels, filtered_indices, axis=0)
def __init__(self, config):
    """Load a CIFAR-style pickled batch and reshape it to NHWC uint8 images.

    Args:
        config: configuration with ``transform_args``, ``label`` (the batch
            dict key holding labels) and ``filepath`` (``data``, optional
            ``indices_csv``).
    """
    super().__init__()
    self.config = config

    # Build the optional transform pipeline.
    steps = [
        configmapper.get_object("transforms", spec["type"])(
            **(dict(spec["params"]) if spec["params"] is not None else {}))
        for spec in config.transform_args
    ]
    self.transform = transforms.Compose(steps) if steps else None

    # SECURITY NOTE: pickle.load executes arbitrary code from the file —
    # only ever point this at trusted, locally produced data files.
    with open(config.filepath.data, "rb") as f:
        self.data = pickle.load(f, encoding="bytes")
    self.images = self.data[b"data"]
    self.labels = self.data[config.label.encode("UTF-8")]

    # Optionally keep only a configured subset of sample indices.
    if config.filepath.indices_csv is not None:
        filtered_indices = list(pd.read_csv(config.filepath.indices_csv)["index"])
        self.images = np.take(self.images, filtered_indices, axis=0)
        self.labels = np.take(self.labels, filtered_indices, axis=0)

    # CIFAR stores flat (N, 3072) rows: reshape to NCHW, then move
    # channels last (NHWC) for image-style consumption.
    self.images = np.transpose(np.reshape(self.images, (-1, 3, 32, 32)),
                               (0, 2, 3, 1))
def __init__(self, config):
    """Load the scikit-learn LFW people dataset (color, >=20 faces/person).

    Args:
        config: configuration with ``transform_args`` and ``filepath``
            (``data`` cache dir, optional ``indices_csv``).
    """
    super().__init__()
    self.config = config

    # Build the optional transform pipeline.
    steps = [
        configmapper.get_object("transforms", spec["type"])(
            **(dict(spec["params"]) if spec["params"] is not None else {}))
        for spec in config.transform_args
    ]
    self.transform = transforms.Compose(steps) if steps else None

    self.data = fetch_lfw_people(data_home=config.filepath.data,
                                 color=True,
                                 min_faces_per_person=20)
    if config.filepath.indices_csv is not None:
        # NOTE(review): this CSV uses column "indices" while the sibling
        # datasets read "index" — confirm the CSV schemas really differ.
        filtered_indices = list(pd.read_csv(config.filepath.indices_csv)["indices"])
        self.images = np.take(self.data.images, filtered_indices, axis=0)
        self.labels = np.take(self.data.target, filtered_indices, axis=0)
    else:
        self.images = self.data.images
        self.labels = self.data.target
    self.images = self.images.astype(np.uint8)
def __init__(self, config):
    """
    Args:
        config (src.utils.module.Config): configuration for preprocessor
    """
    super(GlovePreprocessor, self).__init__()
    self.config = config
    preproc_cfg = self.config.main.preprocessor

    # Tokenizer: instantiate from the registry, then load its vectors.
    tokenizer_cls = configmapper.get_object(
        "tokenizers", preproc_cfg.tokenizer.name)
    self.tokenizer = tokenizer_cls(
        **preproc_cfg.tokenizer.init_params.as_dict())
    self.tokenizer_params = (
        preproc_cfg.tokenizer.init_vector_params.as_dict())
    self.tokenizer.initialize_vectors(**self.tokenizer_params)

    # Embeddings: initialized from the tokenizer's vocab vectors and its
    # padding-token index.
    vocab = self.tokenizer.text_field.vocab
    pad_index = vocab.stoi[self.tokenizer.text_field.pad_token]
    embedding_cls = configmapper.get_object(
        "embeddings", preproc_cfg.embedding.name)
    self.embeddings = embedding_cls(vocab.vectors, pad_index)
def convert_params_to_dict(params):
    """Convert a params config object into a plain dict, resolving each
    value through the "params" registry when an entry exists.

    Values with no registry entry are kept unchanged (a message is printed).

    Args:
        params: config object exposing ``as_dict()``.

    Returns:
        dict: key -> resolved object, or the original value when unresolved.
    """
    dic = {}
    # BUG FIX: iterating ``params.as_dict()`` directly yields only keys and
    # fails to unpack into (k, v); iterate ``.items()`` instead.
    for k, v in params.as_dict().items():
        try:
            # BUG FIX: the resolved object was previously discarded and the
            # raw value stored in both branches; store the resolved object,
            # consistent with map_dict_to_obj.
            dic[k] = configmapper.get_object("params", v)
        except Exception:
            print(
                f"Undefined {v} for the given key: {k} in mapper ,storing original value"
            )
            dic[k] = v
    return dic
def __init__(self, config):
    """
    Args:
        config (src.utils.module.Config): configuration for preprocessor
    """
    super(TransformersConcretenessPreprocessor, self).__init__()
    self.config = config
    # Look up the tokenizer class in the registry and load its pretrained
    # weights with the configured init parameters.
    tokenizer_cfg = self.config.main.preprocessor.tokenizer
    tokenizer_cls = configmapper.get_object("tokenizers", tokenizer_cfg.name)
    self.tokenizer = tokenizer_cls.from_pretrained(
        **tokenizer_cfg.init_params.as_dict())
def map_dict_to_obj(dic):
    """Recursively resolve the values of ``dic`` through the "params" registry.

    Nested dicts are resolved recursively; values without a registry entry
    are kept unchanged. ``None`` maps to an empty dict.

    Args:
        dic (dict | None): mapping whose values may name registered objects.

    Returns:
        dict: same keys, with resolvable values replaced by their objects.
    """
    result_dic = {}
    if dic is not None:
        for k, v in dic.items():
            if isinstance(v, dict):
                result_dic[k] = map_dict_to_obj(v)
            else:
                try:
                    result_dic[k] = configmapper.get_object("params", v)
                # BUG FIX: a bare ``except:`` also swallowed SystemExit and
                # KeyboardInterrupt; Exception keeps the intended fallback.
                except Exception:
                    result_dic[k] = v
    return result_dic
def __init__(self, config):
    """Trainer setup: resolve metric callables and cache train/val configs.

    Args:
        config: configuration with ``main_config.metrics``, ``train``
            and ``val`` sections.
    """
    self._config = config
    # Map each metric callable to its kwargs so both are available at
    # call time: metric(..., **self.metrics[metric]).
    self.metrics = {
        configmapper.get_object("metrics", metric["type"]): metric["params"]
        for metric in self._config.main_config.metrics
    }
    self.train_config = self._config.train
    self.val_config = self._config.val
    self.log_label = self.train_config.log.log_label
    # BUG FIX: define the flag unconditionally; previously it existed only
    # when log_and_val_interval was set, so later unguarded reads (e.g.
    # ``if self.val_log_together:`` in train) could raise AttributeError.
    self.val_log_together = False
    if self.train_config.log_and_val_interval is not None:
        self.val_log_together = True
    print("Logging with label: ", self.log_label)
def __init__(self, config):
    """Load a HuggingFace dataset, cast it to the configured features, and
    expose a torch-formatted train split with standardized "image"/"label"
    column names."""
    self.config = config
    self.image_column_name = config.image_column_name
    self.label_column_name = config.label_column_name
    self.channels_first_input = config.channels_first_input

    # Build the optional transform pipeline from config.
    pipeline = []
    for spec in config.transform_args:
        kwargs = dict(spec["params"]) if spec["params"] is not None else {}
        pipeline.append(
            configmapper.get_object("transforms", spec["type"])(**kwargs))
    self.transform = transforms.Compose(pipeline) if pipeline else None

    self.raw_dataset = load_dataset(**config.load_dataset_args)
    if config.remove_columns is not None:
        self.raw_dataset = self.raw_dataset.remove_columns(
            config.remove_columns)
    self.raw_dataset.set_format(
        "torch", columns=self.raw_dataset["train"].column_names)

    # Target feature schema: float32 image tensor + class label.
    image_feature = datasets.Array3D(
        shape=tuple(self.config.features.image_output_shape),
        dtype="float32",
    )
    label_feature = datasets.features.ClassLabel(
        names=list(self.config.features.label_names))
    features = datasets.Features({
        self.image_column_name: image_feature,
        self.label_column_name: label_feature,
    })

    self.train_dataset = self.raw_dataset.map(
        self.prepare_features,
        features=features,
        batched=True,
        batch_size=64,
    )
    # Normalize column names so downstream code can rely on image/label.
    if self.image_column_name != "image":
        self.train_dataset = self.train_dataset.rename_column(
            self.image_column_name, "image")
    if self.label_column_name != "label":
        self.train_dataset = self.train_dataset.rename_column(
            self.label_column_name, "label")
    self.train_dataset.set_format("torch", columns=["image", "label"])
def __init__(self, config):
    """Trainer setup: metrics, sub-configs and device; a shared
    log_and_val_interval overrides both the val and log intervals."""
    self._config = config
    # Metric callable -> its kwargs, resolved from the registry.
    self.metrics = {
        configmapper.get_object("metrics", m["type"]): m["params"]
        for m in self._config.main_config.metrics
    }
    self.train_config = self._config.train
    self.val_config = self._config.val
    self.log_label = self.train_config.log.log_label
    self.device = torch.device(self._config.main_config.device.name)

    shared_interval = self.train_config.log_and_val_interval
    if shared_interval is not None:
        # One interval drives both validation and logging.
        self.train_config.val_interval = shared_interval
        self.train_config.log.log_interval = shared_interval
    print("Logging with label: ", self.log_label)
def __init__(self, config):
    """Trainer setup; also ensures the best-checkpoint directory exists.

    Args:
        config: configuration with ``main_config.metrics``, ``train``
            (including ``save_on.best_path``) and ``val`` sections.
    """
    self._config = config
    # Metric callable -> its kwargs, resolved from the registry.
    self.metrics = {
        configmapper.get_object("metrics", metric["type"]): metric["params"]
        for metric in self._config.main_config.metrics
    }
    self.train_config = self._config.train
    self.val_config = self._config.val
    self.log_label = self.train_config.log.log_label
    # BUG FIX: define the flag unconditionally so later unguarded reads
    # never raise AttributeError.
    self.val_log_together = False
    if self.train_config.log_and_val_interval is not None:
        self.val_log_together = True
    ckpts_dir = os.path.split(self.train_config.save_on.best_path)[0]
    # exist_ok avoids the race between the existence check and creation.
    os.makedirs(ckpts_dir, exist_ok=True)
    print("Logging with label: ", self.log_label)
def __init__(self, config):
    """Wrap torchvision MNIST with a transform pipeline built from config."""
    self.config = config
    # Instantiate each configured transform; None params mean no kwargs.
    steps = []
    for spec in config.transform_args:
        kwargs = dict(spec["params"]) if spec["params"] is not None else {}
        steps.append(
            configmapper.get_object("transforms", spec["type"])(**kwargs))
    self.transform = transforms.Compose(steps) if steps else None
    self.dataset = datasets.MNIST(
        config.load_dataset_args.path,
        download=True,
        train=self.config.split == "train",
        transform=self.transform,
    )
def val(
    self,
    model,
    dataset,
    global_step,
    train_logger=None,
    train_log_values=None,
    log=True,
):
    """Run one evaluation pass over ``dataset`` and compute loss + metrics.

    When ``log`` is True the scores are sent through ``self.log`` and its
    return value is returned; otherwise a dict of {loss name, metric name ->
    value} is returned.

    NOTE(review): block reconstructed from collapsed source; statement
    nesting inside the ``with`` block is best-effort but semantically
    equivalent either way.
    """
    append_text = self.val_config.append_text
    # Rebuild the criterion from config (with device kwarg when it takes params).
    criterion_params = self.train_config.criterion.params
    if criterion_params:
        criterion = configmapper.get_object(
            "losses", self.train_config.criterion.type)(**criterion_params.as_dict(),
                                                        device=self.device)
    else:
        criterion = configmapper.get_object(
            "losses", self.train_config.criterion.type)()
    # Reuse the training logger/log-values when provided.
    if train_logger is not None:
        val_logger = train_logger
    else:
        val_logger = Logger(**self.val_config.log.logger_params.as_dict())
    if train_log_values is not None:
        val_log_values = train_log_values
    else:
        val_log_values = self.val_config.log.vals.as_dict()
    # Geometric (graph) data needs the torch-geometric loader.
    if self._config.dataloader_type == "geometric":
        val_loader = GeometricDataLoader(
            dataset, **self.val_config.loader_params.as_dict())
    else:
        val_loader = DataLoader(dataset=dataset,
                                **self.val_config.loader_params.as_dict())
    all_outputs = torch.Tensor().to(self.device)
    if self.train_config.label_type == "float":
        all_labels = torch.FloatTensor().to(self.device)
    else:
        all_labels = torch.LongTensor().to(self.device)
    with torch.no_grad():
        model.eval()
        val_loss = 0
        for j, batch in enumerate(val_loader):
            for key in batch:
                batch[key] = batch[key].to(self.device)
            # Select only the configured input keys from the batch dict.
            inputs = {}
            for key in self._config.input_key:
                inputs[key] = batch[key]
            labels = batch["label"]
            # NOW THIS MUST BE HANDLED IN THE DATASET CLASS
            # if self.train_config.label_type == "float":
            #     # Specific to Float Type
            #     labels = labels.float()
            outputs = model(**inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            all_labels = torch.cat((all_labels, labels), 0)
            # Model returns two heads; blend them with the mixing weight
            # self.alpha before metric computation.
            outputs = outputs[0] * self.alpha + outputs[1] * (1 - self.alpha)
            if self.train_config.label_type == "float":
                all_outputs = torch.cat((all_outputs, outputs), 0)
            else:
                all_outputs = torch.cat(
                    (all_outputs, torch.argmax(outputs, axis=1)), 0)
        val_loss = val_loss / len(val_loader)
        val_loss_name = self.train_config.criterion.type
        # print(all_outputs, all_labels)
        metric_list = [
            metric(all_labels.cpu(), all_outputs.detach().cpu(),
                   **self.metrics[metric]) for metric in self.metrics
        ]
        metric_name_list = [
            metric["type"] for metric in self._config.main_config.metrics
        ]
        return_dic = dict(
            zip(
                [
                    val_loss_name,
                ] + metric_name_list,
                [
                    val_loss,
                ] + metric_list,
            ))
        if log:
            val_scores = self.log(
                val_loss,
                val_loss_name,
                metric_list,
                metric_name_list,
                val_logger,
                val_log_values,
                global_step,
                append_text,
            )
            return val_scores
        return return_dic
def train(self, model, train_dataset, val_dataset=None, logger=None):
    """Training loop: builds optimizer/scheduler/criterion from config,
    iterates epochs, periodically validates, saves the best checkpoint and
    logs hyperparameters/metrics.

    NOTE(review): block reconstructed from collapsed source; the trailing
    triple-quote after the function is preserved verbatim — it likely
    opens/closes a commented-out region elsewhere in the original file.
    """
    device = torch.device(self._config.main_config.device.name)
    model.to(device)
    # Optimizer/scheduler/criterion are resolved from the registry; the
    # params-less branches call each constructor with defaults only.
    optim_params = self.train_config.optimizer.params
    if optim_params:
        optimizer = configmapper.get_object(
            "optimizers", self.train_config.optimizer.type)(model.parameters(),
                                                            **optim_params.as_dict())
    else:
        optimizer = configmapper.get_object(
            "optimizers", self.train_config.optimizer.type)(model.parameters())
    if self.train_config.scheduler is not None:
        scheduler_params = self.train_config.scheduler.params
        if scheduler_params:
            scheduler = configmapper.get_object(
                "schedulers", self.train_config.scheduler.type)(
                    optimizer, **scheduler_params.as_dict())
        else:
            scheduler = configmapper.get_object(
                "schedulers", self.train_config.scheduler.type)(optimizer)
    criterion_params = self.train_config.criterion.params
    if criterion_params:
        criterion = configmapper.get_object(
            "losses", self.train_config.criterion.type)(**criterion_params.as_dict())
    else:
        criterion = configmapper.get_object(
            "losses", self.train_config.criterion.type)()
    train_loader = DataLoader(
        train_dataset,
        **self.train_config.loader_params.as_dict(),
        collate_fn=train_dataset.custom_collate_fn,
    )
    # train_logger = Logger(**self.train_config.log.logger_params.as_dict())
    max_epochs = self.train_config.max_epochs
    batch_size = self.train_config.loader_params.batch_size
    # A shared interval drives both validation and logging when set.
    if self.val_log_together:
        val_interval = self.train_config.log_and_val_interval
        log_interval = val_interval
    else:
        val_interval = self.train_config.val_interval
        log_interval = self.train_config.log.log_interval
    if logger is None:
        train_logger = Logger(
            **self.train_config.log.logger_params.as_dict())
    else:
        train_logger = logger
    train_log_values = self.train_config.log.values.as_dict()
    best_score = (-math.inf if self.train_config.save_on.desired == "max"
                  else math.inf)
    save_on_score = self.train_config.save_on.score
    best_step = -1
    best_model = None
    print("\nTraining\n")
    # print(max_steps)
    global_step = 0
    for epoch in range(1, max_epochs + 1):
        print("Epoch: {}/{}, Global Step: {}".format(
            epoch, max_epochs, global_step))
        train_loss = 0
        val_loss = 0
        all_labels = torch.FloatTensor().to(device)
        all_outputs = torch.Tensor().to(device)
        pbar = tqdm(total=math.ceil(len(train_dataset) / batch_size))
        pbar.set_description("Epoch " + str(epoch))
        val_counter = 0
        for step, batch in enumerate(train_loader):
            optimizer.zero_grad()
            batch = [torch.tensor(value, device=device) for value in batch]
            # print(batch[0].shape,batch)
            # Last element of the batch list is the labels; the rest are inputs.
            *inputs, labels = batch
            # print(inputs[0],inputs[1])
            # labels = labels.float()
            outputs = model(inputs)
            # print(outputs,labels)
            loss = criterion(outputs, labels)
            loss.backward()
            all_labels = torch.cat((all_labels, labels), 0)
            all_outputs = torch.cat(
                (all_outputs, torch.argmax(outputs, axis=1)), 0)
            train_loss += loss.item()
            optimizer.step()
            if self.train_config.scheduler is not None:
                # NOTE(review): ``i`` is undefined in this scope — almost
                # certainly should be ``step``; this raises NameError
                # whenever a scheduler is configured.
                scheduler.step(epoch + i / len(train_loader))
            # print(train_loss)
            # print(step+1)
            pbar.set_postfix_str(f"Train Loss: {train_loss /(step+1)}")
            pbar.update(1)
            global_step += 1
            # Need to check if we want global_step or local_step
            if val_dataset is not None and (
                    global_step) % val_interval == 0:
                print("\nEvaluating\n")
                # NOTE(review): this passes (criterion, device) positionally;
                # confirm it matches this class's val() signature.
                val_scores = self.val(
                    model,
                    val_dataset,
                    criterion,
                    device,
                    global_step,
                    train_logger,
                    train_log_values,
                )
                model.train()
                save_flag = 0
                if self.train_config.save_on is not None:
                    train_loss_name = self.train_config.criterion.type
                    training_loss = train_loss / global_step
                    metric_list = [
                        metric(
                            all_outputs.detach().cpu(),
                            all_labels.cpu(),
                            **self.metrics[metric],
                        ) for metric in self.metrics
                    ]
                    metric_name_list = [
                        metric["type"]
                        for metric in self._config.main_config.metrics
                    ]
                    train_scores = dict(
                        zip(
                            [
                                train_loss_name,
                            ] + metric_name_list,
                            [
                                training_loss,
                            ] + metric_list,
                        ))
                    # Track the best validation score in the configured direction.
                    if self.train_config.save_on.desired == "min":
                        if val_scores[save_on_score] < best_score:
                            save_flag = 1
                            best_score = val_scores[save_on_score]
                            best_step = global_step
                    else:
                        if val_scores[save_on_score] > best_score:
                            save_flag = 1
                            best_score = val_scores[save_on_score]
                            best_step = global_step
                    if save_flag:
                        # NOTE(review): this stores the whole model object
                        # under "model_state_dict" (not model.state_dict());
                        # loaders expecting a state dict will break.
                        torch.save(
                            {
                                "model_state_dict": model,
                                "best_step": best_step,
                                "best_score": best_score,
                                "save_on_score": save_on_score,
                            },
                            self.train_config.save_on.best_path.format(
                                self.log_label),
                        )
                        # Collect configured hyperparameters for logging.
                        hparam_list = []
                        hparam_name_list = []
                        if self.train_config.log.values.hparams is not None:
                            for hparam in self.train_config.log.values.hparams:
                                hparam_list.append(
                                    get_item_in_config(
                                        self._config, hparam["path"]))
                                hparam_name_list.append(hparam["name"])
                        val_keys, val_values = zip(*val_scores.items())
                        train_keys, train_values = zip(
                            *train_scores.items())
                        val_keys = list(val_keys)
                        train_keys = list(train_keys)
                        val_values = list(val_values)
                        train_values = list(train_values)
                        for i, key in enumerate(val_keys):
                            val_keys[i] = (
                                f"hparams/{self.log_label}/best_val_val_" +
                                val_keys[i])
                        for i, key in enumerate(train_keys):
                            train_keys[i] = (
                                f"hparams/{self.log_label}/best_val_train_" +
                                train_keys[i])
                        train_logger.save_hyperparams(
                            hparam_list,
                            hparam_name_list,
                            train_values + val_values,
                            train_keys + val_keys,
                        )
            if (global_step - 1) % log_interval == 0:
                print("\nLogging\n")
                train_loss_name = self.train_config.criterion.type
                outputs = torch.argmax(outputs, axis=1)
                metric_list = [
                    metric(outputs.detach().cpu(), labels.cpu(),
                           **self.metrics[metric])
                    for metric in self.metrics
                ]
                metric_name_list = [
                    metric["type"]
                    for metric in self._config.main_config.metrics
                ]
                train_scores = self.log(
                    train_loss / global_step,
                    train_loss_name,
                    metric_list,
                    metric_name_list,
                    train_logger,
                    train_log_values,
                    global_step,
                    append_text=self.train_config.append_text,
                )
        pbar.close()
        # Save a per-epoch checkpoint.
        if not os.path.exists(self.train_config.checkpoint.checkpoint_dir):
            os.makedirs(self.train_config.checkpoint.checkpoint_dir)
        torch.save(
            model.state_dict(),
            f"{self.train_config.checkpoint.checkpoint_dir}_{str(self.train_config.log.log_label)}"
            + "_" + str(epoch) + ".pth",
        )
'''
grid_search = args.grid_search # log_dir = "/content/drive/MyDrive/SuperPixels/logs/" log_dir = "./logs/" # Seed seed(train_config.main_config.seed) # Data if "main" in data_config.as_dict().keys(): # Regular Data if args.validation: train_data_config = data_config.train_val.train val_data_config = data_config.train_val.val else: train_data_config = data_config.train val_data_config = data_config.val train_data = configmapper.get_object( "datasets", train_data_config.name)(train_data_config) val_data = configmapper.get_object("datasets", val_data_config.name)(val_data_config) else: # HF Type Data dataset = configmapper.get_object("datasets", data_config.name)(data_config) train_data = dataset.train_dataset["train"] val_data = dataset.train_dataset["test"] # Logger logger = Logger(log_path=os.path.join( log_dir, args.config_dir.strip("/").split("/")[-1] + ("" if args.validation else "_orig"),
default=False, ) ### Update Tips : Can provide more options to the user. ### Can also provide multiple verbosity levels. args = parser.parse_args() # print(vars(args)) model_config = Config(path=args.model) train_config = Config(path=args.train) data_config = Config(path=args.data) grid_search = args.grid_search # verbose = args.verbose # Preprocessor, Dataset, Model preprocessor = configmapper.get_object( "preprocessors", data_config.main.preprocessor.name)(data_config) if grid_search: train_configs = generate_grid_search_configs(train_config, train_config.grid_search) print(f"Total Configurations Generated: {len(train_configs)}") logger = Logger(**train_config.grid_search.hyperparams.train.log. logger_params.as_dict()) for train_config in train_configs: print(train_config) ## Seed seed(train_config.main_config.seed)
help="The configuration for model training/evaluation", ) parser.add_argument( "--data", type=str, action="store", help="The configuration for data", ) args = parser.parse_args() # print(vars(args)) train_config = OmegaConf.load(args.train) data_config = OmegaConf.load(args.data) print(data_config.train_files) dataset = configmapper.get_object("datasets", data_config.name)(data_config) untokenized_train_dataset = dataset.dataset tokenized_train_dataset = dataset.tokenized_inputs tokenized_test_dataset = dataset.test_tokenized_inputs model_class = configmapper.get_object("models", train_config.model_name) if "toxic-bert" in train_config.pretrained_args.pretrained_model_name_or_path: toxicbert_model = AutoModelForSequenceClassification.from_pretrained( train_config.pretrained_args.pretrained_model_name_or_path) train_config.pretrained_args.pretrained_model_name_or_path = "bert-base-uncased" model = model_class.from_pretrained(**train_config.pretrained_args) model.bert = deepcopy(toxicbert_model.bert) gc.collect() elif "toxic-roberta" in train_config.pretrained_args.pretrained_model_name_or_path:
def __init__(self, config):
    """Instantiate the CNN and GNN sub-models named in the config."""
    super().__init__()
    cnn_cfg = config.cnn_config
    gnn_cfg = config.gnn_config
    self.cnn = configmapper.get_object("models", cnn_cfg.name)(cnn_cfg)
    self.gcn = configmapper.get_object("models", gnn_cfg.name)(gnn_cfg)
def train(self, model, train_dataset, val_dataset=None, logger=None):
    """Training loop with periodic validation, best-checkpoint saving via
    self.save/self.check_best, per-epoch checkpoints, and a final
    evaluation + hyperparameter dump on the last epoch.

    NOTE(review): block reconstructed from collapsed source; the nesting of
    the final-epoch section is best-effort.
    """
    device = torch.device(self._config.main_config.device.name)
    model.to(device)
    # Optimizer/scheduler/criterion resolved from the registry; the
    # params-less branches call each constructor with defaults only.
    optim_params = self.train_config.optimizer.params
    if optim_params:
        optimizer = configmapper.get_object(
            "optimizers", self.train_config.optimizer.type)(model.parameters(),
                                                            **optim_params.as_dict())
    else:
        optimizer = configmapper.get_object(
            "optimizers", self.train_config.optimizer.type)(model.parameters())
    if self.train_config.scheduler is not None:
        scheduler_params = self.train_config.scheduler.params
        if scheduler_params:
            scheduler = configmapper.get_object(
                "schedulers", self.train_config.scheduler.type)(
                    optimizer, **scheduler_params.as_dict())
        else:
            scheduler = configmapper.get_object(
                "schedulers", self.train_config.scheduler.type)(optimizer)
    criterion_params = self.train_config.criterion.params
    if criterion_params:
        criterion = configmapper.get_object(
            "losses", self.train_config.criterion.type)(**criterion_params.as_dict())
    else:
        criterion = configmapper.get_object(
            "losses", self.train_config.criterion.type)()
    # Use the dataset's custom collate function when it defines one.
    if "custom_collate_fn" in dir(train_dataset):
        train_loader = DataLoader(
            dataset=train_dataset,
            collate_fn=train_dataset.custom_collate_fn,
            **self.train_config.loader_params.as_dict(),
        )
    else:
        train_loader = DataLoader(
            dataset=train_dataset,
            **self.train_config.loader_params.as_dict())
    # train_logger = Logger(**self.train_config.log.logger_params.as_dict())
    max_epochs = self.train_config.max_epochs
    batch_size = self.train_config.loader_params.batch_size
    # A shared interval drives both validation and logging when set.
    if self.val_log_together:
        val_interval = self.train_config.log_and_val_interval
        log_interval = val_interval
    else:
        val_interval = self.train_config.val_interval
        log_interval = self.train_config.log.log_interval
    if logger is None:
        train_logger = Logger(
            **self.train_config.log.logger_params.as_dict())
    else:
        train_logger = logger
    train_log_values = self.train_config.log.values.as_dict()
    best_score = (-math.inf if self.train_config.save_on.desired == "max"
                  else math.inf)
    save_on_score = self.train_config.save_on.score
    best_step = -1
    best_model = None
    best_hparam_list = None
    best_hparam_name_list = None
    best_metrics_list = None
    best_metrics_name_list = None
    # print("\nTraining\n")
    # print(max_steps)
    global_step = 0
    for epoch in range(1, max_epochs + 1):
        print("Epoch: {}/{}, Global Step: {}".format(
            epoch, max_epochs, global_step))
        train_loss = 0
        val_loss = 0
        # Accumulator dtype matches the configured label type.
        if (self.train_config.label_type == 'float'):
            all_labels = torch.FloatTensor().to(device)
        else:
            all_labels = torch.LongTensor().to(device)
        all_outputs = torch.Tensor().to(device)
        train_scores = None
        val_scores = None
        pbar = tqdm(total=math.ceil(len(train_dataset) / batch_size))
        pbar.set_description("Epoch " + str(epoch))
        val_counter = 0
        for step, batch in enumerate(train_loader):
            model.train()
            optimizer.zero_grad()
            inputs, labels = batch
            if (self.train_config.label_type == 'float'
                ):  ##Specific to Float Type
                labels = labels.float()
            for key in inputs:
                inputs[key] = inputs[key].to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            loss = criterion(torch.squeeze(outputs), labels)
            loss.backward()
            all_labels = torch.cat((all_labels, labels), 0)
            if (self.train_config.label_type == 'float'):
                all_outputs = torch.cat((all_outputs, outputs), 0)
            else:
                all_outputs = torch.cat(
                    (all_outputs, torch.argmax(outputs, axis=1)), 0)
            train_loss += loss.item()
            optimizer.step()
            if self.train_config.scheduler is not None:
                # ReduceLROnPlateau needs the monitored quantity; other
                # schedulers step unconditionally.
                if isinstance(scheduler, ReduceLROnPlateau):
                    scheduler.step(train_loss / (step + 1))
                else:
                    scheduler.step()
            # print(train_loss)
            # print(step+1)
            pbar.set_postfix_str(f"Train Loss: {train_loss /(step+1)}")
            pbar.update(1)
            global_step += 1
            # Need to check if we want global_step or local_step
            if val_dataset is not None and (global_step -
                                            1) % val_interval == 0:
                # print("\nEvaluating\n")
                # NOTE(review): passes (criterion, device) positionally;
                # confirm it matches this class's val() signature.
                val_scores = self.val(
                    model,
                    val_dataset,
                    criterion,
                    device,
                    global_step,
                    train_logger,
                    train_log_values,
                )
                #save_flag = 0
                if self.train_config.save_on is not None:
                    ## BEST SCORES UPDATING
                    train_scores = self.get_scores(
                        train_loss,
                        global_step,
                        self.train_config.criterion.type,
                        all_outputs,
                        all_labels,
                    )
                    best_score, best_step, save_flag = self.check_best(
                        val_scores, save_on_score, best_score, global_step)
                    store_dict = {
                        "model_state_dict": model.state_dict(),
                        "best_step": best_step,
                        "best_score": best_score,
                        "save_on_score": save_on_score,
                    }
                    path = self.train_config.save_on.best_path.format(
                        self.log_label)
                    self.save(store_dict, path, save_flag)
                    if save_flag and train_log_values[
                            "hparams"] is not None:
                        (
                            best_hparam_list,
                            best_hparam_name_list,
                            best_metrics_list,
                            best_metrics_name_list,
                        ) = self.update_hparams(train_scores, val_scores,
                                                desc="best_val")
            # pbar.close()
            if (global_step - 1) % log_interval == 0:
                # print("\nLogging\n")
                train_loss_name = self.train_config.criterion.type
                metric_list = [
                    metric(all_labels.cpu(), all_outputs.detach().cpu(),
                           **self.metrics[metric])
                    for metric in self.metrics
                ]
                metric_name_list = [
                    metric['type']
                    for metric in self._config.main_config.metrics
                ]
                train_scores = self.log(
                    train_loss / (step + 1),
                    train_loss_name,
                    metric_list,
                    metric_name_list,
                    train_logger,
                    train_log_values,
                    global_step,
                    append_text=self.train_config.append_text,
                )
        pbar.close()
        if not os.path.exists(self.train_config.checkpoint.checkpoint_dir):
            os.makedirs(self.train_config.checkpoint.checkpoint_dir)
        # Optional per-epoch checkpoint.
        if self.train_config.save_after_epoch:
            store_dict = {
                "model_state_dict": model.state_dict(),
            }
            path = f"{self.train_config.checkpoint.checkpoint_dir}_{str(self.train_config.log.log_label)}_{str(epoch)}.pth"
            self.save(store_dict, path, save_flag=1)
        # Final epoch: evaluate/log once more and store final + best scores.
        if epoch == max_epochs:
            # print("\nEvaluating\n")
            val_scores = self.val(
                model,
                val_dataset,
                criterion,
                device,
                global_step,
                train_logger,
                train_log_values,
            )
            # print("\nLogging\n")
            train_loss_name = self.train_config.criterion.type
            metric_list = [
                metric(all_labels.cpu(), all_outputs.detach().cpu(),
                       **self.metrics[metric]) for metric in self.metrics
            ]
            metric_name_list = [
                metric['type']
                for metric in self._config.main_config.metrics
            ]
            train_scores = self.log(
                train_loss / len(train_loader),
                train_loss_name,
                metric_list,
                metric_name_list,
                train_logger,
                train_log_values,
                global_step,
                append_text=self.train_config.append_text,
            )
            if self.train_config.save_on is not None:
                ## BEST SCORES UPDATING
                train_scores = self.get_scores(
                    train_loss,
                    len(train_loader),
                    self.train_config.criterion.type,
                    all_outputs,
                    all_labels,
                )
                best_score, best_step, save_flag = self.check_best(
                    val_scores, save_on_score, best_score, global_step)
                store_dict = {
                    "model_state_dict": model.state_dict(),
                    "best_step": best_step,
                    "best_score": best_score,
                    "save_on_score": save_on_score,
                }
                path = self.train_config.save_on.best_path.format(
                    self.log_label)
                self.save(store_dict, path, save_flag)
                if save_flag and train_log_values["hparams"] is not None:
                    (
                        best_hparam_list,
                        best_hparam_name_list,
                        best_metrics_list,
                        best_metrics_name_list,
                    ) = self.update_hparams(train_scores, val_scores,
                                            desc="best_val")
                ## FINAL SCORES UPDATING + STORING
                train_scores = self.get_scores(
                    train_loss,
                    len(train_loader),
                    self.train_config.criterion.type,
                    all_outputs,
                    all_labels,
                )
                store_dict = {
                    "model_state_dict": model.state_dict(),
                    "final_step": global_step,
                    "final_score": train_scores[save_on_score],
                    "save_on_score": save_on_score,
                }
                path = self.train_config.save_on.final_path.format(
                    self.log_label)
                self.save(store_dict, path, save_flag=1)
                if train_log_values["hparams"] is not None:
                    (
                        final_hparam_list,
                        final_hparam_name_list,
                        final_metrics_list,
                        final_metrics_name_list,
                    ) = self.update_hparams(train_scores, val_scores,
                                            desc="final")
                    train_logger.save_hyperparams(
                        best_hparam_list,
                        best_hparam_name_list,
                        [
                            int(self.log_label),
                        ] + best_metrics_list + final_metrics_list,
                        [
                            "hparams/log_label",
                        ] + best_metrics_name_list + final_metrics_name_list,
                    )
action="store", help="The configuration for data", ) args = parser.parse_args() ig_config = Config(path=args.config) model_config = Config(path=args.model) data_config = Config(path=args.data) # verbose = args.verbose # Preprocessor, Dataset, Model preprocessor = configmapper.get_object( "preprocessors", data_config.main.preprocessor.name )(data_config) model, train_data, val_data = preprocessor.preprocess(model_config, data_config) tokenizer = AutoTokenizer.from_pretrained( model_config.params.pretrained_model_name_or_path ) # model = configmapper.get_object("models", model_config.name).from_pretrained( # 'bert-large-uncased' # ) model.load_state_dict(torch.load(ig_config.checkpoint_path)) # Initialize BertIntegratedGradients big = MyIntegratedGradients(ig_config, model, val_data, tokenizer)
def train(self, model, train_dataset, val_dataset=None, logger=None):
    """Run the full training loop for ``model`` on ``train_dataset``.

    Builds the optimizer, (optional) LR scheduler, and loss criterion from
    ``self.train_config`` via ``configmapper``, then iterates for
    ``train_config.max_epochs`` epochs. Depending on
    ``train_config.interval_type`` ("step" or "epoch"), it periodically:
      * evaluates on ``val_dataset`` via ``self.val`` (every ``val_interval``),
      * logs train loss + metrics via ``self.log`` (every ``log_interval``),
      * checkpoints the best model (per ``train_config.save_on``) and, on the
        final epoch, the final model plus hyperparameter summaries.

    Args:
        model: torch module to train; moved to ``self.device``.
        train_dataset: dataset wrapped in a (Geometric)DataLoader per
            ``self._config.dataloader_type``.
        val_dataset: optional validation dataset; when None, validation and
            best-checkpoint selection are skipped at step/epoch intervals.
        logger: optional pre-built Logger; when None a new ``Logger`` is
            created from ``train_config.log.logger_params``.

    Side effects: writes checkpoints under ``train_logger.log_path``, logs
    scalars/hparams through ``train_logger``, mutates ``model`` in place.
    Returns None.
    """
    model.to(self.device)

    # --- Optimizer / scheduler / criterion are all resolved by name from
    # --- the config registry; "params" may be empty/None, hence the branches.
    optim_params = self.train_config.optimizer.params
    if optim_params:
        optimizer = configmapper.get_object(
            "optimizers", self.train_config.optimizer.type)(model.parameters(), **optim_params.as_dict())
    else:
        optimizer = configmapper.get_object(
            "optimizers", self.train_config.optimizer.type)(model.parameters())

    if self.train_config.scheduler is not None:
        scheduler_params = self.train_config.scheduler.params
        if scheduler_params:
            scheduler = configmapper.get_object(
                "schedulers", self.train_config.scheduler.type)(
                    optimizer, **scheduler_params.as_dict())
        else:
            scheduler = configmapper.get_object(
                "schedulers", self.train_config.scheduler.type)(optimizer)

    criterion_params = self.train_config.criterion.params
    if criterion_params:
        # Criterion receives the device explicitly (e.g. for class weights).
        criterion = configmapper.get_object(
            "losses", self.train_config.criterion.type)(**criterion_params.as_dict(), device=self.device)
    else:
        criterion = configmapper.get_object(
            "losses", self.train_config.criterion.type)()

    # Graph datasets need torch_geometric's loader; everything else uses the
    # plain torch DataLoader.
    if self._config.dataloader_type == "geometric":
        train_loader = GeometricDataLoader(
            train_dataset, **self.train_config.loader_params.as_dict())
    else:
        train_loader = DataLoader(
            dataset=train_dataset, **self.train_config.loader_params.as_dict())

    max_epochs = self.train_config.max_epochs
    batch_size = self.train_config.loader_params.batch_size
    interval_type = self.train_config.interval_type
    val_interval = self.train_config.val_interval
    log_interval = self.train_config.log.log_interval

    if logger is None:
        train_logger = Logger(
            **self.train_config.log.logger_params.as_dict())
    else:
        train_logger = logger
    train_log_values = self.train_config.log.vals.as_dict()

    # Best-checkpoint bookkeeping: start from the worst possible score for
    # the configured direction ("max" -> -inf, otherwise +inf).
    best_score = (-math.inf if self.train_config.save_on.desired == "max"
                  else math.inf)
    save_on_score = self.train_config.save_on.score
    best_step = -1
    best_hparam_list = None
    best_hparam_name_list = None
    best_metrics_list = None
    best_metrics_name_list = None

    # print("\nTraining\n")
    # print(max_steps)
    global_step = 0
    for epoch in range(1, max_epochs + 1):
        print("Epoch: {}/{}, Global Step: {}".format(
            epoch, max_epochs, global_step))
        train_loss = 0
        # Per-epoch accumulators for metric computation; dtype of the label
        # accumulator follows the configured label type.
        if self.train_config.label_type == "float":
            all_labels = torch.FloatTensor().to(self.device)
        else:
            all_labels = torch.LongTensor().to(self.device)
        all_outputs = torch.Tensor().to(self.device)
        train_scores = None
        val_scores = None

        pbar = tqdm(total=math.ceil(len(train_dataset) / batch_size))
        pbar.set_description("Epoch " + str(epoch))
        for step, batch in enumerate(train_loader):
            model.train()
            optimizer.zero_grad()
            # Batch is a dict of tensors; move everything to the device and
            # select the configured model-input keys.
            for key in batch:
                batch[key] = batch[key].to(self.device)
            inputs = {}
            for key in self._config.input_key:
                inputs[key] = batch[key]
            labels = batch["label"]
            # NOW THIS MUST BE HANDLED IN THE DATASET CLASS
            # if self.train_config.label_type == "float":
            #     # Specific to Float Type
            #     labels = labels.float()
            outputs = model(**inputs)
            # Can remove this at a later stage?
            # I think `losses.backward()` should work.
            loss = criterion(outputs, labels)
            loss.backward()
            all_labels = torch.cat((all_labels, labels), 0)
            # Model appears to return a pair of output tensors that are
            # blended by self.alpha before metric computation — the loss
            # above, however, is computed on the raw pair.
            # NOTE(review): confirm the criterion really expects the
            # un-blended pair.
            outputs = outputs[0] * self.alpha + outputs[1] * (1 - self.alpha)
            if self.train_config.label_type == "float":
                all_outputs = torch.cat((all_outputs, outputs), 0)
            else:
                # Classification path: store predicted class indices.
                all_outputs = torch.cat(
                    (all_outputs, torch.argmax(outputs, axis=1)), 0)
            train_loss += loss.item()
            optimizer.step()
            if self.train_config.scheduler is not None:
                # NOTE(review): scheduler steps every batch (ReduceLROnPlateau
                # on running mean loss, others unconditionally). Many
                # schedulers are designed for per-epoch stepping — confirm
                # this cadence is intended.
                if isinstance(scheduler, ReduceLROnPlateau):
                    scheduler.step(train_loss / (step + 1))
                else:
                    scheduler.step()
            # print(train_loss)
            # print(step+1)
            pbar.set_postfix_str(f"Train Loss: {train_loss /(step+1)}")
            pbar.update(1)
            global_step += 1

            # Need to check if we want global_step or local_step
            if interval_type == "step":
                # (global_step - 1) % interval == 0 means the very first
                # step also triggers validation/logging.
                if (val_dataset is not None
                        and (global_step - 1) % val_interval == 0):
                    # print("\nEvaluating\n")
                    val_scores = self.val(
                        model,
                        val_dataset,
                        global_step,
                        train_logger,
                        train_log_values,
                    )
                    # save_flag = 0
                    if self.train_config.save_on is not None:
                        # BEST SCORES UPDATING
                        train_scores = self.get_scores(
                            train_loss,
                            global_step,
                            self.train_config.criterion.type,
                            all_outputs,
                            all_labels,
                        )
                        best_score, best_step, save_flag = self.check_best(
                            val_scores, save_on_score, best_score, global_step)
                        store_dict = {
                            "model_state_dict": model.state_dict(),
                            "best_step": best_step,
                            "best_score": best_score,
                            "save_on_score": save_on_score,
                        }
                        path = os.path.join(
                            train_logger.log_path,
                            self.train_config.save_on.best_path.format(
                                self.log_label),
                        )
                        # save_flag decides whether this checkpoint is
                        # actually written.
                        self.save(store_dict, path, save_flag)
                        if save_flag and train_log_values[
                                "hparams"] is not None:
                            (
                                best_hparam_list,
                                best_hparam_name_list,
                                best_metrics_list,
                                best_metrics_name_list,
                            ) = self.update_hparams(train_scores,
                                                    val_scores,
                                                    desc="best_val")
                # pbar.close()
                if (global_step - 1) % log_interval == 0:
                    # print("\nLogging\n")
                    train_loss_name = self.train_config.criterion.type
                    # self.metrics appears to map metric callables to their
                    # kwargs dicts — TODO confirm against its construction.
                    metric_list = [
                        metric(
                            all_labels.cpu(),
                            all_outputs.detach().cpu(),
                            **self.metrics[metric],
                        ) for metric in self.metrics
                    ]
                    metric_name_list = [
                        metric["type"]
                        for metric in self._config.main_config.metrics
                    ]
                    train_scores = self.log(
                        train_loss / (step + 1),
                        train_loss_name,
                        metric_list,
                        metric_name_list,
                        train_logger,
                        train_log_values,
                        global_step,
                        append_text=self.train_config.append_text,
                    )
        pbar.close()

        # Ensure the per-run checkpoint directory exists before any
        # end-of-epoch save.
        if not os.path.exists(
                os.path.join(train_logger.log_path,
                             self.train_config.checkpoint.checkpoint_dir)):
            os.makedirs(
                os.path.join(
                    train_logger.log_path,
                    self.train_config.checkpoint.checkpoint_dir,
                ))
        if self.train_config.save_after_epoch:
            store_dict = {
                "model_state_dict": model.state_dict(),
            }
            # NOTE(review): no path separator between checkpoint_dir and
            # "epoch_..." — the file lands next to the directory with the
            # dir name as a prefix. Confirm this naming is intended.
            path = f"{os.path.join(train_logger.log_path, self.train_config.checkpoint.checkpoint_dir)}epoch_{str(self.train_config.log.log_label)}_{str(epoch)}.pth"
            self.save(store_dict, path, save_flag=1)

        if interval_type == "epoch":
            if val_dataset is not None and (epoch) % val_interval == 0:
                # print("\nEvaluating\n")
                val_scores = self.val(
                    model,
                    val_dataset,
                    epoch,
                    train_logger,
                    train_log_values,
                )
                # save_flag = 0
                if self.train_config.save_on is not None:
                    # BEST SCORES UPDATING
                    train_scores = self.get_scores(
                        train_loss,
                        epoch,
                        self.train_config.criterion.type,
                        all_outputs,
                        all_labels,
                    )
                    best_score, best_epoch, save_flag = self.check_best(
                        val_scores, save_on_score, best_score, epoch)
                    store_dict = {
                        "model_state_dict": model.state_dict(),
                        "best_epoch": best_epoch,
                        "best_score": best_score,
                        "save_on_score": save_on_score,
                    }
                    path = os.path.join(
                        train_logger.log_path,
                        self.train_config.save_on.best_path.format(
                            self.log_label),
                    )
                    self.save(store_dict, path, save_flag)
                    if save_flag and train_log_values[
                            "hparams"] is not None:
                        (
                            best_hparam_list,
                            best_hparam_name_list,
                            best_metrics_list,
                            best_metrics_name_list,
                        ) = self.update_hparams(train_scores,
                                                val_scores,
                                                desc="best_val")
            # pbar.close()
            if (epoch) % log_interval == 0:
                # print("\nLogging\n")
                train_loss_name = self.train_config.criterion.type
                metric_list = [
                    metric(
                        all_labels.cpu(),
                        all_outputs.detach().cpu(),
                        **self.metrics[metric],
                    ) for metric in self.metrics
                ]
                metric_name_list = [
                    metric["type"]
                    for metric in self._config.main_config.metrics
                ]
                train_scores = self.log(
                    train_loss / len(train_loader),
                    train_loss_name,
                    metric_list,
                    metric_name_list,
                    train_logger,
                    train_log_values,
                    epoch,
                    append_text=self.train_config.append_text,
                )

        # --- End of training: one final validate/log pass, then write the
        # --- best-checkpoint metadata, the final checkpoint, and hparams.
        if epoch == max_epochs:
            # print("\nEvaluating\n")
            if interval_type == "step":
                # NOTE(review): called even when val_dataset is None —
                # self.val / the later check_best(val_scores, ...) would then
                # receive None; confirm callers always pass a val_dataset
                # when save_on is configured.
                val_scores = self.val(
                    model,
                    val_dataset,
                    global_step,
                    train_logger,
                    train_log_values,
                )
            # print("\nLogging\n")
            train_loss_name = self.train_config.criterion.type
            metric_list = [
                metric(
                    all_labels.cpu(),
                    all_outputs.detach().cpu(),
                    **self.metrics[metric],
                ) for metric in self.metrics
            ]
            metric_name_list = [
                metric["type"]
                for metric in self._config.main_config.metrics
            ]
            train_scores = self.log(
                train_loss / len(train_loader),
                train_loss_name,
                metric_list,
                metric_name_list,
                train_logger,
                train_log_values,
                global_step,
                append_text=self.train_config.append_text,
            )
            if self.train_config.save_on is not None:
                # BEST SCORES UPDATING
                train_scores = self.get_scores(
                    train_loss,
                    len(train_loader),
                    self.train_config.criterion.type,
                    all_outputs,
                    all_labels,
                )
                best_score, best_step, save_flag = self.check_best(
                    val_scores, save_on_score, best_score, global_step)
                store_dict = {
                    "model_state_dict": model.state_dict(),
                    "best_step": best_step,
                    "best_score": best_score,
                    "save_on_score": save_on_score,
                }
                path = os.path.join(
                    train_logger.log_path,
                    self.train_config.save_on.best_path.format(
                        self.log_label),
                )
                self.save(store_dict, path, save_flag)
                if save_flag and train_log_values["hparams"] is not None:
                    (
                        best_hparam_list,
                        best_hparam_name_list,
                        best_metrics_list,
                        best_metrics_name_list,
                    ) = self.update_hparams(train_scores,
                                            val_scores,
                                            desc="best_val")
                # FINAL SCORES UPDATING + STORING
                train_scores = self.get_scores(
                    train_loss,
                    len(train_loader),
                    self.train_config.criterion.type,
                    all_outputs,
                    all_labels,
                )
                store_dict = {
                    "model_state_dict": model.state_dict(),
                    "final_step": global_step,
                    "final_score": train_scores[save_on_score],
                    "save_on_score": save_on_score,
                }
                path = os.path.join(
                    train_logger.log_path,
                    self.train_config.save_on.final_path.format(
                        self.log_label),
                )
                # Final checkpoint is written unconditionally.
                self.save(store_dict, path, save_flag=1)
                if train_log_values["hparams"] is not None:
                    (
                        final_hparam_list,
                        final_hparam_name_list,
                        final_metrics_list,
                        final_metrics_name_list,
                    ) = self.update_hparams(train_scores,
                                            val_scores,
                                            desc="final")
                    # best_* lists may still be None here if no best
                    # checkpoint was ever saved — presumably save_hyperparams
                    # tolerates that; verify.
                    train_logger.save_hyperparams(
                        best_hparam_list,
                        best_hparam_name_list,
                        [
                            int(self.log_label),
                        ] + best_metrics_list + final_metrics_list,
                        [
                            "hparams/log_label",
                        ] + best_metrics_name_list + final_metrics_name_list,
                    )
# help="The configuration for data", # ) args = parser.parse_args() model_config = OmegaConf.load(os.path.join(args.config_dir, "model.yaml")) train_config = OmegaConf.load(os.path.join(args.config_dir, "train.yaml")) data_config = OmegaConf.load(os.path.join(args.config_dir, "dataset.yaml")) # Seed seed(train_config.args.seed) # just in case # Data if "main" in dict(data_config).keys(): # Regular Data train_data_config = data_config.train val_data_config = data_config.val train_data = configmapper.get_object( "datasets", train_data_config.name)(train_data_config) val_data = configmapper.get_object("datasets", val_data_config.name)(val_data_config) else: # HF Type Data dataset = configmapper.get_object("datasets", data_config.name)(data_config) train_data = dataset.train_dataset["train"] val_data = dataset.train_dataset["test"] # Model model = configmapper.get_object("models", model_config.name)(model_config) args = TrainingArguments( **OmegaConf.to_container(train_config.args, resolve=True)) # Checking for Checkpoints