def transform(self, X, y=None):
    logging.info(" DataVectorizer.transform running...")
    start_time = time.time()

    # Apply pre-processing function
    if self.preprocess_func:
        _X = parallelApply(X, self.preprocess_func, self.preprocess_ncore)
    else:
        _X = X

    # Encode labels with the LabelEncoder fitted in fit_transform()
    if y is not None and self.encode_label:
        _y = self.label_encoder.transform(y)
    else:
        _y = np.asarray(y)

    # Transform sentences into lists of word indexes
    _X = self.texts_to_sequences(_X)

    logging.info(" DataVectorizer.transform completed - Time elapsed: " +
                 get_elapsed_time(start_time))
    if y is not None:
        return _X, _y
    else:
        return _X
def to_numpy(self, dest_folder: str):
    logging.info("Starting Data Export...")
    start_time = time.time()

    if self.tokenizer is not None:
        # Tokenizer.to_json() returns a JSON string, so write it out explicitly
        with open(os.path.join(dest_folder, "tokenizer.json"), "w", encoding="utf8") as f:
            f.write(self.tokenizer.to_json())

    if self.X_train is not None:
        np.savez_compressed(
            os.path.join(dest_folder, "train_data_nn.npz"),
            X=self.X_train,
            y=self.y_train,
        )
    if self.X_test is not None:
        np.savez_compressed(
            os.path.join(dest_folder, "test_data_nn.npz"),
            X=self.X_test,
            y=self.y_test,
        )
    if self.X_val is not None:
        np.savez_compressed(
            os.path.join(dest_folder, "val_data_nn.npz"),
            X=self.X_val,
            y=self.y_val,
        )

    logging.info("Data Export Completed - Time elapsed: " + get_elapsed_time(start_time))
def fit_transform(self, X, y=None):
    logging.info("DataVectorizer.fit_transform running...")
    start_time = time.time()

    # Apply pre-processing function
    if self.preprocess_func:
        _X = parallelApply(X, self.preprocess_func, self.preprocess_ncore)
    else:
        _X = X

    if y is not None:
        if self.encode_label:
            self.label_encoder = LabelEncoder()
            _y = self.label_encoder.fit_transform(y)
            self.labels = self.label_encoder.classes_
        else:
            _y = np.asarray(y)

    if self.preprocess_func is None:
        # simple punctuation-aware tokenizer when no pre-processing is supplied
        re_tok = re.compile("([%s“”¨«»®´·º½¾¿¡§£₤‘’])" % string.punctuation)
        tokenizer = lambda x: re_tok.sub(r" \1 ", x).split()
        self.vectorizer = CountVectorizer(
            tokenizer=tokenizer,
            ngram_range=self.ngram_range,
            min_df=self.min_df,
            max_df=self.max_df,
            max_features=self.max_features,
            stop_words=self.stop_words,
            lowercase=True,
        )
    else:
        self.vectorizer = CountVectorizer(
            tokenizer=None,
            ngram_range=self.ngram_range,
            min_df=self.min_df,
            max_df=self.max_df,
            max_features=self.max_features,
            stop_words=self.stop_words,
        )

    # Fit the vocabulary (the returned document-term matrix is not used)
    self.vectorizer.fit_transform(_X)
    kept_tokens = set(self.vectorizer.vocabulary_.keys())
    for token in kept_tokens:
        self.index2w.append(token)
        self.w2index[token] = len(self.index2w) - 1
    self.vocab_size = len(self.index2w)
    del kept_tokens

    # Transform sentences into lists of word indexes
    _X = self.texts_to_sequences(_X)

    logging.info(" DataVectorizer.fit_transform completed - Time elapsed: " +
                 get_elapsed_time(start_time))
    if y is not None:
        return _X, _y
    else:
        return _X
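# --- Usage sketch (illustrative only) ---------------------------------------
# A minimal example of the fit_transform / transform contract above, built
# with the same positional/keyword arguments DataPreparation-style callers
# use. The toy texts, labels and parameter values are hypothetical.
#
#   vec = DataVectorizer(None, 2, ngram_range=(1, 1), min_df=1, max_df=1.0,
#                        max_features=1000, stop_words=None)
#   X_train_idx, y_train = vec.fit_transform(["good movie", "bad movie"], y=[1, 0])
#   X_new_idx = vec.transform(["good acting"])   # lists of word indexes
#   print(vec.vocab_size, vec.w2index["movie"])
# -----------------------------------------------------------------------------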
def get_pretrained_vecs(input_vec_file: str, target_vocab: dict, dim: int = 300, output_file=None):
    logging.getLogger(__name__)
    start_time = time.time()
    found_words = 0
    missing_words = 0

    # Import the word-vector text file into a pandas dataframe (quicker)
    df_wvecs = pd.read_csv(input_vec_file, sep=" ", quoting=3, header=None, index_col=0)

    # Create a word-index dict from the pretrained vectors
    w_index = df_wvecs.index.tolist()
    w_index = dict(zip(w_index, range(len(w_index))))
    np_w_vecs = df_wvecs.to_numpy()
    del df_wvecs

    # Initialize the embedding matrix weights; row 0 is reserved for padding
    emb_mean, emb_std = np_w_vecs.mean(), np_w_vecs.std()
    embedding_matrix = np.random.normal(emb_mean, emb_std, (len(target_vocab), dim))
    embedding_matrix[0] = np.zeros(dim)

    # Copy the pretrained vector weights into embedding_matrix
    # TODO: vectorize the following code
    for k, v in tqdm(
            target_vocab.items(),
            desc="{} Processing pretrained vectors...".format(
                datetime.today().strftime("%Y-%m-%d %H:%M:%S")),
            total=len(target_vocab),
    ):
        if k in w_index:
            found_words += 1
            embedding_matrix[v] = np_w_vecs[w_index[k]]
        else:
            missing_words += 1

    if output_file is not None:
        np.save(output_file, embedding_matrix)

    logging.info("Matching words: {} - input vocab: {} - coverage: {}".format(
        found_words, len(target_vocab), found_words / len(target_vocab)))
    logging.info("Pretrained Vectors Preparation - Completed - Time elapsed: " +
                 get_elapsed_time(start_time))
    return embedding_matrix
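# --- Usage sketch (illustrative only) ---------------------------------------
# get_pretrained_vecs() expects a whitespace-separated text file of word
# vectors (GloVe / fastText style: "<token> v1 v2 ... v300") and a
# {token: row_index} vocabulary such as DataVectorizer.w2index. The file
# names below are hypothetical.
#
#   emb = get_pretrained_vecs("glove.6B.300d.txt", vectorizer.w2index,
#                             dim=300, output_file="emb_matrix.npy")
#   # emb.shape == (len(vectorizer.w2index), 300); row 0 stays all zeros (padding)
# -----------------------------------------------------------------------------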
def from_numpy(
    self,
    train_data_file: str,
    test_data_file: str = None,
    val_data_file: str = None,
    ds_type="TensorDataset",
):
    logging.info("Starting Data Preparation...")
    start_time = time.time()

    # Reload the tokenizer exported by to_numpy(); this assumes the
    # "tokenizer.json" file sits next to the training npz file
    tokenizer_file = os.path.join(os.path.dirname(train_data_file), "tokenizer.json")
    if os.path.isfile(tokenizer_file):
        with open(tokenizer_file, encoding="utf8") as f:
            self.tokenizer = text.tokenizer_from_json(f.read())

    train_npz = np.load(train_data_file, allow_pickle=True)
    self.X_train = train_npz["X"].item()
    self.y_train = train_npz["y"]
    self.num_classes = len(np.unique(self.y_train))
    self.vocab_size = np.shape(self.X_train)[1]
    train_ds = CSRDataset(self.X_train, self.y_train)

    if test_data_file is not None:
        test_npz = np.load(test_data_file, allow_pickle=True)
        self.X_test = test_npz["X"].item()
        self.y_test = test_npz["y"]
        test_ds = CSRDataset(self.X_test, self.y_test)

    if val_data_file is not None:
        val_npz = np.load(val_data_file, allow_pickle=True)
        self.X_val = val_npz["X"].item()
        self.y_val = val_npz["y"]
        val_ds = CSRDataset(self.X_val, self.y_val)

    logging.info("Data Import Completed - Time elapsed: " + get_elapsed_time(start_time))

    if val_data_file is not None:
        if test_data_file is not None:
            return train_ds, val_ds, test_ds
        else:
            return train_ds, val_ds
    else:
        return train_ds
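# --- Usage sketch (illustrative only) ---------------------------------------
# Round trip between to_numpy() and from_numpy(), assuming the npz files and
# tokenizer.json live in the same folder. The folder name is hypothetical.
#
#   prep.to_numpy("cache/")                      # writes *_data_nn.npz (+ tokenizer.json)
#   train_ds, val_ds, test_ds = prep.from_numpy(
#       "cache/train_data_nn.npz",
#       test_data_file="cache/test_data_nn.npz",
#       val_data_file="cache/val_data_nn.npz")
# -----------------------------------------------------------------------------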
def from_csv(
    self,
    train_file: str,
    test_file: str = None,
    val_file: str = None,
    val_size: float = 0.0,
    text_col_idx=0,
    label_col_idx=1,
    sep: str = ",",
    header=0,
    encoding: str = "utf8",
    preprocess_func=None,
    preprocess_ncore=2,
    ngram_range=(1, 3),
    min_df=1,
    max_df=1.0,
    stop_words="english",
    max_features=20000,
    ds_max_seq=1000,
    ds_type="TensorDataset",
):
    logging.info("Starting Data Preparation ...")
    logging.info("  Training Data ...")
    start_time = time.time()
    np.random.seed(self.seed)

    self.vectorizer = DataVectorizer(
        preprocess_func,
        preprocess_ncore,
        ngram_range=ngram_range,
        min_df=min_df,
        max_df=max_df,
        max_features=max_features,
        stop_words=stop_words,
    )
    self.train_file = train_file
    self.test_file = test_file
    self.val_file = val_file
    self.text_col_idx = text_col_idx
    self.label_col_idx = label_col_idx

    df = pd.read_csv(self.train_file, sep=sep, encoding=encoding, header=header)
    X = df[df.columns[self.text_col_idx]].tolist()
    y = df[df.columns[self.label_col_idx]].to_numpy(dtype=int)
    del df

    if val_size > 0.0 and self.val_file is None:
        # Create the validation partition from the train partition,
        # keeping the class distribution
        X_train, X_val, y_train, y_val = train_test_split(
            X, y, stratify=y, test_size=val_size, random_state=self.seed)
        X = X_train
        y = y_train
        del X_train, y_train
        gc.collect()

    # Input features
    X = self.vectorizer.fit_transform(X)
    self.X_train = sequence.pad_sequences(X, maxlen=ds_max_seq, padding="post")
    self.y_train = y
    del X, y
    gc.collect()

    self.vocab_size = self.vectorizer.vocab_size
    self.vocab = self.vectorizer.w2index
    self.num_classes = len(np.unique(self.y_train))

    if ds_type == "TensorDataset":
        train_ds = TensorDataset(
            torch.from_numpy(self.X_train).long(),
            torch.from_numpy(self.y_train).long(),
        )
    else:
        train_ds = NNDataset(self.X_train, self.y_train, max_seq=ds_max_seq)

    if self.test_file is not None:
        logging.info("  Test Data ...")
        df = pd.read_csv(self.test_file, sep=sep, encoding=encoding, header=header)
        X = df[df.columns[self.text_col_idx]].tolist()
        y = df[df.columns[self.label_col_idx]].to_numpy(dtype=int)
        del df
        X = self.vectorizer.transform(X)
        # pad the same way as the training data
        self.X_test = sequence.pad_sequences(X, maxlen=ds_max_seq, padding="post")
        self.y_test = y
        del X, y
        gc.collect()
        if ds_type == "TensorDataset":
            test_ds = TensorDataset(
                torch.from_numpy(self.X_test).long(),
                torch.from_numpy(self.y_test).long(),
            )
        else:
            test_ds = NNDataset(self.X_test, self.y_test, max_seq=ds_max_seq)

    if (val_size > 0.0 and self.val_file is None) or self.val_file is not None:
        logging.info("  Validation Data ...")
        if self.val_file is not None:
            df = pd.read_csv(self.val_file, sep=sep, encoding=encoding, header=header)
            X_val = df[df.columns[self.text_col_idx]].tolist()
            y_val = df[df.columns[self.label_col_idx]].to_numpy(dtype=int)
            del df
        X_val = self.vectorizer.transform(X_val)
        # pad the same way as the training data
        self.X_val = sequence.pad_sequences(X_val, maxlen=ds_max_seq, padding="post")
        self.y_val = y_val
        del X_val, y_val
        gc.collect()
        if ds_type == "TensorDataset":
            val_ds = TensorDataset(
                torch.from_numpy(self.X_val).long(),
                torch.from_numpy(self.y_val).long(),
            )
        else:
            val_ds = NNDataset(self.X_val, self.y_val, max_seq=ds_max_seq)

    logging.info("Data Preparation Completed - Time elapsed: " + get_elapsed_time(start_time))

    self.params = {
        "seed": self.seed,
        "train_file": self.train_file,
        "test_file": self.test_file,
        "val_file": self.val_file,
        "vocabulary_size": self.vocab_size,
        "preprocess_ncore": preprocess_ncore,
        "stop_words": stop_words,
        "max_features": max_features,
        "ngram_range": ngram_range,
        "min_df": min_df,
        "max_df": max_df,
        "ds_max_seq": ds_max_seq,
        "num_classes": self.num_classes,
    }
    if preprocess_func is not None:
        self.params.update({"preprocess_func": preprocess_func.__name__})
    else:
        self.params.update({"preprocess_func": None})

    if (val_size > 0.0 and self.val_file is None) or self.val_file is not None:
        if self.test_file is not None:
            return train_ds, test_ds, val_ds
        else:
            return train_ds, val_ds
    else:
        return train_ds
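# --- Usage sketch (illustrative only) ---------------------------------------
# Typical call into the from_csv() above. File names and the preparation
# object are hypothetical; with val_size > 0 and no val_file, a stratified
# validation split is carved out of the training CSV instead of read from disk.
#
#   prep = ...  # instance of the data-preparation class exposing this from_csv()
#   train_ds, test_ds, val_ds = prep.from_csv(
#       "train.csv", test_file="test.csv", val_file="valid.csv",
#       text_col_idx=0, label_col_idx=1,
#       max_features=20000, ds_max_seq=400)
#   # prep.vocab, prep.vocab_size and prep.num_classes are now populated
# -----------------------------------------------------------------------------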
def from_csv(
    self,
    train_file: str,
    test_file: str = None,
    val_file: str = None,
    val_size: float = 0.0,
    text_col_idx=0,
    label_col_idx=1,
    sep: str = ",",
    header=0,
    encoding: str = "utf8",
    ngram_range=(1, 2),
    min_df=5,
    max_df=0.9,
    use_idf: bool = True,
    sublinear_tf: bool = False,
    norm="l2",
    binary=False,
    max_features=None,
    stop_words=None,
    preprocess_func=None,
    preprocess_ncore=2,
):
    logging.info("Starting Data Preparation...")
    start_time = time.time()
    self.train_file = train_file
    self.test_file = test_file
    self.val_file = val_file
    self.text_col_idx = text_col_idx
    self.label_col_idx = label_col_idx

    # Punctuation-aware tokenizer for the TF-IDF vectorizer
    re_tok = re.compile("([%s“”¨«»®´·º½¾¿¡§£₤‘’])" % string.punctuation)
    tokenizer = lambda x: re_tok.sub(r" \1 ", x).split()
    self.vectorizer = TfidfVectorizer(
        use_idf=use_idf,
        tokenizer=tokenizer,
        ngram_range=ngram_range,
        min_df=min_df,
        max_df=max_df,
        sublinear_tf=sublinear_tf,
        norm=norm,
        binary=binary,
        max_features=max_features,
        stop_words=stop_words,
    )

    df = pd.read_csv(self.train_file, sep=sep, encoding=encoding, header=header)
    if preprocess_func is not None:
        df[df.columns[self.text_col_idx]] = parallelApply(
            df[df.columns[self.text_col_idx]], preprocess_func, preprocess_ncore)
    X = df[df.columns[self.text_col_idx]].tolist()
    y = df[df.columns[self.label_col_idx]].to_numpy(float)
    del df

    self.X_train = self.vectorizer.fit_transform(X)
    self.y_train = y
    self.vocab_size = len(self.vectorizer.vocabulary_)
    self.num_classes = len(np.unique(self.y_train))
    del X, y
    train_ds = CSRDataset(self.X_train, self.y_train)
    gc.collect()

    if self.test_file is not None:
        df = pd.read_csv(self.test_file, sep=sep, encoding=encoding, header=header)
        if preprocess_func is not None:
            df[df.columns[self.text_col_idx]] = parallelApply(
                df[df.columns[self.text_col_idx]], preprocess_func, preprocess_ncore)
        X = df[df.columns[self.text_col_idx]].tolist()
        y = df[df.columns[self.label_col_idx]].to_numpy(float)
        del df
        self.X_test = self.vectorizer.transform(X)
        self.y_test = y
        del X, y
        test_ds = CSRDataset(self.X_test, self.y_test)
        gc.collect()

    if self.val_file is not None:  # or val_size > 0.0:
        df = pd.read_csv(self.val_file, sep=sep, encoding=encoding, header=header)
        if preprocess_func is not None:
            df[df.columns[self.text_col_idx]] = parallelApply(
                df[df.columns[self.text_col_idx]], preprocess_func, preprocess_ncore)
        X = df[df.columns[self.text_col_idx]].tolist()
        y = df[df.columns[self.label_col_idx]].to_numpy(float)
        del df
        self.X_val = self.vectorizer.transform(X)
        self.y_val = y
        del X, y
        val_ds = CSRDataset(self.X_val, self.y_val)
        gc.collect()

    logging.info("Data Preparation Completed - Time elapsed: " + get_elapsed_time(start_time))

    if self.val_file is not None:
        if self.test_file is not None:
            return train_ds, test_ds, val_ds
        else:
            return train_ds, val_ds
    else:
        return train_ds
def from_csv(
    self,
    train_file: str,
    test_file: str = None,
    val_file: str = None,
    val_size: float = 0.1,
    text_col_idx=0,
    label_col_idx=1,
    sep: str = ",",
    header=0,
    encoding: str = "utf8",
    preprocess_func=None,
    preprocess_ncore=2,
    ngram_range=(1, 3),
    max_features=20000,
    ds_max_seq=1000,
    ds_type="TensorDataset",
):
    logging.info("Starting Data Preparation...")
    start_time = time.time()
    self.train_file = train_file
    self.test_file = test_file
    self.val_file = val_file
    self.text_col_idx = text_col_idx
    self.label_col_idx = label_col_idx

    df = pd.read_csv(self.train_file, sep=sep, encoding=encoding, header=header)
    if preprocess_func is not None:
        df[df.columns[self.text_col_idx]] = parallelApply(
            df[df.columns[self.text_col_idx]], preprocess_func, preprocess_ncore)
    X = df[df.columns[self.text_col_idx]].tolist()
    y = df[df.columns[self.label_col_idx]].to_numpy(dtype=int)
    del df

    logging.info("Adding 1-gram features")
    self.tokenizer = Tokenizer(num_words=max_features, lower=False, filters="")
    self.tokenizer.fit_on_texts(X)
    self.X_train = self.tokenizer.texts_to_sequences(X)

    if ngram_range[1] > 1:
        logging.info("Adding {}-gram features".format(ngram_range[1]))
        # Create the set of unique n-grams from the training set
        ngram_set = set()
        for input_list in self.X_train:
            for i in range(2, ngram_range[1] + 1):
                set_of_ngram = self.create_ngram_set(input_list, ngram_value=i)
                ngram_set.update(set_of_ngram)

        # Dictionary mapping each n-gram token to a unique integer.
        # Integer values are greater than max_features in order
        # to avoid collisions with existing features.
        start_index = max_features + 1
        token_indice = {v: k + start_index for k, v in enumerate(ngram_set)}
        indice_token = {token_indice[k]: k for k in token_indice}

        # max_features becomes the highest integer that can be found in the dataset
        max_features = np.max(list(indice_token.keys())) + 1

        # Augment the input token sequences with n-gram features
        self.X_train = self.add_ngram(self.X_train, token_indice, ngram_range[1])

    self.X_train = sequence.pad_sequences(self.X_train, maxlen=ds_max_seq)
    self.y_train = y
    self.vocab_size = max_features

    logging.info("Building final vocab...")
    vocab_wrd_idx = {idx for sent in self.X_train for idx in sent}
    self.vocab = {
        self.tokenizer.index_word[i]: i
        for i in vocab_wrd_idx if i in self.tokenizer.index_word
    }
    self.num_classes = len(np.unique(self.y_train))
    del X, y
    gc.collect()

    if ds_type == "TensorDataset":
        train_ds = TensorDataset(
            torch.from_numpy(self.X_train).long(),
            torch.from_numpy(self.y_train).long(),
        )
    else:
        train_ds = NNDataset(self.X_train, self.y_train, max_seq=ds_max_seq)

    if self.test_file is not None:
        df = pd.read_csv(self.test_file, sep=sep, encoding=encoding, header=header)
        if preprocess_func is not None:
            df[df.columns[self.text_col_idx]] = parallelApply(
                df[df.columns[self.text_col_idx]], preprocess_func, preprocess_ncore)
        X = df[df.columns[self.text_col_idx]].tolist()
        y = df[df.columns[self.label_col_idx]].to_numpy(dtype=int)
        del df
        self.X_test = self.tokenizer.texts_to_sequences(X)
        if ngram_range[1] > 1:
            self.X_test = self.add_ngram(self.X_test, token_indice, ngram_range[1])
        self.X_test = sequence.pad_sequences(self.X_test, maxlen=ds_max_seq)
        self.y_test = y
        del X, y
        gc.collect()
        if ds_type == "TensorDataset":
            test_ds = TensorDataset(
                torch.from_numpy(self.X_test).long(),
                torch.from_numpy(self.y_test).long(),
            )
        else:
            test_ds = NNDataset(self.X_test, self.y_test, max_seq=ds_max_seq)

    if self.val_file is not None:
        df = pd.read_csv(self.val_file, sep=sep, encoding=encoding, header=header)
        if preprocess_func is not None:
            df[df.columns[self.text_col_idx]] = parallelApply(
                df[df.columns[self.text_col_idx]], preprocess_func, preprocess_ncore)
        X = df[df.columns[self.text_col_idx]].tolist()
        y = df[df.columns[self.label_col_idx]].to_numpy(dtype=int)
        del df
        self.X_val = self.tokenizer.texts_to_sequences(X)
        if ngram_range[1] > 1:
            self.X_val = self.add_ngram(self.X_val, token_indice, ngram_range[1])
        self.X_val = sequence.pad_sequences(self.X_val, maxlen=ds_max_seq)
        self.y_val = y
        del X, y
        gc.collect()
        if ds_type == "TensorDataset":
            val_ds = TensorDataset(
                torch.from_numpy(self.X_val).long(),
                torch.from_numpy(self.y_val).long(),
            )
        else:
            val_ds = NNDataset(self.X_val, self.y_val, max_seq=ds_max_seq)

    logging.info("Data Preparation Completed - Time elapsed: " + get_elapsed_time(start_time))

    if self.val_file is not None:
        if self.test_file is not None:
            return train_ds, test_ds, val_ds
        else:
            return train_ds, val_ds
    else:
        return train_ds
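# --- Worked example of the n-gram augmentation above (illustrative only) -----
# Assuming create_ngram_set() / add_ngram() follow the standard Keras fastText
# recipe, with ngram_range=(1, 2), max_features=20000 and the toy index
# sequence [1, 4, 9]:
#   create_ngram_set([1, 4, 9], ngram_value=2) -> {(1, 4), (4, 9)}
# Each bigram gets a new integer id starting at max_features + 1, e.g.
#   token_indice = {(1, 4): 20001, (4, 9): 20002}
# and add_ngram() appends those ids to the original sequence:
#   [1, 4, 9] -> [1, 4, 9, 20001, 20002]
# so unigram and n-gram features share a single embedding lookup table.
# -----------------------------------------------------------------------------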
def from_csv(
    self,
    train_file: str,
    test_file: str = None,
    val_file: str = None,
    val_size: float = 0.0,
    text_col_idx=0,
    label_col_idx=1,
    sep: str = ",",
    header=0,
    encoding: str = "utf8",
    ngram_range=(1, 3),
    min_df=1,
    max_df=1.0,
    use_idf: bool = False,
    sublinear_tf: bool = False,
    norm="l2",
    binary=False,
    max_features=None,
    stop_words=None,
    preprocess_func=None,
    preprocess_ncore=2,
    ds_max_seq=1000,
    ds_type="Dataset",
):
    logging.info("Starting Data Preparation...")
    logging.info("  Training Data ...")
    start_time = time.time()
    self.train_file = train_file
    self.test_file = test_file
    self.val_file = val_file
    self.text_col_idx = text_col_idx
    self.label_col_idx = label_col_idx

    # Punctuation-aware tokenizer for the TF-IDF vectorizer
    re_tok = re.compile("([%s“”¨«»®´·º½¾¿¡§£₤‘’])" % string.punctuation)
    tokenizer = lambda x: re_tok.sub(r" \1 ", x).split()
    self.vectorizer = TfidfVectorizer(
        use_idf=use_idf,
        tokenizer=tokenizer,
        ngram_range=ngram_range,
        min_df=min_df,
        max_df=max_df,
        sublinear_tf=sublinear_tf,
        norm=norm,
        binary=binary,
        max_features=max_features,
        stop_words=stop_words,
    )

    df = pd.read_csv(self.train_file, sep=sep, encoding=encoding, header=header)
    if preprocess_func is not None:
        df[df.columns[self.text_col_idx]] = parallelApply(
            df[df.columns[self.text_col_idx]], preprocess_func, preprocess_ncore)
    X = df[df.columns[self.text_col_idx]].tolist()
    y = df[df.columns[self.label_col_idx]].to_numpy(int)
    del df

    self.X_train = self.vectorizer.fit_transform(X)
    self.y_train = y
    self.vocab_size = len(self.vectorizer.vocabulary_)
    self.num_classes = len(np.unique(self.y_train))
    self.X_train_words_seq, _ = self._bow2adjlist(self.X_train, max_seq=ds_max_seq)
    # Per-class ratios r (one column per class)
    self.r = np.column_stack([
        self.calc_r(i, self.X_train, self.y_train)
        for i in range(self.num_classes)
    ])
    del X, y
    train_ds = TensorDataset(
        torch.from_numpy(self.X_train_words_seq.toarray()).long(),
        torch.from_numpy(self.y_train).long(),
    )
    gc.collect()

    if self.test_file is not None:
        logging.info("  Test Data ...")
        df = pd.read_csv(self.test_file, sep=sep, encoding=encoding, header=header)
        if preprocess_func is not None:
            df[df.columns[self.text_col_idx]] = parallelApply(
                df[df.columns[self.text_col_idx]], preprocess_func, preprocess_ncore)
        X = df[df.columns[self.text_col_idx]].tolist()
        y = df[df.columns[self.label_col_idx]].to_numpy(int)
        del df
        self.X_test = self.vectorizer.transform(X)
        self.y_test = y
        self.X_test_words_seq, _ = self._bow2adjlist(self.X_test, max_seq=ds_max_seq)
        del X, y
        test_ds = TensorDataset(
            torch.from_numpy(self.X_test_words_seq.toarray()).long(),
            torch.from_numpy(self.y_test).long(),
        )
        gc.collect()

    if self.val_file is not None:
        logging.info("  Validation Data ...")
        df = pd.read_csv(self.val_file, sep=sep, encoding=encoding, header=header)
        if preprocess_func is not None:
            df[df.columns[self.text_col_idx]] = parallelApply(
                df[df.columns[self.text_col_idx]], preprocess_func, preprocess_ncore)
        X = df[df.columns[self.text_col_idx]].tolist()
        y = df[df.columns[self.label_col_idx]].to_numpy(int)
        del df
        self.X_val = self.vectorizer.transform(X)
        self.y_val = y
        self.X_val_words_seq, _ = self._bow2adjlist(self.X_val, max_seq=ds_max_seq)
        del X, y
        val_ds = TensorDataset(
            torch.from_numpy(self.X_val_words_seq.toarray()).long(),
            torch.from_numpy(self.y_val).long(),
        )
        gc.collect()

    logging.info("Data Preparation Completed - Time elapsed: " + get_elapsed_time(start_time))

    if self.val_file is not None:
        if self.test_file is not None:
            return self.r, train_ds, test_ds, val_ds
        else:
            return self.r, train_ds, val_ds
    else:
        return self.r, train_ds
def train_evaluate(
    self,
    seed=42,
    check_dl=True,
    run_lr_finder=False,
    show_lr_plot: bool = False,
):
    set_seed(seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    self.train_dl = DataLoader(self.train_ds, batch_size=self.batch_size, shuffle=True)
    if self.test_ds is not None:
        self.test_dl = DataLoader(self.test_ds, batch_size=self.batch_size, shuffle=False)
    if self.val_ds is not None:
        self.val_dl = DataLoader(self.val_ds, batch_size=self.batch_size, shuffle=False)

    self.model = self.model.to(device)
    self.criterion = self.criterion.to(device)

    if run_lr_finder:
        logging.info("LR Finder Running....")
        lr_finder = LRFinder(self.model, self.optimizer, criterion=self.criterion, device=device)
        lr_finder.range_test(self.train_dl, start_lr=10e-6, end_lr=1, num_iter=100)
        lr_finder.plot(
            show=show_lr_plot,
            output_path="LR_finder_{}_{}.png".format(
                self.model.__class__.__name__,
                datetime.now().strftime("%Y%m%d_%H%M%S"),
            ),
        )
        logging.info("LR Finder Run Completed....")

    # Checking the dataloaders
    if check_dl:
        for data, labels in self.train_dl:
            logging.info("----------------------------------")
            logging.info("---      DATALOADER INFO       ---")
            logging.info("----------------------------------")
            logging.info("Train DataLoader Details:")
            logging.info("  batch dimensions: {}".format(data.shape))
            logging.info("  label dimensions: {}".format(labels.shape))
            break
        if self.val_ds is not None:
            for data, labels in self.val_dl:
                logging.info("Val DataLoader Details:")
                logging.info("  batch dimensions: {}".format(data.shape))
                logging.info("  label dimensions: {}".format(labels.shape))
                break

    logging.info("----------------------------------")
    logging.info("---       MODEL TRAINING       ---")
    logging.info("----------------------------------")
    model_parameters_count = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
    n_iters = round(len(self.train_ds) / self.batch_size)
    logging.info("Number of iterations/epoch : {}".format(n_iters))
    log_interval = self.log_interval

    # Loop over epochs
    start_time = time.time()
    for epoch in range(self.n_epochs):
        train_losses = []
        losses = []
        epoch_start_time = time.time()
        self.model.train()

        for batch_index, (batch_train_data, batch_train_labels) in enumerate(self.train_dl):
            # Transfer data to the target device
            batch_train_data = batch_train_data.to(device)
            batch_train_labels = batch_train_labels.to(device)

            # Zero the parameter gradients
            self.optimizer.zero_grad()

            # Forward pass
            outputs = self.model(batch_train_data)
            loss = self.criterion(outputs, batch_train_labels)

            # Store loss values
            self.all_train_loss_hist.append(loss.item())
            losses.append(loss.item())

            # Compute gradients
            if self.apex:
                with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # Gradient clipping
            if self.max_grad_clip_norm is not None:
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.max_grad_clip_norm)

            # Update model parameters
            self.optimizer.step()

            # Adjust learning rate if a scheduler is specified
            if self.lr_scheduler is not None:
                self.lr_scheduler.step()

            # Report intermediate loss value after a certain number of batches
            if batch_index % log_interval == 0:
                avg_train_loss = np.mean(losses)
                train_losses.append(avg_train_loss)
                logging.info(
                    " Info | Epoch: %03d/%03d | Batch %04d/%04d | Average Loss: %.6f"
                    % (epoch + 1, self.n_epochs, batch_index + 1, n_iters, avg_train_loss))
                losses = []
        logging.info(" Info | " + get_gpu_info(device))

        # End of epoch - evaluate the model performance
        self.model.eval()
        with torch.set_grad_enabled(False):  # save memory during inference
            logging.info("Epoch: %03d/%03d | Train Accuracy: %.6f" % (
                epoch + 1,
                self.n_epochs,
                compute_accuracy(self.model, self.train_dl, device=device),
            ))
            val_acc = compute_accuracy(self.model, self.val_dl, device=device)
            logging.info("Epoch: %03d/%03d | Val accuracy: %.6f" %
                         (epoch + 1, self.n_epochs, val_acc))
            logging.info("Epoch: %03d/%03d | Epoch duration: %s" %
                         (epoch + 1, self.n_epochs, get_elapsed_time(epoch_start_time)))
            logging.info("Epoch: %03d/%03d | Total time elapsed: %s" %
                         (epoch + 1, self.n_epochs, get_elapsed_time(start_time)))

        # Early stopping & checkpoint
        current_score = val_acc
        if self.best_score is None:
            self.best_score = current_score.to(torch.device("cpu")).numpy()
            self.best_epoch = epoch + 1
            self.save_checkpoint()
        elif (self.apply_early_stopping
              and current_score < self.best_score + self.es_improvement_delta):
            self.es_counter += 1
            logging.info(
                f"EarlyStopping patience counter: {self.es_counter} out of {self.es_patience}")
            if self.es_counter >= self.es_patience:
                self.early_stop = True
                logging.warning("/!\\ Early stopping model training /!\\")
                break
        else:
            self.best_score = current_score.to(torch.device("cpu")).numpy()
            self.best_epoch = epoch + 1
            self.save_checkpoint()
            self.es_counter = 0

    # Final results
    logging.info("------------------------------------------")
    logging.info("---              SUMMARY               ---")
    logging.info("------------------------------------------")
    logging.info("Number of model parameters : {}".format(model_parameters_count))
    logging.info("Total Training Time: {}".format(get_elapsed_time(start_time)))
    logging.info("Total Time: {}".format(get_elapsed_time(start_time)))
    logging.info("Best Epoch: {} - Accuracy Score: {:.6f}".format(
        self.best_epoch, self.best_score))
    logging.info("------------------------------------------")
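# --- Usage sketch (illustrative only) ---------------------------------------
# Minimal driver for train_evaluate(), assuming the surrounding trainer object
# was constructed with the attributes referenced above (model, criterion,
# optimizer, train_ds/val_ds, batch_size, n_epochs, log_interval, ...). The
# constructor call itself is hypothetical.
#
#   trainer = ...                               # instance exposing train_evaluate()
#   trainer.train_ds, trainer.val_ds = train_ds, val_ds
#   trainer.train_evaluate(seed=42, check_dl=True, run_lr_finder=False)
#   print(trainer.best_epoch, trainer.best_score)
# -----------------------------------------------------------------------------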