def _infer_prep(self, Xs, max_length=None):
    max_length = max_length or self.config.max_length
    arr_encoded = self._text_to_ids(Xs, max_length=max_length)
    n_batch_train = self.config.batch_size * max(len(self.config.visible_gpus), 1)
    self._build_model(n_updates_total=0, target_dim=self.target_dim, train=False)
    yield from iter_data(
        arr_encoded.token_ids,
        arr_encoded.mask,
        n_batch=n_batch_train,
        verbose=self.config.verbose
    )
def _infer_prep(self, *Xs, max_length=None):
    max_length = max_length or self.config.max_length
    arr_encoded = self._text_to_ids(*Xs, max_length=max_length)
    n_batch_train = self.config.batch_size * max(len(self.config.visible_gpus), 1)
    self._build_model(n_updates_total=0, target_dim=self.target_dim, train=False)
    yield from iter_data(
        arr_encoded.token_ids,
        arr_encoded.mask,
        n_batch=n_batch_train,
        verbose=self.config.verbose
    )
def _infer_prep(self, *X, max_length=None):
    max_length = max_length or self.config.max_length
    infer_x, infer_mask = self._text_to_ids(*X, max_length=max_length)
    n_batch_train = self.config.batch_size * max(len(get_available_gpus(self.config)), 1)
    self._build_model(n_updates_total=0, target_dim=self.target_dim, train=False)
    yield from iter_data(
        infer_x,
        infer_mask,
        n_batch=n_batch_train,
        verbose=self.config.verbose
    )
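# All three `_infer_prep` variants lean on `iter_data` to slice the encoded
# arrays into fixed-size batches and yield them lazily. The stand-in below is
# a minimal sketch of the contract they assume -- not the library's actual
# implementation (which also drives the tqdm progress bar) -- so it is named
# `_iter_data_sketch` to avoid shadowing the real helper.
def _iter_data_sketch(*arrays, n_batch, verbose=False, tqdm_desc=None):
    # Yield aligned slices of size `n_batch` from each array; the final
    # batch may be smaller when len(arrays[0]) % n_batch != 0.
    n_examples = len(arrays[0])
    for start in range(0, n_examples, n_batch):
        yield tuple(arr[start:start + n_batch] for arr in arrays)

# Example: batches of (token_ids, mask), as consumed by _infer_prep.
import numpy as np
_token_ids = np.zeros((10, 512), dtype=np.int32)
_mask = np.ones((10, 512), dtype=np.float32)
for _xmb, _mmb in _iter_data_sketch(_token_ids, _mask, n_batch=4):
    print(_xmb.shape, _mmb.shape)  # (4, 512), (4, 512), then (2, 512)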
def _training_loop(self, arr_encoded, Y, batch_size=None):
    batch_size = batch_size or self.config.batch_size
    self.label_encoder = self._get_target_encoder()
    n_batch_train = batch_size * max(len(get_available_gpus(self.config)), 1)
    train_x, train_mask = arr_encoded.token_ids, arr_encoded.mask
    n_examples = train_x.shape[0]
    n_updates_total = (n_examples // n_batch_train) * self.config.n_epochs

    if Y is not None:
        Y = self.label_encoder.fit_transform(Y)
        target_dim = self.label_encoder.target_dim
    else:
        # Only the language model will be trained -- mock a fake target.
        Y = [[None]] * n_examples
        target_dim = None

    self._build_model(n_updates_total=n_updates_total, target_dim=target_dim)

    dataset = (train_x, train_mask, Y)
    x_tr, x_va, m_tr, m_va, y_tr, y_va = train_test_split(
        *dataset, test_size=self.config.val_size, random_state=self.config.seed
    )
    dataset = (x_tr, m_tr, y_tr)
    val_dataset = (x_va, m_va, y_va)

    self.is_trained = True
    avg_train_loss = 0
    avg_val_loss = 0
    global_step = 0
    best_val_loss = float("inf")
    val_window = [float("inf")] * self.config.val_window_size

    for i in range(self.config.n_epochs):
        for xmb, mmb, ymb in iter_data(*dataset, n_batch=n_batch_train, verbose=self.config.verbose):
            global_step += 1
            if global_step % self.config.val_interval == 0:
                tqdm.tqdm.write("Train loss: {}, Val loss: {}".format(avg_train_loss, avg_val_loss))
                outputs = self._eval(
                    self.summaries,
                    feed_dict={
                        self.X: xmb,
                        self.M: mmb,
                        self.Y: ymb,
                        self.do_dropout: DROPOUT_OFF
                    }
                )
                if self.train_writer is not None:
                    self.train_writer.add_summary(outputs.get(self.summaries), global_step)

                sum_val_loss = 0
                for xval, mval, yval in iter_data(*val_dataset, n_batch=n_batch_train,
                                                  verbose=self.config.verbose, tqdm_desc="Validation"):
                    outputs = self._eval(
                        self.clf_loss,
                        self.summaries,
                        feed_dict={
                            self.X: xval,
                            self.M: mval,
                            self.Y: yval,
                            self.do_dropout: DROPOUT_OFF
                        }
                    )
                    if self.valid_writer is not None:
                        self.valid_writer.add_summary(outputs.get(self.summaries), global_step)
                    val_cost = outputs.get(self.clf_loss, 0)
                    sum_val_loss += val_cost
                    avg_val_loss = (
                        avg_val_loss * self.config.rolling_avg_decay
                        + val_cost * (1 - self.config.rolling_avg_decay)
                    )
                val_window.append(sum_val_loss)
                val_window.pop(0)

                if np.mean(val_window) <= best_val_loss:
                    best_val_loss = np.mean(val_window)
                    if self.config.save_best_model:
                        self.save(self.config.autosave_path)

            outputs = self._eval(
                self.clf_loss,
                self.train_op,
                feed_dict={
                    self.X: xmb,
                    self.M: mmb,
                    self.Y: ymb,
                    self.do_dropout: DROPOUT_ON
                }
            )
            cost = outputs.get(self.clf_loss, 0)
            avg_train_loss = avg_train_loss * self.config.rolling_avg_decay + cost * (
                1 - self.config.rolling_avg_decay
            )
    return self
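# The six-way unpacking above relies on sklearn's convention that
# `train_test_split(*arrays)` returns one (train, test) pair per input array,
# in input order. A quick self-contained check of that convention:
import numpy as np
from sklearn.model_selection import train_test_split

_X = np.arange(10).reshape(10, 1)  # stand-in for token ids
_M = np.ones((10, 1))              # stand-in for masks
_Y = np.arange(10)                 # stand-in for targets

# Returns (X_train, X_test, M_train, M_test, Y_train, Y_test), matching the
# x_tr, x_va, m_tr, m_va, y_tr, y_va unpacking in _training_loop above.
_x_tr, _x_va, _m_tr, _m_va, _y_tr, _y_va = train_test_split(
    _X, _M, _Y, test_size=0.2, random_state=42
)
print(_x_tr.shape, _x_va.shape)  # (8, 1) (2, 1)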
def _training_loop(self, arr_encoded, Y=None, batch_size=None):
    self.label_encoder = self._target_encoder()

    idxs = list(range(len(arr_encoded.token_ids)))
    train_idxs, val_idxs = train_test_split(idxs, test_size=self.config.val_size)

    if Y is None:
        # Only the language model will be trained -- mock a fake target of the right length.
        train_Y = np.asarray([[]] * len(train_idxs))
        val_Y = np.asarray([[]] * len(val_idxs))
        target_dim = None
    else:
        Y = np.asarray(Y)
        train_Y = self.label_encoder.fit_transform(Y[train_idxs])
        val_Y = self.label_encoder.transform(Y[val_idxs])
        target_dim = self.label_encoder.target_dim

    batch_size = batch_size or self.config.batch_size
    n_batch_train = batch_size * max(len(self.config.visible_gpus), 1)
    n_examples = len(train_idxs)
    n_updates_total = (n_examples // n_batch_train) * self.config.n_epochs

    train_dataset = (arr_encoded.token_ids[train_idxs], arr_encoded.mask[train_idxs], train_Y)
    val_dataset = (arr_encoded.token_ids[val_idxs], arr_encoded.mask[val_idxs], val_Y)

    self._build_model(n_updates_total=n_updates_total, target_dim=target_dim)
    self.is_trained = True

    avg_train_loss = None
    avg_val_loss = None
    global_step = 0
    best_val_loss = float("inf")
    val_window = [float("inf")] * self.config.val_window_size

    for i in range(self.config.n_epochs):
        iterator = iter_data(
            *train_dataset,
            n_batch=n_batch_train,
            tqdm_desc="Epoch {}".format(i),
            verbose=self.config.verbose
        )
        for (xmb, mmb, ymb) in iterator:
            feed_dict = {
                self.X: xmb,
                self.M: mmb,
            }
            if target_dim:
                feed_dict[self.Y] = ymb

            global_step += 1
            if global_step % self.config.val_interval == 0:
                feed_dict[self.do_dropout] = DROPOUT_OFF
                outputs = self._eval(self.summaries, feed_dict=feed_dict)
                if self.train_writer is not None:
                    self.train_writer.add_summary(outputs.get(self.summaries), global_step)

                sum_val_loss = 0
                for xval, mval, yval in iter_data(*val_dataset, n_batch=n_batch_train,
                                                  verbose=self.config.verbose, tqdm_desc="Validation"):
                    # Build a separate dict so the training batch's feed_dict
                    # isn't clobbered before the train op below runs.
                    val_feed_dict = {
                        self.X: xval,
                        self.M: mval,
                        self.do_dropout: DROPOUT_OFF
                    }
                    if target_dim:
                        val_feed_dict[self.Y] = yval

                    outputs = self._eval(self.target_loss, self.summaries, feed_dict=val_feed_dict)
                    if self.valid_writer is not None:
                        self.valid_writer.add_summary(outputs.get(self.summaries), global_step)

                    val_cost = outputs.get(self.target_loss, 0)
                    sum_val_loss += val_cost
                    if avg_val_loss is None:
                        avg_val_loss = val_cost
                    else:
                        avg_val_loss = (
                            avg_val_loss * self.config.rolling_avg_decay
                            + val_cost * (1 - self.config.rolling_avg_decay)
                        )

                val_window.append(sum_val_loss)
                val_window.pop(0)

                if np.mean(val_window) <= best_val_loss:
                    best_val_loss = np.mean(val_window)
                    if self.config.autosave_path is not None:
                        self.save(self.config.autosave_path)

                tqdm.tqdm.write("Train loss: {}\t Validation loss: {}".format(avg_train_loss, avg_val_loss))

            feed_dict[self.do_dropout] = DROPOUT_ON
            outputs = self._eval(self.target_loss, self.train_op, feed_dict=feed_dict)
            cost = outputs.get(self.target_loss, 0)
            if avg_train_loss is None:
                avg_train_loss = cost
            else:
                avg_train_loss = avg_train_loss * self.config.rolling_avg_decay + cost * (
                    1 - self.config.rolling_avg_decay
                )
    return self
def _training_loop(self, arr_encoded, Y=None, batch_size=None):
    self.label_encoder = self._target_encoder()

    idxs = list(range(len(arr_encoded.token_ids)))
    train_idxs, val_idxs = train_test_split(idxs, test_size=self.config.val_size)

    if Y is None:
        # Only the language model will be trained -- mock a fake target of the right length.
        train_Y = np.asarray([[]] * len(train_idxs))
        val_Y = np.asarray([[]] * len(val_idxs))
        target_dim = None
    else:
        Y = np.asarray(Y)
        train_Y = self.label_encoder.fit_transform(Y[train_idxs])
        val_Y = self.label_encoder.transform(Y[val_idxs])
        target_dim = self.label_encoder.target_dim

    batch_size = batch_size or self.config.batch_size
    n_batch_train = batch_size * max(len(self.config.visible_gpus), 1)
    n_examples = len(train_idxs)
    n_updates_total = (n_examples // n_batch_train) * self.config.n_epochs

    train_dataset = (arr_encoded.token_ids[train_idxs], arr_encoded.mask[train_idxs], train_Y)
    val_dataset = (arr_encoded.token_ids[val_idxs], arr_encoded.mask[val_idxs], val_Y)

    self._build_model(n_updates_total=n_updates_total, target_dim=target_dim)
    self.is_trained = True

    avg_train_loss = None
    avg_val_loss = None
    global_step = 0
    best_val_loss = float("inf")
    val_window = [float("inf")] * self.config.val_window_size

    for i in range(self.config.n_epochs):
        for (xmb, mmb, ymb) in iter_data(*train_dataset, n_batch=n_batch_train, verbose=self.config.verbose):
            feed_dict = {
                self.X: xmb,
                self.M: mmb,
            }
            if target_dim:
                feed_dict[self.Y] = ymb

            global_step += 1
            if global_step % self.config.val_interval == 0:
                feed_dict[self.do_dropout] = DROPOUT_OFF
                outputs = self._eval(self.summaries, feed_dict=feed_dict)
                if self.train_writer is not None:
                    self.train_writer.add_summary(outputs.get(self.summaries), global_step)

                sum_val_loss = 0
                for xval, mval, yval in iter_data(*val_dataset, n_batch=n_batch_train,
                                                  verbose=self.config.verbose, tqdm_desc="Validation"):
                    # Build a separate dict so the training batch's feed_dict
                    # isn't clobbered before the train op below runs.
                    val_feed_dict = {
                        self.X: xval,
                        self.M: mval,
                        self.do_dropout: DROPOUT_OFF
                    }
                    if target_dim:
                        val_feed_dict[self.Y] = yval

                    outputs = self._eval(self.target_loss, self.summaries, feed_dict=val_feed_dict)
                    if self.valid_writer is not None:
                        self.valid_writer.add_summary(outputs.get(self.summaries), global_step)

                    val_cost = outputs.get(self.target_loss, 0)
                    sum_val_loss += val_cost
                    if avg_val_loss is None:
                        avg_val_loss = val_cost
                    else:
                        avg_val_loss = (
                            avg_val_loss * self.config.rolling_avg_decay
                            + val_cost * (1 - self.config.rolling_avg_decay)
                        )

                val_window.append(sum_val_loss)
                val_window.pop(0)

                if np.mean(val_window) <= best_val_loss:
                    best_val_loss = np.mean(val_window)
                    if self.config.save_best_model:
                        self.save(self.config.autosave_path)

                tqdm.tqdm.write("Train loss: {}\t Validation loss: {}".format(avg_train_loss, avg_val_loss))

            feed_dict[self.do_dropout] = DROPOUT_ON
            outputs = self._eval(self.target_loss, self.train_op, feed_dict=feed_dict)
            cost = outputs.get(self.target_loss, 0)
            if avg_train_loss is None:
                avg_train_loss = cost
            else:
                avg_train_loss = avg_train_loss * self.config.rolling_avg_decay + cost * (
                    1 - self.config.rolling_avg_decay
                )
    return self
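# Both `_training_loop` variants combine two smoothing mechanisms: an
# exponential rolling average of per-batch losses (`rolling_avg_decay`) and a
# fixed-size sliding window over summed validation losses that gates
# checkpointing. Isolated from the training loop, the bookkeeping looks like
# this minimal sketch -- the decay, window size, and loss values here are
# illustrative, not the library's defaults:
import numpy as np

_decay = 0.99       # mirrors config.rolling_avg_decay
_window_size = 3    # mirrors config.val_window_size

_avg_val_loss = None
_best_val_loss = float("inf")
_val_window = [float("inf")] * _window_size

def _update_rolling(avg, cost, decay):
    # The first observation seeds the average; later ones blend exponentially.
    return cost if avg is None else avg * decay + cost * (1 - decay)

for _sum_val_loss in [3.2, 2.9, 2.7, 2.8, 2.6]:  # fake per-validation sums
    _avg_val_loss = _update_rolling(_avg_val_loss, _sum_val_loss, _decay)
    # Slide the window: drop the oldest summed loss, append the newest.
    _val_window.append(_sum_val_loss)
    _val_window.pop(0)
    # Checkpoint whenever the window mean hits a new best, i.e. validation
    # loss is still trending downward over the last `_window_size` rounds.
    if np.mean(_val_window) <= _best_val_loss:
        _best_val_loss = np.mean(_val_window)
        print("would checkpoint; window mean =", _best_val_loss)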