def save(self, file_prefix="", directory=None, return_filename=False, verbose=True):
    """
    file_prefix (str): Prepended to the file name (does not affect the directory in the file path).
    directory (str): If unspecified, use self.path as the directory.
    return_filename (bool): Return the file name corresponding to this save.
    """
    try_import_torch()
    import torch

    if directory is not None:
        path = directory + file_prefix
    else:
        path = self.path + file_prefix

    params_filepath = path + self.params_file_name

    os.makedirs(os.path.dirname(path), exist_ok=True)

    temp_model = self.model
    if self.model is not None:
        torch.save(self.model, params_filepath)
    self.model = None  # Avoid pickling the weights.
    modelobj_filepath = super().save(path=path, verbose=verbose)
    self.model = temp_model  # Restore so the in-memory model remains usable after saving.

    if return_filename:
        return modelobj_filepath
def __init__(self, X, y=None, col_info=None, **kwargs):
    try_import_torch()
    import torch

    self.encoders = kwargs['encoders']
    self.kwargs = kwargs
    self.col_info = col_info
    self.raw_data = X

    if y is None:
        self.targets = None
    elif self.kwargs['problem_type'] == 'regression':
        self.targets = torch.FloatTensor(y)
    else:
        self.targets = torch.LongTensor(y)

    if col_info is None:
        # Stop-gap: this sets all feature types to CATEGORICAL.
        self.columns = get_col_info(X)
    else:
        # Must be a list of dicts, each of the form {"name": col_name, "type": col_type},
        # where col_name is a column of the DataFrame X and col_type is CATEGORICAL,
        # TEXT or SCALAR.
        # TODO: FIX THIS -- self.ds_info['meta']['columns'][1:]
        self.columns = self.col_info

    self.cat_feat_origin_cards = None
    self.cont_feat_origin = None
    self.feature_encoders = None
def __init__(self, n_cont_features, norm_class_name, cat_feat_origin_cards, max_emb_dim,
             p_dropout, one_hot_embeddings, drop_whole_embeddings, **kwargs):
    super().__init__()
    try_import_torch()
    import torch.nn as nn

    self.kwargs = kwargs
    self.act_on_output = True
    self.max_emb_dim = max_emb_dim
    self.n_cont_features = n_cont_features
    self.cat_feat_origin_cards = cat_feat_origin_cards
    self.norm_class = nn.__dict__[norm_class_name]
    self.p_dropout = p_dropout
    self.drop_whole_embeddings = drop_whole_embeddings
    self.one_hot_embeddings = one_hot_embeddings

    self.cat_initializers = nn.ModuleDict()
    if isinstance(self.cat_feat_origin_cards, list):
        for col_name, card in self.cat_feat_origin_cards:
            self.cat_initializers[col_name] = EmbeddingInitializerClass.EmbeddingInitializer(
                card, max_emb_dim, p_dropout,
                drop_whole_embeddings=drop_whole_embeddings,
                one_hot=one_hot_embeddings)
        self.init_feat_dim = sum(i.emb_dim for i in self.cat_initializers.values()) + self.n_cont_features
def encode(self, feature_encoders):
    try_import_torch()
    import torch

    if self.encoders is not None:
        self.feature_encoders = feature_encoders

        self.cat_feat_origin_cards = []
        cat_features = []
        self.cont_feat_origin = []
        cont_features = []
        for c in self.columns:
            enc = feature_encoders[c['name']]
            col = self.raw_data[c['name']]
            cat_feats = enc.enc_cat(col)
            if cat_feats is not None:
                self.cat_feat_origin_cards += [(f'{c["name"]}_{i}_{c["type"]}', card)
                                               for i, card in enumerate(enc.cat_cards)]
                cat_features.append(cat_feats)
            cont_feats = enc.enc_cont(col)
            if cont_feats is not None:
                self.cont_feat_origin += [c['name']] * enc.cont_dim
                cont_features.append(cont_feats)
        if cat_features:
            self.cat_data = torch.cat(cat_features, dim=1)
        else:
            self.cat_data = None
        if cont_features:
            self.cont_data = torch.cat(cont_features, dim=1)
        else:
            self.cont_data = None
def one_hot(x, card):
    try_import_torch()
    import torch

    assert isinstance(x, torch.LongTensor)
    assert x.dim() == 2
    # Write a 1 at each row's category index; output shape is (len(x), card).
    x_one_hot = x.new_zeros(x.size()[0], card).scatter_(1, x, 1)
    return x_one_hot
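# A minimal usage sketch for one_hot above (illustrative values). Since x is a
# LongTensor, the output is an integer tensor of shape (len(x), card):
import torch

x = torch.LongTensor([[0], [2], [1]])  # one category index per row
print(one_hot(x, card=3))
# tensor([[1, 0, 0],
#         [0, 0, 1],
#         [0, 1, 0]])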
def __setstate__(self, state):
    try_import_torch()
    import torch.nn.functional as F

    # Older pickled states may predate the `activation` attribute; default to ReLU.
    if 'activation' not in state:
        state['activation'] = F.relu
    super(TransformerClass.TransformerEncoderLayer_modified, self).__setstate__(state)
def enc_cont(self, scalars):
    """
    Returns a len(scalars) x 2 tensor, where the second column is a one-hot flag
    for missing data values.
    """
    try_import_torch()
    import torch

    scalars = self.clean_data(scalars, dtype='float')
    null_flag = np.full(len(scalars), np.nan, dtype=np.float32)
    vals = np.full(len(scalars), np.nan, dtype=np.float32)
    # Element-wise comparison against None; relies on an object array.
    null_idxs = np.where(np.array(scalars) == None)[0]  # noqa: E711
    val_idxs = np.where(np.array(scalars) != None)[0]  # noqa: E711

    # One-hot flag for missing values
    null_flag[null_idxs] = 1
    null_flag[val_idxs] = 0
    null_flag = null_flag.reshape(-1, 1)

    # Transform scalar values
    vals[val_idxs] = np.array(scalars, dtype=np.float32)[val_idxs]
    vals = vals.reshape(-1, 1)
    vals = self.scaler.transform(vals) + 1e-7  # Extra 1e-7 to help with correctness testing
    vals[null_idxs] = 0
    encoded = np.hstack((vals, null_flag))
    encoded = encoded.clip(-5, 5)  # Guarding against outlier values
    return torch.FloatTensor(encoded)
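# Standalone numpy-only sketch of the two-column layout this encoder produces
# (the scaler step is omitted here for brevity): column 0 holds the value with
# 0 where data is missing, column 1 is the missing-value flag.
import numpy as np

scalars = np.array([1.5, None, 3.0], dtype=object)
null_flag = (scalars == None).astype(np.float32).reshape(-1, 1)  # noqa: E711
vals = np.array([0.0 if s is None else s for s in scalars], dtype=np.float32).reshape(-1, 1)
print(np.hstack((vals, null_flag)))
# [[1.5 0. ]
#  [0.  1. ]
#  [3.  0. ]]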
def enc_cont(self, datetimes):
    try_import_torch()
    import torch

    datetimes = self.clean_data(datetimes)
    df = pd.DataFrame({'dt': datetimes})
    add_datepart(df, field_name='dt', prefix='', drop=False)
    df = add_cyclic_datepart(df, field_name='dt', prefix='', drop=False)
    enc = torch.empty(len(datetimes), self.cont_dim)
    feats_done = 0
    for c, t in self.cols_types:
        # For one-hot columns, t is the cardinality; float columns occupy one slot.
        feats_doing = 1 if t == 'float' else t
        if t == 'float':
            feats = torch.FloatTensor(df[c].to_numpy()).view(-1, 1)
            if c == 'Year':
                feats = (feats - 2000) / 10
            elif c == 'Dayofyear':
                feats /= 365
        else:
            feats = torch.LongTensor(df[c].to_numpy().astype('int32')).view(-1, 1)
            if c in ['Month', 'Week', 'Day']:
                feats -= 1  # shift 1-based calendar fields to 0-based
            feats = one_hot(feats, t)
        enc[:, feats_done: feats_done + feats_doing] = feats
        feats_done += feats_doing
    return enc
def __init__(self, kwargs):
    super().__init__()
    try_import_torch()
    import torch.nn as nn

    self.kwargs = kwargs
    self.loss_funct = nn.MSELoss() if kwargs['problem_type'] == REGRESSION else nn.CrossEntropyLoss()
def enc_cont(self, data):
    try_import_torch()
    import torch

    data = self.clean_data(data)
    text_strings = [s if s is not None else '' for s in data]
    encoded = self.get_encoded(text_strings)
    encoded = self.scaler.transform(encoded)
    encoded = torch.Tensor(encoded)
    return encoded
def __init__(self, num_embeddings, max_emb_dim, p_dropout, minimize_emb_dim=True,
             drop_whole_embeddings=False, one_hot=False, out_dim=None,
             shared_embedding=False, n_shared_embs=8, shared_embedding_added=False):
    """
    :param minimize_emb_dim: Whether to set embedding_dim = max_emb_dim, or to make
        embedding_dim smaller if num_embeddings is small.
    :param drop_whole_embeddings: If True, dropout pretends the embedding was a missing
        value. If False, dropout sets embedded features to 0.
    :param one_hot: If True, one-hot encode variables whose cardinality is < max_emb_dim.
        Also sets requires_grad = False.
    :param out_dim: If None, return the embedding straight from self.embed. If another
        dimension, put the embedding through a Linear layer to make it size (batch x out_dim).
    :param shared_embedding: If True, 1/(n_shared_embs)th of every embedding will be
        reserved for a learned parameter that is common to all embeddings. This is useful
        for transformers to identify which column an embedding came from.
        Mutually exclusive with one_hot.
    Note: the 0 embedding is reserved for padding and masking. The various encoders use 1
    for missing values.
    """
    super().__init__()
    try_import_torch()
    import torch
    import torch.nn as nn

    assert not (one_hot and out_dim is not None)
    self.p_dropout = p_dropout
    self.drop_whole_embeddings = drop_whole_embeddings
    self.shared_embedding = shared_embedding
    self.shared_embedding_added = shared_embedding_added
    if minimize_emb_dim or one_hot:
        self.emb_dim = min(max_emb_dim, num_embeddings)  # Don't use a crazy huge embedding if not needed
    else:
        self.emb_dim = max_emb_dim
    self.reshape_out = nn.Identity()
    if out_dim is not None:
        assert self.emb_dim <= out_dim, 'Makes no sense: just set max_emb_dim = out_dim and out_dim = None'
        if num_embeddings > self.emb_dim:
            self.reshape_out = nn.Linear(self.emb_dim, out_dim, bias=True)
        else:
            self.emb_dim = out_dim
    # Note: if you change the name of self.embed, or initialize an embedding elsewhere in
    # a model, the function get_optim will not work properly.
    self.embed = nn.Embedding(num_embeddings=num_embeddings + 1,
                              embedding_dim=self.emb_dim,
                              padding_idx=0)
    self.embed.weight.data.clamp_(-2, 2)  # Use truncated normal init
    if one_hot:
        self.embed.weight.requires_grad = False
        if num_embeddings <= max_emb_dim:
            self.embed.weight.data[1:, :] = torch.eye(self.emb_dim)
    if shared_embedding:
        assert not one_hot
        ce_dim = self.emb_dim if shared_embedding_added else (out_dim if out_dim else self.emb_dim) // n_shared_embs  # used to be //8
        self.shared_emb = nn.Parameter(torch.empty(1, ce_dim).uniform_(-1, 1))
    self.do = nn.Dropout(p=p_dropout)
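# A minimal usage sketch (assumes the class is importable as EmbeddingInitializer;
# hyperparameter values are illustrative). Index 0 is padding, 1 the missing bucket:
import torch

init = EmbeddingInitializer(num_embeddings=10, max_emb_dim=8, p_dropout=0.1)
idxs = torch.LongTensor([0, 1, 5])  # padding, missing, and a regular category
print(init(idxs).shape)  # torch.Size([3, 8]) -- emb_dim = min(max_emb_dim, num_embeddings)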
def enc_cat(self, data):
    """
    Values that the encoder has never seen before are returned as 1.
    0 is reserved for padding.
    """
    try_import_torch()
    import torch

    data = self.clean_data(data)
    idxs = [self._item_to_idx.get(item, 1) for item in data]
    return torch.LongTensor(idxs).unsqueeze(1)
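# Sketch of the index convention (assumes a fitted encoder whose vocabulary maps
# 'red' -> 2 and 'blue' -> 3; these values are illustrative):
_item_to_idx = {'red': 2, 'blue': 3}
data = ['red', 'green', 'blue']  # 'green' was never seen during fitting
print([_item_to_idx.get(item, 1) for item in data])  # [2, 1, 3]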
def reset_parameters(self) -> None:
    try_import_torch()
    from torch.nn import init

    init.kaiming_uniform_(self.weight, a=math.sqrt(5))
    if self.bias is not None:
        fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
        bound = 1 / math.sqrt(fan_in)
        init.uniform_(self.bias, -bound, bound)
def _get_activation_fn(activation):
    try_import_torch()
    import torch.nn.functional as F

    if activation == "relu":
        return F.relu
    elif activation == "gelu":
        return F.gelu
    raise RuntimeError("activation should be relu/gelu, not {}".format(activation))
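# Minimal usage sketch (illustrative values):
import torch

act = _get_activation_fn("gelu")
print(act(torch.tensor([-1.0, 0.0, 1.0])))  # tensor([-0.1587,  0.0000,  0.8413])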
def build_loader(self, shuffle=False):
    try_import_torch()
    from torch.utils.data import DataLoader

    loader = DataLoader(self, batch_size=self.kwargs['batch_size'], shuffle=shuffle,
                        num_workers=16, pin_memory=True)
    loader.cat_feat_origin_cards = self.cat_feat_origin_cards
    return loader
def augmentation(data, target, mask_prob=0.4, num_augs=1):
    try_import_torch()
    import torch

    shape = data.shape
    cat_data = torch.cat([data for _ in range(num_augs)])
    target = torch.cat([target for _ in range(num_augs)]).view(-1)
    locs_to_mask = torch.empty_like(cat_data, dtype=float).uniform_() < mask_prob
    cat_data[locs_to_mask] = 0  # 0 is the padding/mask index
    cat_data = cat_data.view(-1, shape[-1])
    return cat_data, target
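# A minimal sketch of the masking augmentation (illustrative values; masked
# positions are set to 0, the padding index used throughout this module):
import torch

data = torch.LongTensor([[3, 5, 2], [4, 1, 7]])
target = torch.LongTensor([0, 1])
aug_data, aug_target = augmentation(data, target, mask_prob=0.5, num_augs=2)
print(aug_data.shape, aug_target.shape)  # torch.Size([4, 3]) torch.Size([4])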
def __init__(self, num_class, params, cat_feat_origin_cards):
    super(TabNetClass.TabNet, self).__init__()
    try_import_torch()
    import torch.nn as nn

    self.params = params
    self.params['cat_feat_origin_cards'] = cat_feat_origin_cards
    self.embed = TabTransformer(**self.params['tab_kwargs'], **params)

    relu = nn.ReLU()
    lin = nn.Linear(2 * self.params['feature_dim'], num_class, bias=True)
    self.fc = nn.Sequential(*[relu, lin])
def init_input(self, input):
    try_import_torch()
    import torch

    feats = [init(input[:, i]) for i, init in enumerate(self.cat_initializers.values())]
    if self.readout == 'readout_emb':
        readout_emb = self.readout_emb.expand_as(feats[0])
        feat_embs = torch.stack([readout_emb] + feats, dim=0)  # (n_feat_embeddings + 1) x batch x hidden_dim
    else:
        feat_embs = torch.stack(feats, dim=0)  # n_feat_embeddings x batch x hidden_dim
    return feat_embs
def enc_cont(self, data):
    try_import_torch()
    import torch

    data = self.clean_data(data)
    text_strings = np.array([d if d is not None else '' for d in data])
    encoded = self.tfidf.transform(text_strings)
    encoded = torch.Tensor(encoded.todense())
    # todo: wait until pytorch lets you use multiproc with sparse tensors
    # encoded = encoded.tocoo()
    # i = torch.LongTensor(np.vstack((encoded.row, encoded.col)))
    # v = torch.FloatTensor(encoded.data)
    # encoded = torch.sparse.FloatTensor(i, v, torch.Size(encoded.shape))
    return encoded
def enc_cat(self, data):
    # todo: add support for missing values, which should get encoded as 1.
    try_import_torch()
    import torch

    data = LatLongScalarEnc().enc_cont(data)
    feats = []
    for col, disc in enumerate(self.discs):
        d = data[:, col].reshape(-1, 1)
        d = disc.transform(d).reshape(-1)
        d = d + 2  # for missing and padding
        feats.append(d)
    feats = np.stack(feats, axis=1)
    return torch.LongTensor(feats)
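# Sketch of the "+ 2" offset (assuming each disc behaves like sklearn's
# KBinsDiscretizer with ordinal encoding -- an assumption here; the shift keeps
# 0 reserved for padding and 1 for missing values):
import numpy as np
from sklearn.preprocessing import KBinsDiscretizer

disc = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='quantile')
disc.fit(np.array([[0.0], [1.0], [2.0], [3.0]]))
d = disc.transform(np.array([[0.5], [2.9]])).reshape(-1)
print((d + 2).astype(int))  # [2 4] -- bin indices 0 and 2, shifted by 2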
def __init__(self, in_features: int, out_features: int, bias: bool = True) -> None:
    super(TransformerClass.Linear, self).__init__()
    try_import_torch()
    import torch
    from torch.nn.parameter import Parameter

    self.in_features = in_features
    self.out_features = out_features
    self.weight = Parameter(torch.Tensor(out_features, in_features))
    if bias:
        self.bias = Parameter(torch.Tensor(out_features))
    else:
        self.register_parameter('bias', None)
    self.reset_parameters()
def enc_cat(self, datetimes):
    # todo: add support for missing values, which should get encoded as 1.
    try_import_torch()
    import torch

    datetimes = self.clean_data(datetimes)
    df = pd.DataFrame({'dt': datetimes})
    add_datepart(df, field_name='dt', prefix='', drop=False)
    feats = []
    for c, t in self.cols_types:
        f = torch.LongTensor(df[c].to_numpy().astype('int32'))
        if c in ['Month', 'Week', 'Day']:
            f -= 1  # shift 1-based calendar fields to 0-based
        feats.append(f)
    feats = torch.stack(feats, dim=1) + 2  # + 2 for missing and padding
    return feats
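# Sketch of the index convention for one datetime field (illustrative): Month,
# Week and Day come out of the datepart columns 1-based, so they are shifted to
# 0-based first, and every field is then offset by 2 so that 0 stays reserved
# for padding and 1 for missing values.
month = 3             # March, as produced by add_datepart
print(month - 1 + 2)  # 4 -> the index actually stored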
def _predict_proba(self, X, preprocess=False):
    """
    X (torch.Tensor or pd.DataFrame): data for the model to predict probabilities for.

    Returns: np.array of k probabilities for each of the k classes.
    If k=2, we drop the second probability.
    """
    try_import_torch()
    import torch
    import torch.nn as nn
    from torch.utils.data import DataLoader
    from torch.autograd import Variable

    if isinstance(X, pd.DataFrame):
        if preprocess:
            X = self.preprocess(X)
        # Internal preprocessing, renaming col names, tt specific.
        X, _, _ = self._tt_preprocess(X, fe=self.fe)
        loader = X.build_loader()
    elif isinstance(X, DataLoader):
        loader = X
    elif isinstance(X, torch.Tensor):
        # Note: this branch assumes a DataFrame-like object; plain tensors have
        # neither .rename(columns=...) nor .build_loader().
        X = X.rename(columns=self._get_no_period_columns(X))
        loader = X.build_loader()
    else:
        raise TypeError("Unexpected input type into _predict_proba.")  # TODO: Better error

    self.model.eval()
    softmax = nn.Softmax(dim=1)

    outputs = torch.zeros([len(loader.dataset), self.num_classes])

    iter = 0
    for data, _ in loader:
        if self.params['device'].type == "cuda":
            data = data.cuda()
        with torch.no_grad():
            data = Variable(data)
            out, _ = self.model(data)
            batch_size = len(out)
            prob = softmax(out)

        outputs[iter:(iter + batch_size)] = prob
        iter += batch_size

    if self.problem_type == BINARY:
        return outputs[:, 1].cpu().numpy()

    return outputs.cpu().numpy()
def set_default_params(self, y_train):
    try_import_torch()
    import torch

    default_params = get_default_param(self.problem_type, y_train.nunique())
    for param, val in default_params.items():
        self._set_default_param_value(param, val)

    # TODO: Take in num_gpus as a param. Currently this is hard-coded upon detection of cuda.
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    self.params['device'] = device
def __init__(self, kwargs, replacement_noise='random', p_replace=0.3):
    super().__init__()
    try_import_torch()
    import torch
    import torch.nn as nn

    self.kwargs = kwargs
    self.loss_funct = nn.CrossEntropyLoss()
    self.p_replace = p_replace
    self.predicters = nn.ModuleList()
    self.n_cat_feats = len(kwargs['cat_feat_origin_cards'])
    self.replacement_noise = replacement_noise
    number_devices = torch.cuda.device_count()

    # One binary head per categorical feature predicts replaced-vs-original.
    for col in range(self.n_cat_feats):
        lin = nn.Linear(kwargs['tab_kwargs']['hidden_dim'], 2)
        self.predicters.append(lin)
class _LinearWithBias(Linear):
    try_import_torch()
    import torch

    bias: torch.Tensor

    def __init__(self, in_features: int, out_features: int) -> None:
        super().__init__(in_features, out_features, bias=True)
def __init__(self, embed_dim, n_cat_embeddings, num_heads, dropout=0., bias=True,
             add_bias_kv=False, add_zero_attn=False, kdim=None, vdim=None):
    super(TransformerClass.MultiheadAttention, self).__init__()
    try_import_torch()
    import torch
    from torch.nn.parameter import Parameter

    self.embed_dim = embed_dim
    self.kdim = kdim if kdim is not None else embed_dim
    self.vdim = vdim if vdim is not None else embed_dim
    self._qkv_same_embed_dim = self.kdim == embed_dim and self.vdim == embed_dim

    self.num_heads = num_heads
    self.dropout = dropout
    self.head_dim = embed_dim // num_heads
    assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads"

    if self._qkv_same_embed_dim is False:
        self.q_proj_weight = Parameter(torch.Tensor(embed_dim, embed_dim))
        self.k_proj_weight = Parameter(torch.Tensor(embed_dim, self.kdim))
        self.v_proj_weight = Parameter(torch.Tensor(embed_dim, self.vdim))
        self.register_parameter('in_proj_weight', None)
        self.register_parameter('fixed_k', None)
    else:
        self.in_proj_weight = Parameter(torch.empty(embed_dim, embed_dim))
        # self.in_proj_weight = Parameter(torch.empty(2 * embed_dim, embed_dim))
        self.fixed_k = Parameter(torch.empty(n_cat_embeddings, embed_dim))
        self.fixed_q = Parameter(torch.empty(n_cat_embeddings, embed_dim))
        self.register_parameter('q_proj_weight', None)
        self.register_parameter('k_proj_weight', None)
        self.register_parameter('v_proj_weight', None)

    if bias:
        self.in_proj_bias = Parameter(torch.empty(embed_dim))
    else:
        self.register_parameter('in_proj_bias', None)
    self.out_proj = TransformerClass._LinearWithBias(embed_dim, embed_dim)

    if add_bias_kv:
        self.bias_k = Parameter(torch.empty(1, 1, embed_dim))
        self.bias_v = Parameter(torch.empty(1, 1, embed_dim))
    else:
        self.bias_k = self.bias_v = None

    self.add_zero_attn = add_zero_attn

    self._reset_parameters()
def forward(self, input):
    try_import_torch()
    import torch

    if self.drop_whole_embeddings and self.training:
        # Zero out whole indices: dropped entries map to the padding embedding
        # (index 0), so the model sees them as missing rather than as scaled features.
        mask = torch.zeros_like(input).bernoulli_(1 - self.p_dropout)
        input = input * mask
    out = self.embed(input)
    if not self.drop_whole_embeddings:
        out = self.do(out)
    out = self.reshape_out(out)
    if self.shared_embedding:
        shared_emb = self.shared_emb.expand(out.shape[0], -1)
        if not self.shared_embedding_added:
            out[:, :shared_emb.shape[1]] = shared_emb
        else:
            out += shared_emb
    return out
@classmethod
def load(cls, path, file_prefix="", reset_paths=False, verbose=True):
    """
    file_prefix (str): Prepended to the file name.
    If you want to load files with a given prefix, you can also pass:
    path = directory + file_prefix
    """
    try_import_torch()
    import torch

    path = path + file_prefix
    obj: TabTransformerModel = load_pkl.load(path=path + cls.model_file_name, verbose=verbose)
    if reset_paths:
        obj.set_contexts(path)
    obj.model = torch.load(path + cls.params_file_name)
    return obj
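# Round-trip usage sketch (paths are illustrative; assumes a fitted
# TabTransformerModel instance named `model`):
#
#     model.save(directory="models/tt/")          # writes the pickle + torch weights
#     loaded = TabTransformerModel.load("models/tt/")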
def forward(self, out, target):
    try_import_torch()
    import torch

    # out is batch x n_cat_feats x hidden_dim; each per-column head predicts
    # replaced-vs-original, so the logits flatten to (batch * n_cat_feats, 2).
    prob = torch.cat([self.predicters[col](out[:, col, :]).unsqueeze(1)
                      for col in range(self.n_cat_feats)], dim=1)
    prob = prob.view(-1, 2)
    target = target.view(-1)
    loss = self.loss_funct(prob, target)
    pred = prob.max(dim=1, keepdim=True)[1]
    correct = pred.eq(target.view_as(pred)).sum()
    correct = correct.float()
    correct = correct / pred.shape[0]  # fraction of positions classified correctly
    return loss, correct