def base_cfg():
    cfg = CfgNode()
    cfg.version = 1
    cfg.optimization = base_optimization_config()
    cfg.learning = base_learning_config()
    cfg.model = base_model_config()
    cfg.misc = base_misc_config()
    cfg.freeze()
    return cfg
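# A minimal usage sketch (illustrative, not part of the original module),
# assuming `CfgNode` follows the yacs API: the frozen base config is cloned,
# defrosted, selectively overridden, and re-frozen. The override keys below
# are examples only.
def _example_override_base_cfg():
    cfg = base_cfg().clone()
    cfg.defrost()  # base_cfg() returns a frozen (immutable) config
    cfg.merge_from_list(['optimization.lr', 5e-5,
                         'optimization.num_train_epochs', 5])
    cfg.freeze()
    return cfg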
def __setstate__(self, state):
    tokenizer_name = state['_tokenizer_name']
    tokenizer = get_tokenizer(tokenizer_name)
    state['_tokenizer'] = tokenizer
    state['_logger'] = logging
    state['_cfg'] = CfgNode.load_cfg(state['_cfg'])
    self.__dict__ = state
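# A hedged sketch of the matching __getstate__, which this section does not
# show: it drops exactly the members that __setstate__ above rebuilds. The
# tokenizer and logger are not picklable, and the config is dumped to its
# string form so that CfgNode.load_cfg can restore it.
def __getstate__(self):
    state = self.__dict__.copy()
    del state['_tokenizer']           # Rebuilt from '_tokenizer_name' on load
    del state['_logger']              # Replaced with the logging module on load
    state['_cfg'] = self._cfg.dump()  # Serialize the CfgNode to a YAML string
    return state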
def get_cfg(key=None):
    if key is None:
        cfg = CfgNode()
        cfg.feature_units = -1  # -1 means not given and we will use the units of BERT
        # TODO(sxjscience) Use a class to store the TextNet
        cfg.text_net = CfgNode()
        cfg.text_net.use_segment_id = True
        cfg.text_net.pool_type = 'cls'
        cfg.agg_net = FeatureAggregator.get_cfg()
        cfg.categorical_net = CategoricalFeatureNet.get_cfg()
        cfg.numerical_net = NumericalFeatureNet.get_cfg()
        cfg.initializer = CfgNode()
        cfg.initializer.weight = ['truncnorm', 0, 0.02]
        cfg.initializer.bias = ['zeros']
        return cfg
    else:
        raise NotImplementedError
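# A hedged sketch (not the module's real parser) of how initializer specs such
# as ['xavier', 'uniform', 'avg', 3.0] or ['zeros'] above could be turned into
# MXNet initializers; MXNet is assumed as the backend given the GluonNLP-style
# backbone names, and the helper name is illustrative.
import mxnet as mx

def _example_parse_initializer(spec):
    name, args = spec[0], spec[1:]
    if name == 'xavier':
        rnd_type, factor_type, magnitude = args
        return mx.init.Xavier(rnd_type=rnd_type, factor_type=factor_type,
                              magnitude=magnitude)
    if name == 'zeros':
        return mx.init.Zero()
    # 'truncnorm' would map to a truncated-normal initializer
    # (e.g. GluonNLP's TruncNorm) in the real code base.
    raise NotImplementedError(f'Unsupported initializer spec: {spec}')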
def base_model_config():
    cfg = CfgNode()
    cfg.preprocess = CfgNode()
    cfg.preprocess.merge_text = True
    cfg.preprocess.max_length = 128
    cfg.backbone = CfgNode()
    cfg.backbone.name = 'google_electra_base'
    cfg.network = BERTForTabularBasicV1.get_cfg()
    return cfg
def base_learning_config():
    cfg = CfgNode()
    cfg.early_stopping_patience = 10  # Stop if we cannot find a better checkpoint
    cfg.valid_ratio = 0.15            # The ratio of the dataset to split out for validation
    cfg.stop_metric = 'auto'          # Automatically determine the stopping metric
    cfg.log_metrics = 'auto'          # Automatically determine the metrics used in logging
    return cfg
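# An illustrative sketch (the trainer loop lives outside this section) of how
# `early_stopping_patience` is commonly applied: stop once `patience`
# consecutive validations fail to produce a better checkpoint.
def _example_early_stopping(valid_scores, patience):
    best, no_better = float('-inf'), 0
    for score in valid_scores:  # one score per validation round
        if score > best:
            best, no_better = score, 0  # found a better checkpoint
        else:
            no_better += 1
            if no_better >= patience:
                break  # patience exhausted; stop training
    return best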
def base_preprocess_cfg():
    cfg = CfgNode()
    cfg.text = CfgNode()
    cfg.text.merge = True                    # Whether to merge different text columns
                                             # or treat them independently.
    cfg.text.max_length = 512                # The maximum possible length.
    cfg.text.auto_max_length = True          # Try to automatically shrink the maximal length
                                             # based on the statistics of the dataset.
    cfg.text.auto_max_length_quantile = 0.95 # Ensure that the new max_length is around this
                                             # quantile of the lengths of all samples.
    cfg.text.auto_max_length_round_to = 32   # Ensure that the automatically determined max
                                             # length is divisible by round_to.
    cfg.categorical = CfgNode()
    cfg.categorical.minimum_cat_count = 100  # The minimal number of data per categorical group
    cfg.categorical.maximum_num_cat = 20     # The maximal number of categorical groups to keep
    cfg.categorical.convert_to_text = False  # Whether to convert the feature to text
    cfg.numerical = CfgNode()
    cfg.numerical.convert_to_text = False    # Whether to convert the feature to text
    cfg.numerical.impute_strategy = 'mean'   # The strategy used to fill in missing values.
                                             # We use the imputer in sklearn:
                                             # https://scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html
                                             # The strategy can thus be "mean", "median",
                                             # or "most_frequent".
    cfg.numerical.scaler_with_mean = True    # Whether to normalize with mean
    cfg.numerical.scaler_with_std = True     # Whether to normalize with std
    cfg.freeze()
    return cfg
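# A minimal sketch (an assumption, not the module's actual preprocessing code)
# of how the numerical options above map onto the sklearn classes the comments
# reference: `impute_strategy` feeds SimpleImputer, and the two scaler flags
# feed StandardScaler.
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

def _example_numerical_pipeline(cfg):
    return Pipeline([
        ('impute', SimpleImputer(strategy=cfg.numerical.impute_strategy)),
        ('scale', StandardScaler(with_mean=cfg.numerical.scaler_with_mean,
                                 with_std=cfg.numerical.scaler_with_std)),
    ])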
def get_cfg(key=None):
    if key is None:
        cfg = CfgNode()
        cfg.agg_type = 'concat'
        cfg.mid_units = 256
        cfg.feature_proj_num_layers = -1
        cfg.out_proj_num_layers = 0
        cfg.data_dropout = False
        cfg.dropout = 0.1
        cfg.activation = 'tanh'
        cfg.normalization = 'layer_norm'
        cfg.norm_eps = 1e-5
        cfg.initializer = CfgNode()
        cfg.initializer.weight = ['xavier', 'uniform', 'avg', 3.0]
        cfg.initializer.bias = ['zeros']
    else:
        raise NotImplementedError
    return cfg
def get_cfg(key=None):
    if key is None:
        cfg = CfgNode()
        cfg.input_centering = False
        cfg.mid_units = 128
        cfg.num_layers = 1
        cfg.data_dropout = False
        cfg.dropout = 0.1
        cfg.activation = 'leaky'
        cfg.normalization = 'layer_norm'
        cfg.norm_eps = 1e-5
        cfg.initializer = CfgNode()
        cfg.initializer.weight = ['xavier', 'uniform', 'avg', 3.0]
        cfg.initializer.bias = ['zeros']
    else:
        raise NotImplementedError
    return cfg
def get_cfg(key=None):
    if key is None:
        cfg = CfgNode()
        cfg.emb_units = 32
        cfg.mid_units = 64
        cfg.num_layers = 1
        cfg.data_dropout = False
        cfg.dropout = 0.1
        cfg.activation = 'leaky'
        cfg.normalization = 'layer_norm'
        cfg.norm_eps = 1e-5
        cfg.initializer = CfgNode()
        cfg.initializer.embed = ['xavier', 'gaussian', 'in', 1.0]
        cfg.initializer.weight = ['xavier', 'uniform', 'avg', 3.0]
        cfg.initializer.bias = ['zeros']
        return cfg
    else:
        raise NotImplementedError
def base_misc_config():
    cfg = CfgNode()
    cfg.seed = 123
    cfg.exp_dir = './autonlp'
    return cfg
def base_optimization_config():
    """The basic optimization config."""
    cfg = CfgNode()
    cfg.lr_scheduler = 'triangular'
    cfg.optimizer = 'adamw'
    cfg.optimizer_params = [('beta1', 0.9), ('beta2', 0.999),
                            ('epsilon', 1e-6), ('correct_bias', False)]
    cfg.begin_lr = 0.0
    cfg.batch_size = 32
    cfg.model_average = 5
    cfg.per_device_batch_size = 16  # Per-device batch size
    cfg.val_batch_size_mult = 2     # By default, we double the batch size for validation
    cfg.lr = 1E-4
    cfg.final_lr = 0.0
    cfg.num_train_epochs = 3
    cfg.warmup_portion = 0.1
    cfg.layerwise_lr_decay = 0.8    # The layer-wise learning-rate decay
    cfg.wd = 0.01                   # Weight decay
    cfg.max_grad_norm = 1.0         # Maximum gradient norm
    # The validation frequency in updates = valid_frequency * num_updates_in_an_epoch
    cfg.valid_frequency = 0.1
    # The logging frequency in updates = log_frequency * num_updates_in_an_epoch
    cfg.log_frequency = 0.1
    return cfg
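# A small illustrative sketch of how the fractional frequencies above translate
# into update steps; `num_samples`, the helper name, and the assumption that
# `warmup_portion` applies to the total number of updates are all illustrative.
def _example_schedule_steps(cfg, num_samples):
    updates_per_epoch = num_samples // cfg.batch_size
    total_updates = updates_per_epoch * cfg.num_train_epochs
    warmup_updates = int(cfg.warmup_portion * total_updates)
    valid_every = max(1, int(cfg.valid_frequency * updates_per_epoch))
    log_every = max(1, int(cfg.log_frequency * updates_per_epoch))
    return warmup_updates, valid_every, log_every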
def get_cfg(key=None):
    if key is None:
        cfg = CfgNode()
        cfg.base_feature_units = -1  # -1 means not given and we will use the units of BERT
        cfg.text_net = CfgNode()
        cfg.text_net.use_segment_id = True
        cfg.text_net.pool_type = 'cls'
        cfg.aggregate_categorical = True  # Whether to use one network to aggregate the categorical columns.
        cfg.categorical_agg = CfgNode()
        cfg.categorical_agg.activation = 'leaky'
        cfg.categorical_agg.mid_units = 128
        cfg.categorical_agg.num_layers = 1
        cfg.categorical_agg.dropout = 0.1
        cfg.categorical_agg.gated_activation = False
        cfg.agg_net = FeatureAggregator.get_cfg()
        cfg.categorical_net = CategoricalFeatureNet.get_cfg()
        cfg.numerical_net = NumericalFeatureNet.get_cfg()
        cfg.initializer = CfgNode()
        cfg.initializer.weight = ['xavier', 'uniform', 'avg', 3.0]
        cfg.initializer.bias = ['zeros']
        return cfg
    else:
        raise NotImplementedError
def get_cfg(key=None):
    if key is None:
        cfg = CfgNode()
        cfg.agg_type = 'concat'
        # Attention aggregator
        cfg.attention_net = CfgNode()
        cfg.attention_net.num_layers = 6
        cfg.attention_net.units = 64
        cfg.attention_net.num_heads = 4
        cfg.attention_net.hidden_size = -1     # Size of the FFN network used in attention
        cfg.attention_net.activation = 'gelu'  # Activation of the attention
        # Other parameters
        cfg.mid_units = 128
        cfg.feature_proj_num_layers = -1
        cfg.out_proj_num_layers = 1
        cfg.data_dropout = False
        cfg.dropout = 0.1
        cfg.activation = 'leaky'
        cfg.normalization = 'layer_norm'
        cfg.norm_eps = 1e-5
        cfg.initializer = CfgNode()
        cfg.initializer.weight = ['xavier', 'uniform', 'avg', 3.0]
        cfg.initializer.bias = ['zeros']
    else:
        raise NotImplementedError
    return cfg