Example #1
def base_cfg():
    cfg = CfgNode()
    cfg.version = 1
    cfg.optimization = base_optimization_config()
    cfg.learning = base_learning_config()
    cfg.model = base_model_config()
    cfg.misc = base_misc_config()
    cfg.freeze()
    return cfg
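base_cfg() returns a frozen config, so callers have to work on an unfrozen copy before overriding fields. A minimal usage sketch, assuming CfgNode follows the yacs-style clone()/defrost()/freeze() API (an assumption; only freeze() appears above):

cfg = base_cfg().clone()    # work on a copy, keep the shared base intact
cfg.defrost()               # unfreeze before editing
cfg.optimization.lr = 5e-5  # override a nested field
cfg.freeze()                # re-freeze to catch accidental writes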
Example #2
 def __setstate__(self, state):
     tokenizer_name = state['_tokenizer_name']
     tokenizer = get_tokenizer(tokenizer_name)
     state['_tokenizer'] = tokenizer
     state['_logger'] = logging
     state['_cfg'] = CfgNode.load_cfg(state['_cfg'])
     self.__dict__ = state
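__setstate__ above rebuilds the three members that cannot be pickled directly, so the matching __getstate__ must replace them with serializable stand-ins. A minimal sketch, assuming the tokenizer name is already stored in _tokenizer_name and that CfgNode supports a yacs-style dump() to a YAML string (both assumptions):

 def __getstate__(self):
     state = self.__dict__.copy()
     state['_tokenizer'] = None        # rebuilt via get_tokenizer(_tokenizer_name)
     state['_logger'] = None           # rebound to the logging module on load
     state['_cfg'] = self._cfg.dump()  # CfgNode -> YAML string for load_cfg()
     return state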
Example #3
 def get_cfg(key=None):
     if key is None:
         cfg = CfgNode()
         cfg.feature_units = -1  # -1 means not given and we will use the units of BERT
         # TODO(sxjscience) Use a class to store the TextNet
         cfg.text_net = CfgNode()
         cfg.text_net.use_segment_id = True
         cfg.text_net.pool_type = 'cls'
         cfg.agg_net = FeatureAggregator.get_cfg()
         cfg.categorical_net = CategoricalFeatureNet.get_cfg()
         cfg.numerical_net = NumericalFeatureNet.get_cfg()
         cfg.initializer = CfgNode()
         cfg.initializer.weight = ['truncnorm', 0, 0.02]
         cfg.initializer.bias = ['zeros']
         return cfg
     else:
         raise NotImplementedError
Example #4
def base_model_config():
    cfg = CfgNode()
    cfg.preprocess = CfgNode()
    cfg.preprocess.merge_text = True
    cfg.preprocess.max_length = 128
    cfg.backbone = CfgNode()
    cfg.backbone.name = 'google_electra_base'
    cfg.network = BERTForTabularBasicV1.get_cfg()
    return cfg
Example #5
def base_learning_config():
    cfg = CfgNode()
    cfg.early_stopping_patience = 10  # Stop if we cannot find a better checkpoint
    cfg.valid_ratio = 0.15  # The ratio of dataset to split for validation
    cfg.stop_metric = 'auto'  # Automatically define the stopping metric
    cfg.log_metrics = 'auto'  # Automatically determine the metrics used in logging
    return cfg
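For reference, early_stopping_patience counts validation rounds without improvement. A sketch of the intended behavior over a hypothetical stream of validation scores (variable names are illustrative, not the library's training loop):

cfg = base_learning_config()
best_score, no_better = None, 0
for score in validation_scores:  # hypothetical: one score per validation round
    if best_score is None or score > best_score:
        best_score, no_better = score, 0  # found a better checkpoint
    else:
        no_better += 1
        if no_better >= cfg.early_stopping_patience:
            break  # no better checkpoint for 10 consecutive rounds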
Example #6
def base_preprocess_cfg():
    cfg = CfgNode()
    cfg.text = CfgNode()
    cfg.text.merge = True  # Whether we will merge different text columns
    # or treat them independently.
    cfg.text.max_length = 512  # The maximum possible length.
    cfg.text.auto_max_length = True  # Try to automatically shrink the maximal length
    # based on the statistics of the dataset.
    cfg.text.auto_max_length_quantile = 0.95  # We will ensure that the new max_length is around the quantile of the lengths of all samples
    cfg.text.auto_max_length_round_to = 32  # We will ensure that the automatically determined max length will be divisible by round_to
    cfg.categorical = CfgNode()
    cfg.categorical.minimum_cat_count = 100  # The minimal number of data per categorical group
    cfg.categorical.maximum_num_cat = 20  # The maximum number of categories to keep
    cfg.categorical.convert_to_text = False  # Whether to convert the feature to text

    cfg.numerical = CfgNode()
    cfg.numerical.convert_to_text = False  # Whether to convert the feature to text
    cfg.numerical.impute_strategy = 'mean'  # The strategy used to fill in missing values.
    # We use the imputer in sklearn: https://scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html
    # The strategies can thus be "mean", "median", "most_frequent".
    cfg.numerical.scaler_with_mean = True  # Whether to normalize with mean
    cfg.numerical.scaler_with_std = True  # Whether to normalize with std
    cfg.freeze()
    return cfg
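The numerical options map one-to-one onto the sklearn objects referenced in the comments above. An illustrative wiring (the composition is an assumption; the parameter mapping follows the config):

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

cfg = base_preprocess_cfg()
imputer = SimpleImputer(strategy=cfg.numerical.impute_strategy)  # 'mean', 'median', or 'most_frequent'
scaler = StandardScaler(with_mean=cfg.numerical.scaler_with_mean,
                        with_std=cfg.numerical.scaler_with_std)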
Example #7
 def get_cfg(key=None):
     if key is None:
         cfg = CfgNode()
         cfg.agg_type = 'concat'
         cfg.mid_units = 256
         cfg.feature_proj_num_layers = -1
         cfg.out_proj_num_layers = 0
         cfg.data_dropout = False
         cfg.dropout = 0.1
         cfg.activation = 'tanh'
         cfg.normalization = 'layer_norm'
         cfg.norm_eps = 1e-5
         cfg.initializer = CfgNode()
         cfg.initializer.weight = ['xavier', 'uniform', 'avg', 3.0]
         cfg.initializer.bias = ['zeros']
     else:
         raise NotImplementedError
     return cfg
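The initializer entries are positional spec lists rather than objects. Reading ['xavier', 'uniform', 'avg', 3.0] as Xavier initialization with uniform sampling, 'avg' factor type, and magnitude 3.0, the MXNet equivalent would be the following (the MXNet mapping is an assumption, not stated in the snippet):

import mxnet as mx

weight_init = mx.init.Xavier(rnd_type='uniform', factor_type='avg', magnitude=3.0)
bias_init = mx.init.Zero()  # the ['zeros'] spec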
Example #8
 def get_cfg(key=None):
     if key is None:
         cfg = CfgNode()
         cfg.input_centering = False
         cfg.mid_units = 128
         cfg.num_layers = 1
         cfg.data_dropout = False
         cfg.dropout = 0.1
         cfg.activation = 'leaky'
         cfg.normalization = 'layer_norm'
         cfg.norm_eps = 1e-5
         cfg.initializer = CfgNode()
         cfg.initializer.weight = ['xavier', 'uniform', 'avg', 3.0]
         cfg.initializer.bias = ['zeros']
     else:
         raise NotImplementedError
     return cfg
Example #9
 def get_cfg(key=None):
     if key is None:
         cfg = CfgNode()
         cfg.emb_units = 32
         cfg.mid_units = 64
         cfg.num_layers = 1
         cfg.data_dropout = False
         cfg.dropout = 0.1
         cfg.activation = 'leaky'
         cfg.normalization = 'layer_norm'
         cfg.norm_eps = 1e-5
         cfg.initializer = CfgNode()
         cfg.initializer.embed = ['xavier', 'gaussian', 'in', 1.0]
         cfg.initializer.weight = ['xavier', 'uniform', 'avg', 3.0]
         cfg.initializer.bias = ['zeros']
         return cfg
     else:
         raise NotImplementedError
Example #10
def base_misc_config():
    cfg = CfgNode()
    cfg.seed = 123
    cfg.exp_dir = './autonlp'
    return cfg
Example #11
def base_optimization_config():
    """The basic optimization phase"""
    cfg = CfgNode()
    cfg.lr_scheduler = 'triangular'
    cfg.optimizer = 'adamw'
    cfg.optimizer_params = [('beta1', 0.9), ('beta2', 0.999),
                            ('epsilon', 1e-6), ('correct_bias', False)]
    cfg.begin_lr = 0.0
    cfg.batch_size = 32
    cfg.model_average = 5
    cfg.per_device_batch_size = 16  # Per-device batch-size
    cfg.val_batch_size_mult = 2  # By default, we double the batch size for validation
    cfg.lr = 1E-4
    cfg.final_lr = 0.0
    cfg.num_train_epochs = 3
    cfg.warmup_portion = 0.1
    cfg.layerwise_lr_decay = 0.8  # Layer-wise learning rate decay
    cfg.wd = 0.01  # Weight Decay
    cfg.max_grad_norm = 1.0  # Maximum Gradient Norm
    # Validate every valid_frequency * num_updates_in_an_epoch updates
    cfg.valid_frequency = 0.1
    # Log every log_frequency * num_updates_in_an_epoch updates
    cfg.log_frequency = 0.1
    return cfg
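Both valid_frequency and log_frequency are fractions of an epoch, per the comments above; turning them into update intervals is one line of arithmetic each (num_train_samples is a hypothetical input):

cfg = base_optimization_config()
updates_per_epoch = num_train_samples // cfg.batch_size  # hypothetical dataset size
valid_interval = max(1, int(cfg.valid_frequency * updates_per_epoch))  # validate ~10x per epoch
log_interval = max(1, int(cfg.log_frequency * updates_per_epoch))      # log ~10x per epoch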
Example #12
 def get_cfg(key=None):
     if key is None:
         cfg = CfgNode()
         cfg.base_feature_units = -1  # -1 means not given and we will use the units of BERT
         cfg.text_net = CfgNode()
         cfg.text_net.use_segment_id = True
         cfg.text_net.pool_type = 'cls'
         cfg.aggregate_categorical = True  # Whether to use one network to aggregate the categorical columns.
         cfg.categorical_agg = CfgNode()
         cfg.categorical_agg.activation = 'leaky'
         cfg.categorical_agg.mid_units = 128
         cfg.categorical_agg.num_layers = 1
         cfg.categorical_agg.dropout = 0.1
         cfg.categorical_agg.gated_activation = False
         cfg.agg_net = FeatureAggregator.get_cfg()
         cfg.categorical_net = CategoricalFeatureNet.get_cfg()
         cfg.numerical_net = NumericalFeatureNet.get_cfg()
         cfg.initializer = CfgNode()
         cfg.initializer.weight = ['xavier', 'uniform', 'avg', 3.0]
         cfg.initializer.bias = ['zeros']
         return cfg
     else:
         raise NotImplementedError
Example #13
 def get_cfg(key=None):
     if key is None:
         cfg = CfgNode()
         cfg.agg_type = 'concat'

         # Attention Aggregator
         cfg.attention_net = CfgNode()
         cfg.attention_net.num_layers = 6
         cfg.attention_net.units = 64
         cfg.attention_net.num_heads = 4
         cfg.attention_net.hidden_size = -1  # Size of the FFN network used in attention
         cfg.attention_net.activation = 'gelu'  # Activation of the attention
 
         # Other parameters
         cfg.mid_units = 128
         cfg.feature_proj_num_layers = -1
         cfg.out_proj_num_layers = 1
         cfg.data_dropout = False
         cfg.dropout = 0.1
         cfg.activation = 'leaky'
         cfg.normalization = 'layer_norm'
         cfg.norm_eps = 1e-5
         cfg.initializer = CfgNode()
         cfg.initializer.weight = ['xavier', 'uniform', 'avg', 3.0]
         cfg.initializer.bias = ['zeros']
     else:
         raise NotImplementedError
     return cfg