def build_model(vocab: Vocabulary,
                args,
                **kwargs) -> Model:
    print("Building the model")
    vocab_size = vocab.get_vocab_size("tokens")
    EMBED_DIMS = 200

    if args.pretrained_WE_path:
        # turn the tokens into EMBED_DIMS-dim embeddings initialized from the pretrained file,
        # then turn the embeddings into encodings
        embedder = BasicTextFieldEmbedder(
            {"tokens": Embedding(embedding_dim=EMBED_DIMS, num_embeddings=vocab_size,
                                 pretrained_file=args.pretrained_WE_path, vocab=vocab)})
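        # note: the vectors in the pretrained file are expected to have EMBED_DIMS (200 here) dimensions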

    else:
        embedder = BasicTextFieldEmbedder(
            {"tokens": Embedding(embedding_dim=EMBED_DIMS, num_embeddings=vocab_size)})

    encoder = CnnEncoder(embedding_dim=EMBED_DIMS, ngram_filter_sizes=(2, 3, 5),
                         num_filters=5)  # num_filters is a tad dangerous: we get this many filters for EACH ngram filter size

    # encoder = BertPooler("bert-base-cased")
    # the output dim is just num_filters * len(ngram_filter_sizes)
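    # e.g. with num_filters=5 and ngram_filter_sizes=(2, 3, 5), encoder.get_output_dim() == 5 * 3 == 15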

    # construct the regularizer applicator
    regularizer_applicator = None
    if args.use_reg:
        l2_reg = L2Regularizer()
        regexes = [("embedder", l2_reg),
                   ("encoder", l2_reg),
                   ("classifier", l2_reg)
                   ]
        regularizer_applicator = RegularizerApplicator(regexes)
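        # each (regex, regularizer) pair penalizes every parameter whose name matches the regex;
        # in AllenNLP the trainer adds model.get_regularization_penalty() (this applicator applied to the model) to the loss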

    return MortalityClassifier(vocab, embedder, encoder, regularizer_applicator, **kwargs)

def build_model(vocab: Vocabulary, use_reg: bool = True) -> Model:
    print("Building the model")
    vocab_size = vocab.get_vocab_size("tokens")
    EMBED_DIMS = 300
    # turn the tokens into 300-dim embeddings, then turn the embeddings into encodings
    embedder = BasicTextFieldEmbedder({
        "tokens":
        Embedding(embedding_dim=EMBED_DIMS, num_embeddings=vocab_size)
    })
    encoder = CnnEncoder(
        embedding_dim=EMBED_DIMS,
        ngram_filter_sizes=(2, 3, 4, 5),
        num_filters=5
    )  # num_filters is a tad dangerous: we get this many filters for EACH ngram filter size
    # encoder = BertPooler("bert-base-cased")
    # the output dim is just num_filters * len(ngram_filter_sizes)
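    # e.g. with num_filters=5 and ngram_filter_sizes=(2, 3, 4, 5), encoder.get_output_dim() == 5 * 4 == 20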

    # construct the regularizer applicator
    regularizer_applicator = None
    if use_reg:
        l2_reg = L2Regularizer()
        regexes = [("embedder", l2_reg), ("encoder", l2_reg),
                   ("classifier", l2_reg)]
        regularizer_applicator = RegularizerApplicator(regexes)

    return DecompensationClassifier(vocab, embedder, encoder,
                                    regularizer_applicator)
Example #3
 def test_l2_regularization(self):
     model = torch.nn.Sequential(
             torch.nn.Linear(5, 10),
             torch.nn.Linear(10, 5)
     )
     initializer = InitializerApplicator([(".*", lambda tensor: constant_(tensor, 0.5))])
     initializer(model)
     value = RegularizerApplicator([("", L2Regularizer(1.0))])(model)
     assert value.data.numpy() == 28.75  # 1.0 * (115 params * 0.5**2) = 28.75
Example #4
 def test_regularizer_applicator_respects_regex_matching(self):
     model = torch.nn.Sequential(
             torch.nn.Linear(5, 10),
             torch.nn.Linear(10, 5)
     )
     initializer = InitializerApplicator([(".*", lambda tensor: constant_(tensor, 1.))])
     initializer(model)
     value = RegularizerApplicator([("weight", L2Regularizer(0.5)),
                                    ("bias", L1Regularizer(1.0))])(model)
     assert value.data.numpy() == 65.0  # weights: 0.5 * 100 * 1**2 = 50; biases: 1.0 * 15 * |1| = 15
 def test_l2_regularization(self):
     model = torch.nn.Sequential(torch.nn.Linear(5, 10),
                                 torch.nn.Linear(10, 5))
     constant_init = Initializer.from_params(
         Params({
             "type": "constant",
             "val": 0.5
         }))
     initializer = InitializerApplicator([(".*", constant_init)])
     initializer(model)
     value = RegularizerApplicator([("", L2Regularizer(1.0))])(model)
     assert value.data.numpy() == 28.75  # 1.0 * (115 params * 0.5**2) = 28.75
 def test_regularizer_applicator_respects_regex_matching(self):
     model = torch.nn.Sequential(torch.nn.Linear(5, 10),
                                 torch.nn.Linear(10, 5))
     constant_init = Initializer.from_params(
         Params({
             "type": "constant",
             "val": 1.
         }))
     initializer = InitializerApplicator([(".*", constant_init)])
     initializer(model)
     value = RegularizerApplicator([("weight", L2Regularizer(0.5)),
                                    ("bias", L1Regularizer(1.0))])(model)
     assert value.data.numpy() == 65.0  # weights: 0.5 * 100 * 1**2 = 50; biases: 1.0 * 15 * |1| = 15
Example #7
def build_model_Transformer(vocab: Vocabulary, use_reg: bool = True) -> Model:
    print("Building the model")
    vocab_size = vocab.get_vocab_size("tokens")
    EMBED_DIMS = 300
    # embed the tokens with a pretrained transformer, then pool the embeddings into a single encoding
    embedder = PretrainedTransformerEmbedder(BERT_MODEL_NAME)
    encoder = BertPooler(
        BERT_MODEL_NAME
    )  # pools the [CLS] token; the output dim is the transformer's hidden size (e.g. 768 for bert-base models)

    # construct the regularizer applicator
    regularizer_applicator = None
    if use_reg:
        l2_reg = L2Regularizer()
        regexes = [("embedder", l2_reg), ("encoder", l2_reg),
                   ("classifier", l2_reg)]
        regularizer_applicator = RegularizerApplicator(regexes)

    return MortalityClassifier(vocab, embedder, encoder,
                               regularizer_applicator)
Example #8
def run_training_loop():
    tokenizer = BERTTokenizer(vocab_file='/Users/tianhongzxy/Downloads/BiSentESIM/BiSentESIM/My-pipeline/allennlp_tutorial/BertTokenizer/vocab.txt')
    # tokenizer = BERTTokenizer('bert-base-multilingual-cased') # same as above

    # Try to use ELMo
    # tokenindexer = ELMoTokenCharactersIndexer()
    # elmo_tokens = tokenindexer.tokens_to_indices([Token("happy")], None)
    # print(len(elmo_tokens["elmo_tokens"][0]), elmo_tokens)

    # Try to use BERT
    # tokenizer = PretrainedTransformerTokenizer(
    #     model_name="bert-base-multilingual-cased",
    #     add_special_tokens=True,
    #     max_length=512
    # )
    # token_indexer = PretrainedTransformerIndexer(
    #     model_name="bert-base-multilingual-cased",
    #     max_length=512,
    # )

    cached_directory = None # "cached_dir"
    dataset_reader = ClassificationTsvReader(tokenizer=tokenizer, cache_directory=cached_directory)
    print("Reading data")
    train_data = dataset_reader.read(file_path='/Users/tianhongzxy/Downloads/contradictory-my-dear-watson/train.txt')
    pretrained_files = None # {"tokens": "/Users/tianhongzxy/Downloads/BiSentESIM/BiSentESIM/embedding/glove.6B.300d.txt"}
    cuda_device = -1
    batch_size = 8
    vocab = build_vocab(train_data, pretrained_files=pretrained_files, include_full_pretrained_words=False)
    init_uniform = XavierUniformInitializer()
    # init_uniform(model.embedder.token_embedder_tokens.weight)
    init_const = ConstantInitializer(val=0)
    # init_const(model.classifier.bias)
    init_normal = NormalInitializer(mean=0., std=1.)
    # init_normal(model.classifier.weight)
    applicator = InitializerApplicator(
        regexes=[
            ('embedder.*', init_uniform),
            ('classifier.*weight', init_normal),
            ('classifier.*bias', init_const)
        ]
    )
    regularizer = RegularizerApplicator(
        regexes=[
            ('embedder.*', L2Regularizer(alpha=1e-3)),
            ('classifier.*weight', L2Regularizer(alpha=1e-3)),
            # ('classifier.*bias', L1Regularizer(alpha=1e-2))  # don't regularize the bias, otherwise the model tends to underfit
        ]
    )
    model = build_model(vocab,
                        embedding_dim=10,
                        pretrained_file=None, # pretrained_files["tokens"]
                        initializer=applicator,
                        regularizer=regularizer
                        )
    if cuda_device >= 0:
        model = model.cuda(cuda_device)

    # split train data into train & dev data
    from allennlp.data.dataset_readers import AllennlpDataset
    print('origin train data size: ', len(train_data))
    train_data, dev_data = train_test_split(train_data, test_size=0.2, random_state=20020206)
    assert type(train_data[0]) == type(dev_data[0]) == Instance
    train_data, dev_data = AllennlpDataset(train_data), AllennlpDataset(dev_data)
    print('train data size: ', len(train_data), 'dev data size', len(dev_data))
    assert type(train_data) == type(dev_data) == AllennlpDataset
    train_data.index_with(vocab)
    dev_data.index_with(vocab)

    train_loader, dev_loader = build_data_loaders(train_data=train_data,
                                                  dev_data=dev_data,
                                                  batch_size=batch_size)

    with tempfile.TemporaryDirectory() as serialization_dir:
        # serialization_dir = 'temp_dir/'
        trainer = build_trainer(
            model=model,
            serialization_dir=serialization_dir,
            train_loader=train_loader,
            dev_loader=dev_loader,
            num_epochs=5,
            cuda_device=cuda_device,
            patience=5
        )
        print("Starting training")
        trainer.train()
        print("Finished training")
        # Evaluate model on test data
        # print("Starting testing")
        # test_data = dataset_reader.read('test.txt')
        # test_data.index_with(vocab)
        # data_loader = DataLoader(test_data, batch_size=batch_size)
        # results = evaluate(model, data_loader, cuda_device=cuda_device)
        # print('Test results: ', results)

    # outputs = model.forward_on_instances(instances)
    # print(outputs)
    return model, dataset_reader
Example #9
    def train(self, args_hpo, index):
        """
        trains the model, and return the metrics to the meta optimizer.
        :param args_hpo:
        :param index:
        :return:
        """
        PrintColors.prYellow('\n===== training with: {}'.format(args_hpo))
        PrintColors.prGreen('----- in {} mode -----'.format('train'))
        ''' ============ LOAD DATA ================================================================================ '''
        starting_time = time.time()
        lm_dataset_reader = LanguageModelSegmentReader(global_constants=GLOBAL_CONSTANTS)
        train_data, val_data = (lm_dataset_reader.read(folder) for folder in
                                [_train_data_path, _val_data_path])
        lm_vocabulary = Vocabulary.from_instances(train_data + val_data)
        iterator = BasicIterator(batch_size=args_hpo.batch_size)
        iterator.index_with(lm_vocabulary)
        ''' ============ DEFINE MODEL ============================================================================= '''
        '''
        the Params class 'pops' its parameters, i.e. they disappear after first use, so we instantiate a fresh
        Params instance for each model-defining execution. More than that, Params turns dicts into MutableMappings
        and destroys the original dict. So here's your copy, allennlp. Thanks. (I still love you)
        '''
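        # e.g. Embedding.from_params(...) below pops entries such as 'embedding_dim' out of the Params object
        # while building the layer, so reusing the same (un-copied) Params for a second model would fail;
        # hence the copy.deepcopy.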
        token_embedding = Embedding.from_params(vocab=lm_vocabulary,
                                                params=Params(copy.deepcopy(GLOBAL_CONSTANTS.GLOVE_PARAMS_CONFIG)))

        token_embedder: TextFieldEmbedder = BasicTextFieldEmbedder({'tokens': token_embedding})
        ''' define encoder to wrap up an lstm feature extractor '''
        contextualizer: Seq2SeqEncoder = PytorchSeq2SeqWrapper(
            torch.nn.LSTM(input_size=args_hpo.word_embedding_size,
                          hidden_size=args_hpo.ed_ncoder_size,
                          bidirectional=False, batch_first=True))

        model = LanguageModel(vocab=lm_vocabulary,
                              text_field_embedder=token_embedder,
                              contextualizer=contextualizer,
                              dropout=args_hpo.dropout,
                              regularizer=RegularizerApplicator([('l2', L2Regularizer(alpha=args_hpo.l2))]),
                              )\
            .cuda(_device)

        ''' ============ TRAIN ================================================================================ '''
        '''  callbacks  '''
        if index == 0:
            for file in os.listdir(os.path.join(*['.', 'lm_models'])):
                path = os.path.join(*['.', 'lm_models', file])
                if os.path.isfile(path):
                    os.remove(path)
                else:
                    shutil.rmtree(path)
        serialization_path = 'models_lm_{}_{}'.format(_tag, index)
        serialization_path_longer = os.path.join(*['.', 'lm_models', serialization_path])
        vocab_path = 'vocab_lm_{}_{}'.format(_tag, index)
        vocab_dir_longer = os.path.join(*['.', 'lm_models', vocab_path])
        if not os.path.exists(serialization_path_longer):
            os.mkdir(serialization_path_longer)
        callbacks = list()
        ''' for validation '''
        callbacks.append(validate.Validate(validation_data=val_data, validation_iterator=iterator))
        ''' for early stopping. it tracks 'loss' returned by model.forward() '''
        callbacks.append(track_metrics.TrackMetrics(patience=3))
        ''' for grad clipping '''
        callbacks.append(gradient_norm_and_clip.GradientNormAndClip(grad_clipping=args_hpo.clip))
        ''' 
            for checkpointing
            TODO / NOTE: serialization path CANNOT exist before training??
        '''
        model_checkpointer = checkpointer.Checkpointer(serialization_dir=serialization_path_longer,
                                                       num_serialized_models_to_keep=1)
        callbacks.append(checkpoint.Checkpoint(checkpointer=model_checkpointer))
        ''' for sample generations '''

        callback_trainer = CallbackTrainer(
            model=model,
            training_data=train_data,
            iterator=iterator,
            optimizer=torch.optim.Adam(model.parameters(), lr=args_hpo.lr),
            num_epochs=_n_epochs,
            serialization_dir=serialization_path_longer,
            cuda_device=_device,
            callbacks=callbacks
        )

        ''' trainer saves the model, but the vocabulary needs to be saved, too '''
        lm_vocabulary.save_to_files(vocab_dir_longer)

        ''' check the metric names to synchronize with the class '''
        metrics = callback_trainer.train()
        metrics['time_consumed(hrs)'] = round((time.time() - starting_time) / 3600, 4)

        return metrics
def build_model(
        vocab, embed_dim: int = 100,
        hid_dim: int = 100,
        min_dec_step: int = 2,
        max_decoding_steps: int = 3,
        fix_edu_num: int = -1,
        use_elmo: bool = False,
        dropout=0.5,
        dropout_emb=0.2, span_encoder_type='self_attentive',
        attn_type='dot',
        schedule_ratio_from_ground_truth=0.7,
        pretrain_embedding=None,
        nenc_lay: int = 1,
        mult_orac_sampling: bool = True,
        compression: bool = True,
        word_token_indexers=None,
        alpha: float = 1.0,
        dbg: bool = False,
        dec_avd_trigram_rep: bool = True,
        aggressive_compression: int = -1,
        keep_threshold: float = 0.5,
        weight_alpha=0.0,
        bias_alpha=0.0,
        abs_board_file: str = "/home/cc/exComp/board.txt",
        compress_leadn=-1,
        gather='mean',
        abs_dir_root: str = "/scratch/cluster/jcxu",
        serilization_name="",
        load_save_model: str = None
):
    model = Seq2IdxSum(
        vocab=vocab,
        word_embedding_dim=embed_dim,
        hidden_dim=hid_dim, min_dec_step=min_dec_step,
        max_decoding_steps=max_decoding_steps,
        fix_edu_num=fix_edu_num,
        use_elmo=use_elmo, span_encoder_type=span_encoder_type,
        dropout=dropout, dropout_emb=dropout_emb,
        attn_type=attn_type,
        schedule_ratio_from_ground_truth=schedule_ratio_from_ground_truth,
        pretrain_embedding_file=pretrain_embedding,
        nenc_lay=nenc_lay,
        mult_orac_sampling=mult_orac_sampling,
        word_token_indexers=word_token_indexers,
        compression=compression, alpha=alpha,
        dbg=dbg,
        dec_avd_trigram_rep=dec_avd_trigram_rep,
        aggressive_compression=aggressive_compression,
        keep_threshold=keep_threshold,
        regularizer=RegularizerApplicator([("weight", L2Regularizer(weight_alpha)),
                                           ("bias", L1Regularizer(bias_alpha))]),
        abs_board_file=abs_board_file,
        gather=gather,
        compress_leadn=compress_leadn,
        abs_dir_root=abs_dir_root,
        serilization_name=serilization_name
    )
    if load_save_model:
        model.load_state_dict(torch.load(load_save_model, map_location=get_device()))
    # e.g.: model.load_state_dict(torch.load("/path/to/model/weights.th"))

    # model = torch.nn.DataParallel(model)
    device = get_device()
    model = model.to(device)
    return model
示例#11
0
''' the language model used GloVe, but here we just build an embedder to load the trained parameters '''
token_embedding = Embedding(
    num_embeddings=vocabulary.get_vocab_size(namespace='tokens'),
    embedding_dim=combination.word_embedding_size,
    padding_index=0)
token_embedder: TextFieldEmbedder = BasicTextFieldEmbedder(
    {'tokens': token_embedding})
''' define encoder to wrap up an lstm feature extractor '''
contextualizer: Seq2SeqEncoder = PytorchSeq2SeqWrapper(
    torch.nn.LSTM(input_size=combination.word_embedding_size,
                  hidden_size=combination.ed_ncoder_size,
                  bidirectional=False,
                  batch_first=True))
model = LanguageModel(vocab=vocabulary,
                      text_field_embedder=token_embedder,
                      contextualizer=contextualizer,
                      dropout=combination.dropout,
                      regularizer=RegularizerApplicator([('l2', L2Regularizer(alpha=combination.l2))]),
                      ) \
    .cuda(device)
model.load_state_dict(torch.load(open(language_model_path, 'rb')), strict=True)
dataset_reader = LanguageModelSegmentReader(global_constants=GLOBAL_CONSTANTS)
language_model_predictor = Predictor(model=model,
                                     dataset_reader=dataset_reader)
val_data_path = os.path.join('.', 'data_seg_val_toytoy')
instances = dataset_reader.read(val_data_path)
predictions = [
    language_model_predictor.predict_instance(instance)
    for instance in instances
]

class Net(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.linear1 = torch.nn.Linear(2, 3)
        self.linear2 = torch.nn.Linear(3, 2)
        self.conv = torch.nn.Conv1d(2, 2, 2)

    def forward(self, inputs):
        pass


print('Using individual regularizers:')
model = Net()
init_const = ConstantInitializer(val=10.)
init_const(model.linear1.weight)
init_const(model.linear2.weight)

l1_regularizer = L1Regularizer(alpha=0.01)
print(l1_regularizer(model.linear1.weight))  # 0.01 * 10 * 6 = 0.6

l2_regularizer = L2Regularizer(alpha=0.01)
print(l2_regularizer(model.linear2.weight))  # 0.01 * (10)^2 * 6 = 6.0

print('Using an applicator:')
applicator = RegularizerApplicator(regexes=[
    ('linear1.weight', L1Regularizer(alpha=0.01)),
    ('linear2.weight', L2Regularizer()),
])
print(applicator(model))  # 0.6 + 6.0 = 6.6