Example #1
    def __init__(
        self,
        vocab_path=None,
        model_paths=None,
        weigths=None,
        max_len=50,
        min_len=3,
        lowercase_tokens=False,
        log=False,
        iterations=3,
        model_name='roberta',
        special_tokens_fix=1,
        is_ensemble=True,
        min_error_probability=0.0,
        confidence=0,
        del_confidence=0,
        resolve_cycles=False,
    ):
        self.model_weights = list(map(
            float, weigths)) if weigths else [1] * len(model_paths)
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
        self.max_len = max_len
        self.min_len = min_len
        self.lowercase_tokens = lowercase_tokens
        self.min_error_probability = min_error_probability
        self.vocab = Vocabulary.from_files(vocab_path)
        self.log = log
        self.iterations = iterations
        self.confidence = confidence
        self.del_conf = del_confidence
        self.resolve_cycles = resolve_cycles
        # set training parameters and operations

        self.indexers = []
        self.models = []
        for model_path in model_paths:
            if is_ensemble:
                model_name, special_tokens_fix = self._get_model_data(
                    model_path)
            weights_name = get_weights_name(model_name, lowercase_tokens)
            self.indexers.append(
                self._get_indexer(weights_name, special_tokens_fix))
            model = Seq2Labels(
                vocab=self.vocab,
                text_field_embedder=self._get_embbeder(weights_name,
                                                       special_tokens_fix),
                confidence=self.confidence,
                del_confidence=self.del_conf,
            ).to(self.device)
            if torch.cuda.is_available():
                model.load_state_dict(torch.load(model_path), strict=False)
            else:
                model.load_state_dict(torch.load(
                    model_path, map_location=torch.device('cpu')),
                                      strict=False)
            model.eval()
            self.models.append(model)
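A minimal usage sketch for this constructor; it assumes the __init__ belongs to GECToR's GecBERTModel class, and the vocabulary/checkpoint paths below are placeholders, not real files:

model = GecBERTModel(
    vocab_path='data/output_vocabulary',   # assumed vocabulary directory
    model_paths=['roberta_1_gector.th'],   # assumed checkpoint path
    weigths=None,                          # falls back to equal weights per model
    model_name='roberta',
    special_tokens_fix=1,
    is_ensemble=False,                     # trust model_name instead of parsing the path
)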
Example #2
def get_model(model_name, vocab, tune_bert=False,
              predictor_dropout=0,
              label_smoothing=0.0,
              confidence=0,
              special_tokens_fix=0):
    token_embs = get_token_embedders(model_name, tune_bert=tune_bert, special_tokens_fix=special_tokens_fix)
    model = Seq2Labels(vocab=vocab,  # this is our own downstream network that maps to the labels
                       text_field_embedder=token_embs,
                       predictor_dropout=predictor_dropout,
                       label_smoothing=label_smoothing,
                       confidence=confidence)
    return model
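A short sketch of how get_model might be called; the Vocabulary import assumes the AllenNLP library used elsewhere in these examples, and the vocabulary path is a placeholder:

from allennlp.data.vocabulary import Vocabulary

vocab = Vocabulary.from_files('data/output_vocabulary')  # placeholder path
model = get_model('roberta',
                  vocab,
                  tune_bert=True,          # fine-tune the transformer encoder
                  predictor_dropout=0.4,   # illustrative value
                  special_tokens_fix=1)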
Example #3
def get_model(model_name,
              vocab,
              tune_bert=False,
              predictor_dropout=0,
              label_smoothing=0.0,
              confidence=0,
              special_tokens_fix=0):  # strictly speaking, the old xlnet does not need to be loaded here, but we leave it unchanged to avoid bugs
    token_embs = get_token_embedders(model_name,
                                     tune_bert=tune_bert,
                                     special_tokens_fix=special_tokens_fix)
    model = Seq2Labels(
        vocab=vocab,  # our own downstream network mapping to the labels; its parameter shapes must be changed while keeping the previously learned weights
        text_field_embedder=token_embs,
        predictor_dropout=predictor_dropout,
        label_smoothing=label_smoothing,
        confidence=confidence)
    return model
Example #4
    def __init__(
            self,
            vocab_path=None,
            model_paths=None,
            weigths=None,
            max_len=50,
            min_len=3,
            lowercase_tokens=False,
            log=False,
            iterations=3,
            min_probability=0.0,
            model_name='roberta',
            special_tokens_fix=1,
            is_ensemble=True,
            min_error_probability=0.0,
            confidence=0,
            resolve_cycles=False,
            prune_amount=0.,
            num_layers_to_keep=12):
        self.model_weights = list(map(
            float, weigths)) if weigths else [1] * len(model_paths)
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
        self.max_len = max_len
        self.min_len = min_len
        self.lowercase_tokens = lowercase_tokens
        self.min_probability = min_probability
        self.min_error_probability = min_error_probability
        self.vocab = Vocabulary.from_files(vocab_path)
        self.log = log
        self.iterations = iterations
        self.confidence = confidence
        self.resolve_cycles = resolve_cycles
        # set training parameters and operations

        self.indexers = []
        self.models = []

        for model_path in model_paths:
            if is_ensemble:
                model_name, special_tokens_fix = self._get_model_data(
                    model_path)
            weights_name = get_weights_name(model_name, lowercase_tokens)
            self.indexers.append(
                self._get_indexer(weights_name, special_tokens_fix))

            model = Seq2Labels(
                vocab=self.vocab,
                text_field_embedder=self._get_embbeder(weights_name,
                                                       special_tokens_fix),
                confidence=self.confidence).to(self.device)
            # count the number of parameters
            pytorch_total_params = sum(p.numel() for p in model.parameters())
            print('total params:', pytorch_total_params)

            if torch.cuda.is_available():
                model.load_state_dict(torch.load(model_path))
            else:
                model.load_state_dict(
                    torch.load(model_path, map_location=torch.device('cpu')))
            # report the serialized model size on disk
            def print_size_of_model(model):
                torch.save(model.state_dict(), "temp.p")
                print('Size (MB):', os.path.getsize("temp.p") / 1e6)
                os.remove('temp.p')

            print_size_of_model(model)

            def deleteEncodingLayers(
                    model,
                    num_layers_to_keep):  # must pass in the full BERT model
                oldModuleList = model.text_field_embedder.token_embedder_bert.bert_model.encoder.layer
                newModuleList = nn.ModuleList()

                # iterate over all layers, keeping only the first num_layers_to_keep
                for i in range(0, num_layers_to_keep):
                    newModuleList.append(oldModuleList[i])

                # create a copy of the model, swap in the truncated list, and return it
                copyOfModel = copy.deepcopy(model)
                copyOfModel.text_field_embedder.token_embedder_bert.bert_model.encoder.layer = newModuleList

                return copyOfModel

            model = deleteEncodingLayers(model, num_layers_to_keep)
            print('after truncating to', num_layers_to_keep, 'layers:', model)
            print_size_of_model(model)

            # Earlier experiments, left disabled here: saving the whole model
            # with torch.save, and exporting it to ONNX via torch.onnx.export
            # with dummy 'bert' / 'bert-offsets' / 'mask' inputs.

            # Another disabled experiment: dynamic quantization with
            # torch.quantization.quantize_dynamic(model.cpu(), dtype=torch.qint8),
            # which shrinks the serialized size of the linear layers.

            # prune the model (random_unstructured pruning and bias pruning were
            # tried as well; only weight pruning of linear layers is active below)
            print_size_of_model(model)
            for name, module in model.named_modules():
                # prune the prune_amount fraction of connections in every linear layer
                if isinstance(module, torch.nn.Linear):
                    prune.l1_unstructured(module,
                                          name='weight',
                                          amount=prune_amount)
                    # make the pruning permanent by removing the reparametrization
                    prune.remove(module, name='weight')
            print('About to return')
            print_size_of_model(model)

            model.eval()
            self.models.append(model)
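The live part of this example is the L1 unstructured pruning loop. A self-contained sketch of the same pattern on a toy linear layer, using only torch.nn.utils.prune:

import torch
import torch.nn.utils.prune as prune

layer = torch.nn.Linear(16, 8)
# zero the 40% of weights with the smallest absolute values
prune.l1_unstructured(layer, name='weight', amount=0.4)
print('sparsity:', float((layer.weight == 0).sum()) / layer.weight.nelement())
# make the pruning permanent: drop the mask and the reparametrization
prune.remove(layer, name='weight')

Note that unstructured pruning only zeroes entries; the dense weight tensors keep their shape, which is why the on-disk size printed in the example barely changes unless the weights are converted to a sparse format.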
Example #5
    def __init__(self, vocab_path=None, model_paths=None,
                 weigths=None,
                 max_len=50,
                 min_len=3,
                 lowercase_tokens=False,
                 log=False,
                 iterations=3,
                 min_probability=0.0,
                 model_name='roberta',
                 special_tokens_fix=1,
                 is_ensemble=True,
                 min_error_probability=0.0,
                 confidence=0,
                 resolve_cycles=False,
                 prune_amount=0.,
                 num_layers_to_keep=12
                 ):
        self.model_weights = list(map(float, weigths)) if weigths else [1] * len(model_paths)
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.max_len = max_len
        self.min_len = min_len
        self.lowercase_tokens = lowercase_tokens
        self.min_probability = min_probability
        self.min_error_probability = min_error_probability
        self.vocab = Vocabulary.from_files(vocab_path)
        self.log = log
        self.iterations = iterations
        self.confidence = confidence
        self.resolve_cycles = resolve_cycles
        # set training parameters and operations

        self.indexers = []
        self.models = []

        for model_path in model_paths:
            if is_ensemble:
                model_name, special_tokens_fix = self._get_model_data(model_path)
            weights_name = get_weights_name(model_name, lowercase_tokens)
            self.indexers.append(self._get_indexer(weights_name, special_tokens_fix))

            model = Seq2Labels(vocab=self.vocab,
                               text_field_embedder=self._get_embbeder(weights_name, special_tokens_fix),
                               confidence=self.confidence
                               ).to(self.device)
            # count the number of parameters
            pytorch_total_params = sum(p.numel() for p in model.parameters())
            print('total params:', pytorch_total_params)

            # report the serialized model size on disk
            def print_size_of_model(model):
                torch.save(model.state_dict(), "temp.p")
                print('Size (MB):', os.path.getsize("temp.p") / 1e6)
                os.remove('temp.p')

            # delete the top encoder layers
            def deleteEncodingLayers(model, num_layers_to_keep):  # must pass in the full BERT model
                oldModuleList = model.text_field_embedder.token_embedder_bert.bert_model.encoder.layer
                newModuleList = nn.ModuleList()

                # iterate over all layers, keeping only the first num_layers_to_keep
                for i in range(0, num_layers_to_keep):
                    newModuleList.append(oldModuleList[i])

                # create a copy of the model, swap in the truncated list, and return it
                copyOfModel = copy.deepcopy(model)
                copyOfModel.text_field_embedder.token_embedder_bert.bert_model.encoder.layer = newModuleList

                return copyOfModel

            print('before truncation:')
            print_size_of_model(model)

            model = deleteEncodingLayers(model, num_layers_to_keep)
            print('after truncating to', num_layers_to_keep, 'layers:', model)
            print_size_of_model(model)

            # strict=False because the checkpoint still contains the encoder
            # layers that were deleted above
            if torch.cuda.is_available():
                model.load_state_dict(torch.load(model_path), strict=False)
            else:
                model.load_state_dict(torch.load(model_path,
                                                 map_location=torch.device('cpu')),
                                      strict=False)

            print_size_of_model(model)


            model.eval()
            self.models.append(model)
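deleteEncodingLayers above is plain encoder-layer truncation. The same idea on a standalone Hugging Face BertModel, as an illustration only (the keep_first_layers helper and the checkpoint name are assumptions, not part of the original code):

import copy
import torch.nn as nn
from transformers import BertModel

def keep_first_layers(bert, num_layers_to_keep):
    # copy the model, then keep only the bottom encoder layers
    truncated = copy.deepcopy(bert)
    truncated.encoder.layer = nn.ModuleList(
        truncated.encoder.layer[i] for i in range(num_layers_to_keep))
    truncated.config.num_hidden_layers = num_layers_to_keep
    return truncated

bert = BertModel.from_pretrained('bert-base-uncased')
small = keep_first_layers(bert, 6)  # keep 6 of the 12 encoder layers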