Example #1
    def __init__(self, mode, model_file):
        """
        BERT NLU initialization.

        Args:
            mode (str):
                one of `'usr'`, `'sys'`, or `'all'`, indicating which side of the data the model was trained on.

            model_file (str):
                trained model path or URL; it must be consistent with the chosen mode.

        Example:
            nlu = BERTNLU(mode='usr', model_file='https://tatk-data.s3-ap-northeast-1.amazonaws.com/bert_camrest_usr.zip')
        """
        assert mode in ('usr', 'sys', 'all')
        config_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'configs/camrest_{}.json'.format(mode))
        with open(config_file) as f:
            config = json.load(f)
        DEVICE = config['DEVICE']
        root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        data_dir = os.path.join(root_dir, config['data_dir'])
        output_dir = os.path.join(root_dir, config['output_dir'])

        if not os.path.exists(os.path.join(data_dir, 'data.pkl')):
            preprocess(mode)

        with open(os.path.join(data_dir, 'data.pkl'), 'rb') as f:
            data = pickle.load(f)
        with open(os.path.join(data_dir, 'intent_vocab.pkl'), 'rb') as f:
            intent_vocab = pickle.load(f)
        with open(os.path.join(data_dir, 'tag_vocab.pkl'), 'rb') as f:
            tag_vocab = pickle.load(f)

        dataloader = Dataloader(data, intent_vocab, tag_vocab, config['model']["pre-trained"])

        best_model_path = os.path.join(output_dir, 'bestcheckpoint.tar')
        if not os.path.exists(best_model_path):
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)
            print('Load from model_file param')
            archive_file = cached_path(model_file)
            archive = zipfile.ZipFile(archive_file, 'r')
            archive.extractall(root_dir)
            archive.close()
        print('Load from', best_model_path)
        checkpoint = torch.load(best_model_path, map_location=DEVICE)
        print('train step', checkpoint['step'])

        model = BertNLU(config['model'], dataloader.intent_dim, dataloader.tag_dim,
                        DEVICE=DEVICE,
                        intent_weight=dataloader.intent_weight)
        model_dict = model.state_dict()
        state_dict = {k: v for k, v in checkpoint['model_state_dict'].items() if k in model_dict}
        model_dict.update(state_dict)
        model.load_state_dict(model_dict)
        model.to(DEVICE)
        model.eval()

        self.model = model
        self.dataloader = dataloader
        print("BERTNLU loaded")
Example #2
    if not os.path.exists(best_model_path):
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        print('Load from zipped_model_path param')
        archive_file = cached_path(
            os.path.join(root_dir, config['zipped_model_path']))
        archive = zipfile.ZipFile(archive_file, 'r')
        archive.extractall(root_dir)
        archive.close()
    print('Load from', best_model_path)
    checkpoint = torch.load(best_model_path, map_location=DEVICE)
    print('best_intent_step', checkpoint['best_intent_step'])
    print('best_tag_step', checkpoint['best_tag_step'])

    model = BertNLU(config['model'],
                    dataloader.intent_dim,
                    dataloader.tag_dim,
                    DEVICE=DEVICE,
                    intent_weight=dataloader.intent_weight)
    model_dict = model.state_dict()
    state_dict = {
        k: v
        for k, v in checkpoint['model_state_dict'].items()
        if k in model_dict
    }
    model_dict.update(state_dict)
    model.load_state_dict(model_dict)
    model.to(DEVICE)
    model.eval()

    batch_size = config['batch_size']
    # ceil division so the final partial batch is counted exactly once
    batch_num = (len(dataloader.data['test']) + batch_size - 1) // batch_size
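
The snippet ends after sizing the test set. A sketch of how the evaluation loop might continue, assuming a hypothetical get_test_batch(batch_size, batch_idx) helper on the dataloader (the real batching API and the model's output pair are not shown in this snippet):

import torch

with torch.no_grad():  # no gradients needed at test time
    for i in range(batch_num):
        batch = dataloader.get_test_batch(batch_size, i)  # hypothetical helper
        intent_logits, tag_logits = model(*batch)         # assumed output pair
        # ...accumulate intent and tag metrics per batch here...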
Example #3
    best_model_path = os.path.join(output_dir, 'bestcheckpoint.tar')
    if not os.path.exists(best_model_path):
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        print('Load from zipped_model_path param')
        archive_file = cached_path(config['zipped_model_path'])
        archive = zipfile.ZipFile(archive_file, 'r')
        archive.extractall()
        archive.close()
    print('Load from', best_model_path)
    checkpoint = torch.load(best_model_path, map_location=DEVICE)
    print('train step', checkpoint['step'])

    model = BertNLU(config['model'],
                    dataloader.intent_dim,
                    dataloader.tag_dim,
                    DEVICE=DEVICE,
                    intent_weight=dataloader.intent_weight)
    model_dict = model.state_dict()
    state_dict = {
        k: v
        for k, v in checkpoint['model_state_dict'].items()
        if k in model_dict
    }
    model_dict.update(state_dict)
    model.load_state_dict(model_dict)
    model.to(DEVICE)
    model.eval()

    batch_size = config['batch_size']
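
All three loading examples above share the same partial state-dict pattern: checkpoint entries are kept only when the current model has a parameter with the same key, so stale or extra keys in the checkpoint are silently ignored. A self-contained sketch of that pattern on a toy module (the module and key names here are illustrative, not from the snippets):

import torch
import torch.nn as nn

model = nn.Linear(4, 2)
checkpoint_state = {'weight': torch.zeros(2, 4),
                    'bias': torch.zeros(2),
                    'stale_key': torch.zeros(1)}  # extra key to be filtered out

model_dict = model.state_dict()
# Keep only the checkpoint entries whose keys exist in the current model.
filtered = {k: v for k, v in checkpoint_state.items() if k in model_dict}
model_dict.update(filtered)
model.load_state_dict(model_dict)  # succeeds despite 'stale_key'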
Example #4
    print('intent num:', len(intent_vocab))
    print('tag num:', len(tag_vocab))

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    writer = SummaryWriter(log_dir)

    dataloader = Dataloader(data, intent_vocab, tag_vocab,
                            config['model']["pre-trained"])

    model = BertNLU(config['model'],
                    dataloader.intent_dim,
                    dataloader.tag_dim,
                    DEVICE=DEVICE,
                    intent_weight=dataloader.intent_weight)
    model.to(DEVICE)
    intent_save_params = []
    tag_save_params = []
    for name, param in model.named_parameters():
        if param.requires_grad:
            print(name, param.shape, param.device)
            if 'intent' in name:
                intent_save_params.append(name)
            elif 'tag' in name:
                tag_save_params.append(name)
            else:
                # Fine-tuning the BERT parameters is not supported yet
                assert 0
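
The two name lists partition the trainable parameters so the intent head and the tag head can be updated and saved independently. A hedged sketch of one way the lists might be used, assuming two separate Adam optimizers (the optimizer choice and learning rate are assumptions; the actual training setup is not shown in this snippet):

import torch

# Hedged sketch: one optimizer per head, built from the collected name lists.
intent_params = [p for n, p in model.named_parameters() if n in intent_save_params]
tag_params = [p for n, p in model.named_parameters() if n in tag_save_params]
intent_optimizer = torch.optim.Adam(intent_params, lr=1e-3)  # assumed lr
tag_optimizer = torch.optim.Adam(tag_params, lr=1e-3)        # assumed lr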
Example #5
    for key in data:
        print('{} set size: {}'.format(key, len(data[key])))
    print('intent num:', len(intent_vocab))
    print('tag num:', len(tag_vocab))

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    writer = SummaryWriter(log_dir)

    dataloader = Dataloader(data, intent_vocab, tag_vocab, config['model']["pre-trained"])

    model = BertNLU(config['model'], dataloader.intent_dim, dataloader.tag_dim,
                    DEVICE=DEVICE,
                    intent_weight=dataloader.intent_weight)
    model.to(DEVICE)
    intent_save_params = []
    tag_save_params = []
    for name, param in model.named_parameters():
        if param.requires_grad:
            print(name, param.shape, param.device)
            if 'intent' in name:
                intent_save_params.append(name)
            elif 'tag' in name:
                tag_save_params.append(name)
            else:
                # Fine-tuning the BERT parameters is not supported yet
                assert 0
    print('intent params:', intent_save_params)
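
The *_save_params name lists suggest that only the two heads are checkpointed. A hedged sketch of saving just those tensors, mirroring the filtered loading in Examples #1-#3 (the checkpoint keys and the step value are assumptions):

import os
import torch

head_keys = set(intent_save_params + tag_save_params)
torch.save({'model_state_dict': {k: v for k, v in model.state_dict().items()
                                 if k in head_keys},
            'step': 0},  # placeholder; the real training step would go here
           os.path.join(output_dir, 'bestcheckpoint.tar'))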