Example #1
    def __init__(
        self,
        archive_file=DEFAULT_ARCHIVE_FILE,
        model_file='https://tatk-data.s3-ap-northeast-1.amazonaws.com/mle_policy_camrest.zip'
    ):
        root_dir = os.path.dirname(
            os.path.dirname(
                os.path.dirname(
                    os.path.dirname(os.path.dirname(
                        os.path.abspath(__file__))))))

        with open(
                os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             'config.json'), 'r') as f:
            cfg = json.load(f)

        voc_file = os.path.join(root_dir, 'data/camrest/sys_da_voc.txt')
        voc_opp_file = os.path.join(root_dir, 'data/camrest/usr_da_voc.txt')
        self.vector = CamrestVector(voc_file, voc_opp_file)

        self.policy = MultiDiscretePolicy(self.vector.state_dim, cfg['h_dim'],
                                          self.vector.da_dim).to(device=DEVICE)

        if not os.path.isfile(archive_file):
            if not model_file:
                raise Exception("No model for MLE Policy is specified!")
            archive_file = cached_path(model_file)
        model_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 'save')
        if not os.path.exists(model_dir):
            os.mkdir(model_dir)
        if not os.path.exists(os.path.join(model_dir, 'best_mle.pol.mdl')):
            archive = zipfile.ZipFile(archive_file, 'r')
            archive.extractall(model_dir)
        self.load(cfg['load'])
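
Every constructor in this collection repeats the same download-and-extract dance: fall back from a local archive to a remote model_file, cache the download, and unzip only when the target file is missing. Below is a minimal sketch of that pattern as a standalone helper; the import path for cached_path and the helper itself are assumptions based on these snippets, not part of the project:

import os
import zipfile

from tatk.util.file_util import cached_path  # assumed import path


def ensure_model(archive_file, model_file, model_dir, marker):
    # Sketch of the pattern shared by the constructors here,
    # not the project's own helper.
    if not os.path.isfile(archive_file):
        if not model_file:
            raise Exception("No model archive or URL is specified!")
        archive_file = cached_path(model_file)  # download to the local cache
    os.makedirs(model_dir, exist_ok=True)
    if not os.path.exists(os.path.join(model_dir, marker)):
        with zipfile.ZipFile(archive_file, 'r') as archive:
            archive.extractall(model_dir)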
Example #2
    def __init__(self, mode):
        """
        SVM NLU initialization.

        Args:
            mode (str):
                can be either `'usr'`, `'sys'` or `'all'`, representing which side of data the model was trained on.

        Example:
            nlu = SVMNLU(mode='usr')
        """
        assert mode == 'usr' or mode == 'sys' or mode == 'all'
        model_file = 'https://tatk-data.s3-ap-northeast-1.amazonaws.com/svm_camresst_{}.zip'.format(
            mode)
        config_file = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                   'configs/camrest_{}.cfg'.format(mode))
        self.config = configparser.ConfigParser()
        self.config.read(config_file)
        self.c = Classifier.classifier(self.config)
        model_path = os.path.join(
            os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
            self.config.get("train", "output"))
        model_dir = os.path.dirname(model_path)
        if not os.path.exists(model_path):
            if not os.path.exists(model_dir):
                os.makedirs(model_dir)
            print('Load from model_file param')
            archive_file = cached_path(model_file)
            archive = zipfile.ZipFile(archive_file, 'r')
            archive.extractall(
                os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
            archive.close()
        self.c.load(model_path)
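
For reference, a short usage sketch; the predict() call is an assumption based on tatk's common NLU interface and is not shown in the snippet above:

nlu = SVMNLU(mode='usr')
# predict() is assumed to map an utterance string to a list of dialog acts.
dialog_acts = nlu.predict('I am looking for a cheap restaurant in the north')
print(dialog_acts)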
Example #3
    def __init__(
        self,
        archive_file=DEFAULT_ARCHIVE_FILE,
        model_file='https://tatk-data.s3-ap-northeast-1.amazonaws.com/vhus_simulator_camrest.zip'
    ):
        with open(
                os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             'config.json'), 'r') as f:
            config = json.load(f)
        manager = UserDataManager()
        voc_goal_size, voc_usr_size, voc_sys_size = manager.get_voc_size()
        self.user = VHUS(config, voc_goal_size, voc_usr_size,
                         voc_sys_size).to(device=DEVICE)
        self.goal_gen = GoalGenerator()
        self.manager = manager
        self.user.eval()

        if not os.path.isfile(archive_file):
            if not model_file:
                raise Exception("No model for VHUS Policy is specified!")
            archive_file = cached_path(model_file)
        model_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 'save')
        if not os.path.exists(model_dir):
            os.mkdir(model_dir)
        if not os.path.exists(os.path.join(model_dir, 'best_simulator.mdl')):
            archive = zipfile.ZipFile(archive_file, 'r')
            archive.extractall(model_dir)
        self.load(config['load'])
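
A hedged usage sketch; the class name and the init_session()/response() methods are assumptions based on tatk's user-simulator interface, not shown above:

simulator = UserPolicyVHUS()          # hypothetical class name for this constructor
simulator.init_session()              # assumed to sample a fresh user goal via GoalGenerator
user_action = simulator.response([])  # assumed to take the system's dialog act for the turn
print(user_action)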
Example #4
    def __init__(self,
                 model_file=DEFAULT_MODEL_URL,
                 name='Sequicity'):
        """
        Sequicity initialization

        Args:
            model_file (str):
                trained model path or url. default="https://tatk-data.s3-ap-northeast-1.amazonaws.com/sequicity_multiwoz.zip"

        Example:
            sequicity = Sequicity()
        """
        super(Sequicity, self).__init__(name=name)
        config_file = DEFAULT_CONFIG_FILE
        c = json.load(open(config_file))
        cfg.init_handler(c['tsdf_init'])
        if not os.path.exists(os.path.join(DEFAULT_DIRECTORY,'multiwoz/data')):
            print('download data from', DEFAULT_ARCHIVE_FILE_URL)
            archive_file = cached_path(DEFAULT_ARCHIVE_FILE_URL)
            archive = zipfile.ZipFile(archive_file, 'r')
            print('unzip to', os.path.join(DEFAULT_DIRECTORY,'multiwoz/'))
            archive.extractall(os.path.join(DEFAULT_DIRECTORY,'multiwoz/'))
            archive.close()
        model_path = os.path.join(DEFAULT_DIRECTORY,c['tsdf_init']['model_path'])
        if not os.path.exists(model_path):
            model_dir = os.path.dirname(model_path)
            if not os.path.exists(model_dir):
                os.makedirs(model_dir)
            print('Load from model_file param')
            print('download data from', model_file)
            archive_file = cached_path(model_file)
            archive = zipfile.ZipFile(archive_file, 'r')
            print('unzip to', model_dir)
            archive.extractall(model_dir)
            archive.close()

        torch.manual_seed(cfg.seed)
        torch.cuda.manual_seed(cfg.seed)
        random.seed(cfg.seed)
        np.random.seed(cfg.seed)
        self.m = Model('multiwoz')
        self.m.count_params()
        self.m.load_model()
        self.init_session()
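
A minimal usage sketch; the response() method is an assumption based on tatk's end-to-end dialog interface, not shown above:

s = Sequicity()
# response() is assumed to map a user utterance to a system reply.
reply = s.response('I want to find a cheap hotel in the east')
print(reply)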
Example #5
    def __init__(self, mode, model_file):
        """
        BERT NLU initialization.

        Args:
            mode (str):
                can be either `'usr'`, `'sys'` or `'all'`, representing which side of data the model was trained on.

            model_file (str):
                trained model path or url, should be coherent with mode.

        Example:
            nlu = BERTNLU(mode='usr', model_file='https://tatk-data.s3-ap-northeast-1.amazonaws.com/bert_camrest_usr.zip')
        """
        assert mode == 'usr' or mode == 'sys' or mode == 'all'
        config_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'configs/camrest_{}.json'.format(mode))
        config = json.load(open(config_file))
        DEVICE = config['DEVICE']
        root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        data_dir = os.path.join(root_dir, config['data_dir'])
        output_dir = os.path.join(root_dir, config['output_dir'])

        if not os.path.exists(os.path.join(data_dir, 'data.pkl')):
            preprocess(mode)

        data = pickle.load(open(os.path.join(data_dir, 'data.pkl'), 'rb'))
        intent_vocab = pickle.load(open(os.path.join(data_dir, 'intent_vocab.pkl'), 'rb'))
        tag_vocab = pickle.load(open(os.path.join(data_dir, 'tag_vocab.pkl'), 'rb'))

        dataloader = Dataloader(data, intent_vocab, tag_vocab, config['model']["pre-trained"])

        best_model_path = os.path.join(output_dir, 'bestcheckpoint.tar')
        if not os.path.exists(best_model_path):
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)
            print('Load from model_file param')
            archive_file = cached_path(model_file)
            archive = zipfile.ZipFile(archive_file, 'r')
            archive.extractall(root_dir)
            archive.close()
        print('Load from', best_model_path)
        checkpoint = torch.load(best_model_path, map_location=DEVICE)
        print('train step', checkpoint['step'])

        model = BertNLU(config['model'], dataloader.intent_dim, dataloader.tag_dim,
                        DEVICE=DEVICE,
                        intent_weight=dataloader.intent_weight)
        model_dict = model.state_dict()
        state_dict = {k: v for k, v in checkpoint['model_state_dict'].items() if k in model_dict.keys()}
        model_dict.update(state_dict)
        model.load_state_dict(model_dict)
        model.to(DEVICE)
        model.eval()

        self.model = model
        self.dataloader = dataloader
        print("BERTNLU loaded")
Example #6
    def __init__(self, mode, config_file, model_file):
        """
        BERT NLU initialization.

        Args:
            mode (str):
                can be either `'usr'`, `'sys'` or `'all'`, representing which side of data the model was trained on.

            config_file (str):
                name of a configuration file under this module's `configs/` directory.

            model_file (str):
                model path or url

        Example:
            nlu = BERTNLU(mode='all', model_file='https://convlab.blob.core.windows.net/models/bert_multiwoz_all_context.zip')
        """
        assert mode == 'usr' or mode == 'sys' or mode == 'all'
        config_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'configs/{}'.format(config_file))
        config = json.load(open(config_file))
        DEVICE = config['DEVICE']
        root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        data_dir = os.path.join(root_dir, config['data_dir'])
        output_dir = os.path.join(root_dir, config['output_dir'])

        if not os.path.exists(os.path.join(data_dir, 'intent_vocab.json')):
            preprocess(mode)

        intent_vocab = json.load(open(os.path.join(data_dir, 'intent_vocab.json')))
        tag_vocab = json.load(open(os.path.join(data_dir, 'tag_vocab.json')))
        dataloader = Dataloader(intent_vocab=intent_vocab, tag_vocab=tag_vocab,
                                pretrained_weights=config['model']['pretrained_weights'])

        print('intent num:', len(intent_vocab))
        print('tag num:', len(tag_vocab))

        bert_config = BertConfig.from_pretrained(config['model']['pretrained_weights'])

        best_model_path = os.path.join(output_dir, 'pytorch_model.bin')
        if not os.path.exists(best_model_path):
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)
            print('Load from model_file param')
            archive_file = cached_path(model_file)
            archive = zipfile.ZipFile(archive_file, 'r')
            archive.extractall(root_dir)
            archive.close()
        print('Load from', best_model_path)
        model = JointBERT(bert_config, config['model'], DEVICE, dataloader.tag_dim, dataloader.intent_dim)
        model.load_state_dict(torch.load(os.path.join(output_dir, 'pytorch_model.bin'), DEVICE))
        model.to(DEVICE)
        model.eval()

        self.model = model
        self.dataloader = dataloader
        print("BERTNLU loaded")
Example #7
    def auto_download(self):
        """Automatically download the pretrained model and necessary data."""
        if os.path.exists(os.path.join(self.config_path, 'model/rnn_model_state_dict.th')) and \
                os.path.exists(os.path.join(self.config_path, 'selection_model_state_dict.th')):
            return
        models_dir = os.path.join(self.config_path, 'models')
        cached_path(self.file_url, models_dir)
        files = os.listdir(models_dir)
        target_file = ''
        for name in files:
            if name.endswith('.json'):
                target_file = name[:-5]
        try:
            assert target_file in files
        except Exception as e:
            print(
                'allennlp download file error: RnnRollout Deal_or_Not data download failed.'
            )
            raise e
        zip_file_path = os.path.join(models_dir, target_file + '.zip')
        shutil.copyfile(os.path.join(models_dir, target_file), zip_file_path)
        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
            zip_ref.extractall(models_dir)
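
The '.json' lookup deserves a note: it presumes an AllenNLP-style cached_path, which stores each download under a hash filename with a '<hash>.json' metadata sidecar, so stripping the '.json' suffix recovers the cached archive's name. A sketch of that lookup in isolation; the helper is illustrative, not part of the project:

import os


def find_cached_archive(cache_dir):
    # Assumes AllenNLP-style caching: a '<hash>' archive stored next to
    # its '<hash>.json' metadata file.
    for name in os.listdir(cache_dir):
        if name.endswith('.json'):
            archive = os.path.join(cache_dir, name[:-5])
            if os.path.isfile(archive):
                return archive
    raise FileNotFoundError('no cached download found in {}'.format(cache_dir))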
Example #8
    def auto_download(self):
        """Automatically download the pretrained model and necessary data."""
        if os.path.exists(os.path.join(self.data_dir, 'models')) and \
                os.path.exists(os.path.join(self.data_dir, 'data')) and \
                os.path.exists(os.path.join(self.data_dir, 'word-vectors')):
            return
        cached_path(self.file_url, self.data_dir)
        files = os.listdir(self.data_dir)
        target_file = ''
        for name in files:
            if name.endswith('.json'):
                target_file = name[:-5]
        try:
            assert target_file in files
        except Exception as e:
            print(
                'allennlp download file error: MDBT Multiwoz data download failed.'
            )
            raise e
        zip_file_path = os.path.join(self.data_dir, target_file + '.zip')
        shutil.copyfile(os.path.join(self.data_dir, target_file),
                        zip_file_path)
        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
            zip_ref.extractall(self.data_dir)
Example #9
File: mle.py Project: zqwerty/tatk
    def load(self, archive_file, model_file, filename):
        if not os.path.isfile(archive_file):
            if not model_file:
                raise Exception("No model for MLE Policy is specified!")
            archive_file = cached_path(model_file)
        model_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'save')
        if not os.path.exists(model_dir):
            os.mkdir(model_dir)
        if not os.path.exists(os.path.join(model_dir, 'best_mle.pol.mdl')):
            archive = zipfile.ZipFile(archive_file, 'r')
            archive.extractall(model_dir)

        policy_mdl = os.path.join(os.path.dirname(os.path.abspath(__file__)), filename + '_mle.pol.mdl')
        if os.path.exists(policy_mdl):
            self.policy.load_state_dict(torch.load(policy_mdl))
            print('<<dialog policy>> loaded checkpoint from file: {}'.format(policy_mdl))
Example #10
    def __init__(self, mode, config_file, model_file):
        assert mode == 'usr' or mode == 'sys' or mode == 'all'
        config_file = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                   'configs/{}'.format(config_file))
        config = json.load(open(config_file))
        DEVICE = config['DEVICE']
        root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        data_dir = os.path.join(root_dir, config['data_dir'])
        output_dir = os.path.join(root_dir, config['output_dir'])

        if not os.path.exists(os.path.join(data_dir, 'intent_vocab.json')):
            preprocess(mode)

        intent_vocab = json.load(
            open(os.path.join(data_dir, 'intent_vocab.json')))
        tag_vocab = json.load(open(os.path.join(data_dir, 'tag_vocab.json')))
        dataloader = Dataloader(
            intent_vocab=intent_vocab,
            tag_vocab=tag_vocab,
            pretrained_weights=config['model']['pretrained_weights'])

        print('intent num:', len(intent_vocab))
        print('tag num:', len(tag_vocab))

        bert_config = BertConfig.from_pretrained(
            config['model']['pretrained_weights'])

        best_model_path = os.path.join(output_dir, 'bestcheckpoint.tar')
        if not os.path.exists(best_model_path):
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)
            print('Load from model_file param')
            archive_file = cached_path(model_file)
            archive = zipfile.ZipFile(archive_file, 'r')
            archive.extractall(root_dir)
            archive.close()
        print('Load from', best_model_path)
        model = JointBERT(bert_config, config['model'], DEVICE,
                          dataloader.tag_dim, dataloader.intent_dim)
        model.load_state_dict(
            torch.load(os.path.join(output_dir, 'pytorch_model.bin'), DEVICE))
        model.to(DEVICE)
        model.eval()

        self.model = model
        self.dataloader = dataloader
        print("BERTNLU loaded")
Example #11
def auto_download():
    model_path = os.path.join(os.path.dirname(__file__), os.pardir, 'model')
    data_path = os.path.join(os.path.dirname(__file__), os.pardir, 'data')
    db_path = os.path.join(os.path.dirname(__file__), os.pardir, 'db')
    root_path = os.path.join(os.path.dirname(__file__), os.pardir)

    urls = {model_path: 'https://tatk-data.s3-ap-northeast-1.amazonaws.com/mdrg_model.zip',
            data_path: 'https://tatk-data.s3-ap-northeast-1.amazonaws.com/mdrg_data.zip',
            db_path: 'https://tatk-data.s3-ap-northeast-1.amazonaws.com/mdrg_db.zip'}

    for path in [model_path, data_path, db_path]:
        if not os.path.exists(path):
            file_url = urls[path]
            print('Downloading from: ', file_url)
            archive_file = cached_path(file_url)
            print('Extracting...')
            archive = zipfile.ZipFile(archive_file, 'r')
            archive.extractall(root_path)
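
Note that each archive is extracted into root_path, which presumes the zips contain top-level model/, data/ and db/ directories matching the existence checks (an assumption from the code above). Usage is a single idempotent call:

# Safe to call on every startup: paths that already exist are skipped.
auto_download()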
Example #12
    def __init__(
        self,
        archive_file=DEFAULT_ARCHIVE_FILE,
        use_cuda=False,
        is_user=False,
        model_file='https://tatk-data.s3-ap-northeast-1.amazonaws.com/nlg_sclstm_multiwoz.zip'
    ):

        if not os.path.isfile(archive_file):
            if not model_file:
                raise Exception("No model for SC-LSTM is specified!")
            archive_file = cached_path(model_file)
        model_dir = os.path.dirname(os.path.abspath(__file__))
        if not os.path.exists(os.path.join(model_dir, 'resource')):
            archive = zipfile.ZipFile(archive_file, 'r')
            archive.extractall(model_dir)

        self.USE_CUDA = use_cuda
        self.args, self.config = parse(is_user)
        self.dataset = SimpleDatasetWoz(self.config)

        # get model hyper-parameters
        hidden_size = self.config.getint('MODEL', 'hidden_size')

        # get feat size
        d_size = self.dataset.do_size + self.dataset.da_size + self.dataset.sv_size  # len of 1-hot feat
        vocab_size = len(self.dataset.word2index)

        self.model = LMDeep('sclstm',
                            vocab_size,
                            vocab_size,
                            hidden_size,
                            d_size,
                            n_layer=self.args['n_layer'],
                            use_cuda=use_cuda)
        model_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                  self.args['model_path'])
        # print(model_path)
        assert os.path.isfile(model_path)
        self.model.load_state_dict(torch.load(model_path))
        self.model.eval()
        if use_cuda:
            self.model.cuda()
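
A hedged usage sketch; the class name, the generate() method, and the dialog-act format are assumptions based on tatk's NLG interface, not shown above:

nlg = SCLSTM(use_cuda=False)  # hypothetical class name for this constructor
acts = {'Inform': [['area', 'centre'], ['food', 'Chinese']]}  # illustrative act format
utterance = nlg.generate(acts)
print(utterance)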
Example #13
    tag_vocab = pickle.load(open(os.path.join(data_dir, 'tag_vocab.pkl'),
                                 'rb'))
    for key in data:
        print('{} set size: {}'.format(key, len(data[key])))
    print('intent num:', len(intent_vocab))
    print('tag num:', len(tag_vocab))

    dataloader = Dataloader(data, intent_vocab, tag_vocab,
                            config['model']["pre-trained"])

    best_model_path = os.path.join(output_dir, 'bestcheckpoint.tar')
    if not os.path.exists(best_model_path):
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        print('Load from zipped_model_path param')
        archive_file = cached_path(
            os.path.join(root_dir, config['zipped_model_path']))
        archive = zipfile.ZipFile(archive_file, 'r')
        archive.extractall(root_dir)
        archive.close()
    print('Load from', best_model_path)
    checkpoint = torch.load(best_model_path, map_location=DEVICE)
    print('best_intent_step', checkpoint['best_intent_step'])
    print('best_tag_step', checkpoint['best_tag_step'])

    model = BertNLU(config['model'],
                    dataloader.intent_dim,
                    dataloader.tag_dim,
                    DEVICE=DEVICE,
                    intent_weight=dataloader.intent_weight)
    model_dict = model.state_dict()
    state_dict = {
        k: v
        for k, v in checkpoint['model_state_dict'].items()
        if k in model_dict.keys()
    }
Example #14
    tag_vocab = pickle.load(open(os.path.join(data_dir, 'tag_vocab.pkl'),
                                 'rb'))
    for key in data:
        print('{} set size: {}'.format(key, len(data[key])))
    print('intent num:', len(intent_vocab))
    print('tag num:', len(tag_vocab))

    dataloader = Dataloader(data, intent_vocab, tag_vocab,
                            config['model']["pre-trained"])

    best_model_path = os.path.join(output_dir, 'bestcheckpoint.tar')
    if not os.path.exists(best_model_path):
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        print('Load from zipped_model_path param')
        archive_file = cached_path(config['zipped_model_path'])
        archive = zipfile.ZipFile(archive_file, 'r')
        archive.extractall()
        archive.close()
    print('Load from', best_model_path)
    checkpoint = torch.load(best_model_path, map_location=DEVICE)
    print('train step', checkpoint['step'])

    model = BertNLU(config['model'],
                    dataloader.intent_dim,
                    dataloader.tag_dim,
                    DEVICE=DEVICE,
                    intent_weight=dataloader.intent_weight)
    model_dict = model.state_dict()
    state_dict = {
        k: v
        for k, v in checkpoint['model_state_dict'].items()
        if k in model_dict.keys()
    }