Example #1
    def __init__(self, model_file):
        self.model_file = os.path.abspath(model_file)

        # label
        label_map_reverse_file = os.path.join(
            self.model_file, 'label_map_reverse.json')
        with tf.gfile.GFile(label_map_reverse_file, 'r') as f:
            self.label_map_reverse = json.load(f)
        self.labels = [item[1] for item in sorted(
            self.label_map_reverse.items(), key=lambda i: i[0])]

        # model
        model_config_file = os.path.join(
            self.model_file, 'model_config.json')
        with tf.gfile.GFile(model_config_file, 'r') as f:
            self.model_config = json.load(f)
        self.model_name = self.model_config.get('model_name') or None
        self.model_type = self.model_config.get('model_type') or None
        self.vocab_file = self.model_config.get('vocab_file') or None
        self.max_seq_len = self.model_config.get('max_seq_len') or 512
        if not self.model_name:
            assert all([self.vocab_file, self.model_type]), \
                'When model_name is not one of the open_sources pretrained ' \
                'names, both model_type and vocab_file must be specified.'
        else:
            assert self.model_name in pretrained_names, \
                '%s not provided by open_sources' % self.model_name
            self.model_type = pretrained_types.get(self.model_name).split('_')[0]
            pretrained_dir = get_pretrained_model(pretrained_name=self.model_name)
            self.vocab_file = os.path.join(pretrained_dir, 'vocab.txt')

        # tokenizer (the same FullTokenizer is used for both supported model types)
        if self.model_type == 'bert':
            self.tokenizer = FullTokenizer(self.vocab_file)
        elif self.model_type == 'albert':
            self.tokenizer = FullTokenizer(self.vocab_file)
        else:
            raise ValueError('model_type %s unknown.' % self.model_type)

        # processor
        self._load_processor()

        # build graph
        self._build()

        # load cache
        self.cache_file = os.path.join(
            self.model_file, 'cache.txt')
        self._load_cache()
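
For context, the constructor above only needs a directory containing label_map_reverse.json and model_config.json (plus cache.txt for _load_cache). Below is a minimal sketch that writes such a directory; the key names and file names come straight from the snippet, while the label set and the 'model_name' value are illustrative assumptions (valid names would have to come from pretrained_names).

import json
import os

model_dir = 'exported_model'
os.makedirs(model_dir, exist_ok=True)

# id -> label mapping; __init__ sorts the items by key to rebuild self.labels
with open(os.path.join(model_dir, 'label_map_reverse.json'), 'w') as f:
    json.dump({'0': 'negative', '1': 'positive'}, f)

# either model_name (checked against pretrained_names) or both
# model_type and vocab_file must be present; max_seq_len defaults to 512
with open(os.path.join(model_dir, 'model_config.json'), 'w') as f:
    json.dump({'model_name': 'bert_base_zh',  # hypothetical pretrained name
               'max_seq_len': 128}, f)
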
Example #2
    def _from_pretrained(self):
        # resolve the vocab, config and checkpoint paths inside the
        # downloaded pretrained model directory
        pretrained_dir = get_pretrained_model(pretrained_name=self.model_name)
        self.vocab_file = os.path.join(pretrained_dir, 'vocab.txt')
        self.config_file = os.path.join(pretrained_dir, 'config.json')
        self.init_checkpoint_file = os.path.join(pretrained_dir, 'model.ckpt')
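
_from_pretrained assumes the downloaded directory follows the usual TF 1.x BERT checkpoint layout (vocab.txt, config.json and a model.ckpt checkpoint prefix). A small sanity-check sketch follows; the directory path is illustrative, and since 'model.ckpt' is only a prefix, its .index file is what actually exists on disk.

import os

pretrained_dir = '/path/to/pretrained/bert_base'  # hypothetical download location
for name in ('vocab.txt', 'config.json', 'model.ckpt.index'):
    path = os.path.join(pretrained_dir, name)
    print(path, 'OK' if os.path.exists(path) else 'MISSING')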