Exemplo n.º 1
0
 def __init__(self,
              frame_path=None,
              element_path=None,
              bert_model='bert-base-cased',
              max_length=256):
     """Load the frame and element vocabularies and the BERT tokenizer.

     Args:
         frame_path: Passed to ``Vocabulary.load`` to obtain
             ``self.frame_vocabulary``.
         element_path: Passed to ``Vocabulary.load`` to obtain
             ``self.element_vocabulary``.
         bert_model: Name or path handed to
             ``BertTokenizer.from_pretrained``.
         max_length: Stored as ``self.max_length``.

     NOTE(review): both paths default to ``None`` and are forwarded to
     ``Vocabulary.load`` unchanged -- confirm that it accepts ``None``.
     """
     # Plain configuration first; the loaders below keep their original
     # relative order (frame, element, tokenizer).
     self.max_length = max_length
     self.frame_vocabulary = Vocabulary.load(frame_path)
     self.element_vocabulary = Vocabulary.load(element_path)
     self.tokenizer = BertTokenizer.from_pretrained(bert_model)
Exemplo n.º 2
0
 def __init__(self,
              trigger_path=None,
              argument_path=None,
              bert_model='bert-base-cased',
              max_length=256):
     """Initialise vocabularies and tokenizer when both paths are given.

     If either ``trigger_path`` or ``argument_path`` is falsy, the
     constructor returns without setting any attribute (including
     ``max_length``, ``bert_model`` and ``tokenizer``).

     Args:
         trigger_path: Passed to ``Vocabulary.load`` for
             ``self.trigger_vocabulary``.
         argument_path: Passed to ``Vocabulary.load`` for
             ``self.argument_vocabulary``.
         bert_model: Name or path handed to
             ``BertTokenizer.from_pretrained``.
         max_length: Stored as ``self.max_length``.
     """
     if not (trigger_path and argument_path):
         # Without both vocabulary files the instance is left bare.
         return

     self.trigger_vocabulary = Vocabulary.load(trigger_path)
     self.argument_vocabulary = Vocabulary.load(argument_path)
     self.max_length = max_length
     self.bert_model = bert_model
     self.tokenizer = BertTokenizer.from_pretrained(bert_model)
Exemplo n.º 3
0
    def __init__(self,
                 node_types_label_list=None,
                 node_attrs_label_list=None,
                 p2p_edges_label_list=None,
                 p2r_edges_label_list=None,
                 path=None,
                 bert_model='bert-base-cased',
                 max_span_width=15,
                 max_length=128):
        """Set up the tokenizer, the frame ontology and four label vocabularies.

        For each of the four label lists: when a non-empty list is given, a
        new ``Vocabulary`` is built from it and saved under ``path``;
        otherwise the vocabulary is loaded from the same file under ``path``.

        Args:
            node_types_label_list: Labels for ``node_types_vocabulary``
                (file ``node_types_vocabulary.txt``, padding ``"O"``).
            node_attrs_label_list: Labels for ``node_attrs_vocabulary``
                (file ``node_attrs_vocabulary.txt``, padding ``"O"``).
            p2p_edges_label_list: Labels for ``p2p_edges_vocabulary``
                (file ``p2p_edges_vocabulary.txt``, no padding).
            p2r_edges_label_list: Labels for ``p2r_edges_vocabulary``
                (file ``p2r_edges_vocabulary.txt``, no padding).
            path: Directory holding the vocabulary files; also passed to
                ``FrameOntology``. Despite the ``None`` default it is
                effectively required, because it is joined with the file
                names via ``os.path.join``, which rejects ``None``.
            bert_model: Name or path handed to
                ``BertTokenizer.from_pretrained``.
            max_span_width: Stored as ``self.max_span_width``.
            max_length: Stored as ``self.max_length``.
        """
        self.path = path
        self.bert_model = bert_model
        self.max_length = max_length
        self.tokenizer = BertTokenizer.from_pretrained(bert_model)
        self.max_span_width = max_span_width
        self._ontology = FrameOntology(self.path)

        def build_or_load(labels, file_name, padding):
            """Build and save a vocabulary from *labels*, or load it from disk."""
            vocabulary_file = os.path.join(path, file_name)
            if not labels:
                # No (or empty) label list: reuse the previously saved file.
                return Vocabulary.load(vocabulary_file)
            vocabulary = Vocabulary(padding=padding, unknown=None)
            vocabulary.add_word_lst(labels)
            vocabulary.build_vocab()
            vocabulary.save(vocabulary_file)
            return vocabulary

        # Node vocabularies use "O" as padding; edge vocabularies have none.
        self.node_types_vocabulary = build_or_load(
            node_types_label_list, 'node_types_vocabulary.txt', padding="O")
        self.node_attrs_vocabulary = build_or_load(
            node_attrs_label_list, 'node_attrs_vocabulary.txt', padding="O")
        self.p2p_edges_vocabulary = build_or_load(
            p2p_edges_label_list, 'p2p_edges_vocabulary.txt', padding=None)
        self.p2r_edges_vocabulary = build_or_load(
            p2r_edges_label_list, 'p2r_edges_vocabulary.txt', padding=None)
Exemplo n.º 4
0
    def __init__(self,
                 bert_model=None,
                 model_path=None,
                 vocabulary_path=None,
                 device=None,
                 device_ids=None,
                 max_seq_length=256):
        """Store the configuration and load the optional tokenizer/vocabulary.

        Args:
            bert_model: Name or path for ``BertTokenizer.from_pretrained``.
                When falsy, ``self.tokenizer`` is not created.
            model_path: Stored as ``self.model_path``; not used here.
            vocabulary_path: Passed to ``Vocabulary.load``. When falsy,
                ``self.vocabulary`` is not created.
            device: Stored as ``self.device``; not used here.
            device_ids: Stored as ``self.device_ids``; not used here.
            max_seq_length: Stored as ``self.max_seq_length``.
        """
        super().__init__()

        # Keep every argument on the instance, whether or not it is used below.
        self.bert_model = bert_model
        self.model_path = model_path
        self.vocabulary_path = vocabulary_path
        self.device = device
        self.device_ids = device_ids
        self.max_seq_length = max_seq_length

        # Both resources are optional and only loaded when configured.
        if bert_model:
            self.tokenizer = BertTokenizer.from_pretrained(bert_model)
        if vocabulary_path:
            self.vocabulary = Vocabulary.load(vocabulary_path)
Exemplo n.º 5
0
    def __init__(self,
                 schema_path=None,
                 trigger_path=None,
                 argument_path=None,
                 bert_model='bert-base-cased',
                 max_length=128):
        """Set up the tokenizer, the event schema and both type vocabularies.

        The schema file is read as JSON and must be an object mapping each
        trigger type to an iterable of its argument types. Each vocabulary is
        loaded from its path when that file exists; otherwise it is built
        from the schema's type names and saved to that path.

        Args:
            schema_path: Path of the JSON schema file (opened as UTF-8).
            trigger_path: Path of the trigger vocabulary file.
            argument_path: Path of the argument vocabulary file.
            bert_model: Name or path handed to
                ``BertTokenizer.from_pretrained``.
            max_length: Stored as ``self.max_length``.

        The three paths default to ``None`` but are passed straight to
        ``open`` / ``os.path.exists``, so real paths must be supplied.
        """
        self.schema_path = schema_path
        self.trigger_path = trigger_path
        self.argument_path = argument_path
        self.bert_model = bert_model
        self.max_length = max_length

        self.tokenizer = BertTokenizer.from_pretrained(self.bert_model)
        with open(self.schema_path, 'r', encoding='utf-8') as f:
            self.schema_str = json.load(f)

        # Sort the type names. Taking them straight from a set of strings
        # yields a different order in every interpreter run (string hash
        # randomisation), which made these lists -- and the indices in
        # args_s_id / args_e_id derived from them -- irreproducible between
        # runs.
        self.trigger_type_list = sorted(self.schema_str)
        self.argument_type_list = sorted({
            argument_type
            for argument_types in self.schema_str.values()
            for argument_type in argument_types
        })

        # "<argument type>_s" / "<argument type>_e" -> position of the
        # argument type in argument_type_list.
        self.args_s_id = {
            argument_type + '_s': index
            for index, argument_type in enumerate(self.argument_type_list)
        }
        self.args_e_id = {
            argument_type + '_e': index
            for index, argument_type in enumerate(self.argument_type_list)
        }

        def load_or_build(vocabulary_path, type_names):
            """Load the vocabulary file if present, else build and save it."""
            if os.path.exists(vocabulary_path):
                return Vocabulary.load(vocabulary_path)
            vocabulary = Vocabulary(padding=None, unknown=None)
            vocabulary.add_word_lst(type_names)
            vocabulary.build_vocab()
            vocabulary.save(vocabulary_path)
            return vocabulary

        self.trigger_vocabulary = load_or_build(
            self.trigger_path, self.trigger_type_list)
        self.argument_vocabulary = load_or_build(
            self.argument_path, self.argument_type_list)

        # Same schema, expressed with vocabulary indices instead of names.
        trigger_index = self.trigger_vocabulary.word2idx
        argument_index = self.argument_vocabulary.word2idx
        self.schema_id = {
            trigger_index[trigger_type]: [
                argument_index[argument_type]
                for argument_type in argument_types
            ]
            for trigger_type, argument_types in self.schema_str.items()
        }

        self.trigger_type_num = len(self.trigger_vocabulary)
        self.argument_type_num = len(self.argument_vocabulary)

        # Every known type starts with a maximum span length of 1.
        self.trigger_max_span_len = {name: 1 for name in trigger_index}
        self.argument_max_span_len = {name: 1 for name in argument_index}
Exemplo n.º 6
0
 def load_vocabulary(self, path):
     """Load ``vocabulary.txt`` from directory ``path`` into ``self.vocabulary``."""
     vocabulary_file = os.path.join(path, 'vocabulary.txt')
     self.vocabulary = Vocabulary.load(vocabulary_file)