Exemplo n.º 1
0
 def get_event_vector(self, event_input, include_all_pobj=True):
     """Sum the embedding vectors of an event's predicate and arguments.

     :param event_input: asserted to be an IndexedEventMultiPobj when
         include_all_pobj is true, otherwise an IndexedEvent
     :param include_all_pobj: selects which event type is expected
     :return: the summed vector, or None when the embedding model returns
         no vector for the predicate
     """
     if not include_all_pobj:
         assert isinstance(event_input, IndexedEvent), (
             'event_input must be a {} instance '
             'when include_all_pobj=False'.format(
                 get_class_name(IndexedEvent)))
     else:
         assert isinstance(event_input, IndexedEventMultiPobj), (
             'event_input must be a {} instance '
             'when include_all_pobj=True'.format(
                 get_class_name(IndexedEventMultiPobj)))

     model = self.embedding_model
     # start from an all-zero vector and accumulate in place
     event_vector = np.zeros(model.vector_size)

     # the predicate is mandatory: without its vector there is no result
     pred_vector = model.get_index_vec(event_input.get_predicate())
     if pred_vector is None:
         return None
     event_vector += pred_vector

     # arguments are optional: those without a vector are skipped
     for arg_input in event_input.get_all_argument():
         arg_vector = model.get_index_vec(arg_input)
         if arg_vector is None:
             continue
         event_vector += arg_vector
     return event_vector
Exemplo n.º 2
0
 def __init__(self, left_event, pos_event, neg_event, pos_arg_idx,
              neg_arg_idx, pos_salience, neg_salience):
     """Store a left event together with a positive and a negative event.

     :param left_event: IndexedEvent, stored as a deep copy
     :param pos_event: IndexedEvent, stored as a deep copy
     :param neg_event: IndexedEvent, stored as a deep copy
     :param pos_arg_idx: 1 (for subj), 2 (for obj), or 3 (for pobj)
     :param neg_arg_idx: 1 (for subj), 2 (for obj), or 3 (for pobj)
     :param pos_salience: EntitySalience, stored by reference
     :param neg_salience: EntitySalience, stored by reference
     :raises AssertionError: when any argument fails its check
     """
     assert isinstance(left_event, IndexedEvent), \
         'left_event must be a {} instance'.format(
             get_class_name(IndexedEvent))
     self.left_event = deepcopy(left_event)
     assert isinstance(pos_event, IndexedEvent), \
         'pos_event must be a {} instance'.format(
             get_class_name(IndexedEvent))
     self.pos_event = deepcopy(pos_event)
     assert isinstance(neg_event, IndexedEvent), \
         'neg_event must be a {} instance'.format(
             get_class_name(IndexedEvent))
     self.neg_event = deepcopy(neg_event)
     # the messages name the actual parameters (pos_arg_idx / neg_arg_idx)
     assert pos_arg_idx in [1, 2, 3], \
         'pos_arg_idx must be 1 (for subj), 2 (for obj), or 3 (for pobj)'
     self.pos_arg_idx = pos_arg_idx
     assert neg_arg_idx in [1, 2, 3], \
         'neg_arg_idx must be 1 (for subj), 2 (for obj), or 3 (for pobj)'
     self.neg_arg_idx = neg_arg_idx
     # extra features for entity salience
     assert isinstance(pos_salience, EntitySalience), \
         'pos_salience must be a {} instance'.format(
             get_class_name(EntitySalience))
     self.pos_salience = pos_salience
     assert isinstance(neg_salience, EntitySalience), \
         'neg_salience must be a {} instance'.format(
             get_class_name(EntitySalience))
     self.neg_salience = neg_salience
Exemplo n.º 3
0
    def parse_args(self, treebank_reader, corenlp_reader):
        """Parse all implicit and explicit arguments with both readers.

        Every argument in ``self.imp_args`` and ``self.exp_args`` is given
        the treebank reader, parsed, and then parsed with the CoreNLP
        reader. For a core label with more than one explicit filler,
        fillers pointing at a WH-determiner are removed.

        :raises AssertionError: on a wrong reader type, or when a core
            label does not have exactly two fillers of which exactly one
            is not a WH-determiner
        """
        assert isinstance(treebank_reader, TreebankReader), (
            'treebank_reader must be a {} instance'.format(
                get_class_name(TreebankReader)))

        assert isinstance(corenlp_reader, CoreNLPReader), (
            'corenlp_reader must be a {} instance'.format(
                get_class_name(CoreNLPReader)))

        def parse_one(arg):
            # run the three parsing steps on a single argument
            arg.get_treebank(treebank_reader)
            arg.parse_treebank()
            arg.parse_corenlp(corenlp_reader)

        for label in self.imp_args:
            for arg in self.imp_args[label]:
                parse_one(arg)

        for label, fillers in self.exp_args.items():
            for arg in fillers:
                parse_one(arg)
            if label not in core_arg_list or len(fillers) <= 1:
                continue
            assert len(fillers) == 2
            # remove pointer pointing to WH-determiner
            non_wh_fillers = [
                arg for arg in fillers
                if arg.tree.pos()[arg.tree_pointer.wordnum][1] != 'WDT']
            # exactly one non-WH-determiner pointer should remain
            assert len(non_wh_fillers) == 1
            self.exp_args[label] = non_wh_fillers
Exemplo n.º 4
0
 def __init__(self, core, salience):
     """Store the core argument and its salience.

     :param core: must be a CoreArgument instance
     :param salience: must be an EntitySalience instance
     :raises AssertionError: when either argument has the wrong type
     """
     assert isinstance(core, CoreArgument), (
         'RichEntity must be initialized with a {} instance'.format(
             get_class_name(CoreArgument)))
     self.core = core

     assert isinstance(salience, EntitySalience), (
         'RichEntity must be initialized with a {} instance'.format(
             get_class_name(EntitySalience)))
     self.salience = salience
Exemplo n.º 5
0
 def __init__(self, doc_name, entities, events):
     """Store the document name with its entities and events.

     :param doc_name: stored as given
     :param entities: iterable whose items must all be Entity instances
     :param events: iterable whose items must all be Event instances
     :raises ParseScriptError: when an item has the wrong type
     """
     self.doc_name = doc_name

     if any(not isinstance(entity, Entity) for entity in entities):
         raise ParseScriptError(
             'every entity must be a {} instance'.format(
                 get_class_name(Entity)))
     self.entities = entities

     if any(not isinstance(event, Event) for event in events):
         raise ParseScriptError(
             'every event must be a {} instance'.format(
                 get_class_name(Event)))
     self.events = events
Exemplo n.º 6
0
 def get_index(self,
               model,
               include_type=True,
               use_unk=True,
               pred_count_dict=None):
     """Index the predicate and every argument of this event with ``model``.

     ``pred_count_dict`` is forwarded to the predicate only. Afterwards
     ``self.rich_pobj`` is set to the first element of
     ``self.rich_pobj_list`` whose ``has_neg()`` is true; it is left
     untouched when no element qualifies.

     :raises AssertionError: when ``model`` is not a Word2VecModel
     """
     assert isinstance(model, Word2VecModel), (
         'model must be a {} instance'.format(get_class_name(Word2VecModel)))

     # options shared by the predicate and all arguments
     shared_options = {'include_type': include_type, 'use_unk': use_unk}

     self.rich_pred.get_index(
         model, pred_count_dict=pred_count_dict, **shared_options)

     # subject and object are optional
     for rich_arg in (self.rich_subj, self.rich_obj):
         if rich_arg is not None:
             rich_arg.get_index(model, **shared_options)

     for rich_pobj in self.rich_pobj_list:
         rich_pobj.get_index(model, **shared_options)

     # select the first argument with indexed positive candidate and at least
     # one indexed negative candidate from rich_pobj_list as the rich_pobj
     # NOTE(review): only has_neg() is checked here -- confirm it covers both
     selected = next(
         (pobj for pobj in self.rich_pobj_list if pobj.has_neg()), None)
     if selected is not None:
         self.rich_pobj = selected
Exemplo n.º 7
0
 def build(cls, event, rich_entity_list, prep_vocab_list, use_lemma=True):
     """Build an instance of ``cls`` from an Event.

     The predicate goes through ``RichPredicate.build``. Subject, object
     and each prepositional object go through ``BaseRichArgument.build``
     with argument types 'SUBJ', 'OBJ', and 'PREP_<prep>' (or plain 'PREP'
     when the preposition is not in ``prep_vocab_list``).

     :raises AssertionError: when ``event`` is not an Event instance
     """
     assert isinstance(event, Event), (
         'event must be a {} instance'.format(get_class_name(Event)))

     def build_arg(arg_type, arg):
         # every argument shares the entity list and the lemma setting
         return BaseRichArgument.build(
             arg_type, arg, rich_entity_list, use_lemma=use_lemma)

     rich_pred = RichPredicate.build(event.pred, use_lemma=use_lemma)
     rich_subj = (
         build_arg('SUBJ', event.subj) if event.subj is not None else None)
     rich_obj = (
         build_arg('OBJ', event.obj) if event.obj is not None else None)
     rich_pobj_list = [
         build_arg(
             'PREP_' + prep if prep in prep_vocab_list else 'PREP', pobj)
         for prep, pobj in event.pobj_list]
     return cls(rich_pred, rich_subj, rich_obj, rich_pobj_list)
Exemplo n.º 8
0
 def add_sent(self, sent):
     """Prepare a sentence and append it to ``self.sents``.

     The sentence's ``build_dep_graph()`` and ``process_verb_prt()`` are
     called, in that order, before it is appended.

     :raises AssertionError: when ``sent`` is not a Sentence instance
     """
     assert isinstance(sent, Sentence), (
         'add_sent must be called with a {} instance'.format(
             get_class_name(Sentence)))
     sent.build_dep_graph()
     sent.process_verb_prt()
     self.sents.append(sent)
Exemplo n.º 9
0
 def __init__(self,
              word2vec,
              event_vector_layer_sizes=None,
              pair_composition_layer_sizes=None,
              use_salience=True,
              salience_features=None):
     """Store the word2vec model and build the optional sub-networks.

     Each network is created only when its layer sizes argument is truthy;
     otherwise the corresponding attribute is set to None.

     :raises AssertionError: when ``word2vec`` is not a Word2VecModel
     """
     assert isinstance(word2vec, Word2VecModel), (
         'word2vec must be a {} instance'.format(
             get_class_name(Word2VecModel)))
     self.word2vec = word2vec

     self.event_vector_network = EventVectorNetwork(
         word_vectors=self.word2vec.get_vector_matrix(),
         vector_size=self.word2vec.vector_size,
         layer_sizes=event_vector_layer_sizes
     ) if event_vector_layer_sizes else None

     # the pair network receives the event vector network built above,
     # which is None when no event vector layer sizes were given
     self.pair_composition_network = PairCompositionNetwork(
         event_vector_network=self.event_vector_network,
         layer_sizes=pair_composition_layer_sizes,
         use_salience=use_salience,
         salience_features=salience_features
     ) if pair_composition_layer_sizes else None
Exemplo n.º 10
0
 def set_embedding_model(self, embedding_model):
     """Log and store the embedding model together with its name.

     :raises AssertionError: when ``embedding_model`` is not a
         Word2VecModel instance
     """
     assert isinstance(embedding_model, Word2VecModel), (
         'model must be a {} instance'.format(get_class_name(Word2VecModel)))
     message = 'set embedding model: {}'.format(embedding_model.name)
     self.logger.info(message)
     self.embedding_model = embedding_model
     self.embedding_model_name = embedding_model.name
Exemplo n.º 11
0
 def build(cls,
           script,
           prep_vocab_list,
           use_lemma=True,
           filter_stop_events=False):
     """Build an instance of ``cls`` from a Script.

     Every entity of the script goes through ``RichEntity.build`` and
     every event through ``RichEvent.build``. When ``filter_stop_events``
     is true, events whose predicate text (without type) is in
     ``consts.STOP_PREDS`` are dropped.

     :raises AssertionError: when ``script`` is not a Script instance
     """
     assert isinstance(script, Script), (
         'script must be a {} instance'.format(get_class_name(Script)))
     # FIXME: should use the token count of original document
     token_count_dict = script.get_token_count(use_lemma=use_lemma)

     rich_entity_list = [
         RichEntity.build(entity, token_count_dict, use_lemma=use_lemma)
         for entity in script.entities]

     rich_events = []
     for event in script.events:
         rich_event = RichEvent.build(
             event,
             rich_entity_list=rich_entity_list,
             prep_vocab_list=prep_vocab_list,
             use_lemma=use_lemma)
         # the predicate text is only inspected when filtering is requested
         if filter_stop_events and \
                 rich_event.rich_pred.get_text(include_type=False) \
                 in consts.STOP_PREDS:
             continue
         rich_events.append(rich_event)
     return cls(script.doc_name, rich_events, rich_entity_list)
Exemplo n.º 12
0
 def __init__(self, mentions):
     """Store the mentions, find the representative one, and set ``ner``.

     :param mentions: non-empty collection of Mention instances, exactly
         one of which has a truthy ``rep``
     :raises ParseEntityError: when ``mentions`` is empty, contains a
         non-Mention, or has zero or more than one representative mention
     """
     if not mentions:
         raise ParseEntityError('must provide at least one mention')
     if any(not isinstance(mention, Mention) for mention in mentions):
         raise ParseEntityError(
             'every mention must be a {} instance'.format(
                 get_class_name(Mention)))
     self.mentions = mentions

     self._rep_mention = None
     for mention in self.mentions:
         if not mention.rep:
             continue
         if self._rep_mention is not None:
             raise ParseEntityError(
                 'cannot have more than one representative mentions')
         self._rep_mention = mention
     if self._rep_mention is None:
         raise ParseEntityError('no representative mention provided')

     # NOBUG: set self.ner to be the most frequent ner of all mentions
     # might be different than the ner of rep_mention
     ner_counter = Counter(
         mention.ner for mention in self.mentions if mention.ner != '')
     self.ner = ner_counter.most_common(1)[0][0] if ner_counter else ''
Exemplo n.º 13
0
 def from_coref(cls, coref):
     """Build an instance of ``cls`` from a ``document.Coreference``.

     Each mention of ``coref`` is converted with ``Mention.from_mention``.

     :raises ParseEntityError: when ``coref`` is not a
         ``document.Coreference`` instance
     """
     if not isinstance(coref, document.Coreference):
         raise ParseEntityError(
             'from_coref must be called with a {} instance'.format(
                 get_class_name(document.Coreference)))
     converted = [
         Mention.from_mention(mention) for mention in coref.mentions]
     return cls(converted)
Exemplo n.º 14
0
    def __init__(self, node_list):
        """Store a group of nodes from the same file and sentence.

        :param node_list: at least two Node instances sharing the same
            ``file_id`` and ``sent_id``; stored sorted by ``token_id``
        :raises ParseNodeError: when fewer than two nodes are given, an
            item is not a Node, or file_id / sent_id are inconsistent
        """
        if len(node_list) <= 1:
            # a list (not a lazy map object) so the nodes show in the message
            raise ParseNodeError('only 1 node provided in {}'.format(
                [str(n) for n in node_list]))
        if not all(isinstance(n, Node) for n in node_list):
            raise ParseNodeError('every node must be a {} instance'.format(
                get_class_name(Node)))
        if not all(node_list[0].file_id == n.file_id for n in node_list[1:]):
            raise ParseNodeError('inconsistency in file_id in {}'.format(
                [str(n) for n in node_list]))
        if not all(node_list[0].sent_id == n.sent_id for n in node_list[1:]):
            raise ParseNodeError('inconsistency in sent_id in {}'.format(
                [str(n) for n in node_list]))
        self.node_list = sorted(node_list, key=lambda n: n.token_id)
        self.file_id = node_list[0].file_id
        self.sent_id = node_list[0].sent_id

        # Penn TreeBank related info
        self.ptb_idx_list = []
        self.ptb_surface = ''

        # Stanford CoreNLP related info
        self.corenlp_idx_list = []
        self.corenlp_word_surface = ''
        self.corenlp_lemma_surface = ''

        # head information, initialised to placeholder values
        self.head_idx = -1
        self.head_word = ''

        self.head_node = None
Exemplo n.º 15
0
    def get_index(self,
                  model,
                  include_type=True,
                  use_unk=True,
                  pred_count_dict=None):
        """Set ``self.wv`` to the word index of the first known candidate.

        Candidates come from ``self.get_candidates()``, with 'UNK' appended
        when ``use_unk`` is true and '-PRED' appended to each when
        ``include_type`` is true. ``self.wv`` is -1 when no candidate is
        found, or when a frequent predicate is randomly dropped.

        :raises AssertionError: when ``model`` is not a Word2VecModel
        """
        # TODO: add logic to process stop predicates
        assert isinstance(model, Word2VecModel), (
            'model must be a {} instance'.format(
                get_class_name(Word2VecModel)))
        candidates = self.get_candidates()
        # add UNK to the candidates if use_unk is set to True
        if use_unk:
            candidates.append('UNK')

        # randomly drop the predicate (index -1) when the count of the first
        # candidate exceeds consts.PRED_COUNT_THRES; the keep probability
        # is sqrt(threshold / count)
        if candidates and pred_count_dict:
            pred_count = pred_count_dict.get(candidates[0], 0)
            if pred_count > consts.PRED_COUNT_THRES:
                keep_prob = math.sqrt(
                    float(consts.PRED_COUNT_THRES) / pred_count)
                if random.random() < 1.0 - keep_prob:
                    self.wv = -1
                    return

        if include_type:
            candidates = [text + '-PRED' for text in candidates]

        # the first candidate with a valid index wins; -1 when none has one
        for text in candidates:
            index = model.get_word_index(text)
            if index != -1:
                self.wv = index
                break
        else:
            self.wv = -1
Exemplo n.º 16
0
 def __repr__(self):
   """Return the class name followed by one line per row of ``_loc_matrix``.

   The items of each row are passed through ``mapl(str, ...)`` and joined
   with single spaces.
   """
   header = get_class_name(self)
   rows = [' '.join(mapl(str, locs)) for locs in self._loc_matrix]
   return '%s\n%s' % (header, '\n'.join(rows))
Exemplo n.º 17
0
    def _create_service(self, service_package, base_package, db_info):
        """Write one ``<ClassName>Service.java`` interface per table.

        Creates ``service_package`` when it does not exist. For every
        ``(table_name, columns)`` item of ``db_info`` it builds the Java
        source of a service interface and passes it, with the target file
        name, to ``self._write_file``.
        """
        if not os.path.exists(service_package):
            os.makedirs(service_package)

        # create one Service for each table
        for table_name, columns in db_info.items():
            # find the primary key; only its name and short type are used
            primary_key, _pk_type, pk_type_short, _pk_jdbc_type = \
                self._get_primary_key(columns)

            class_name = util.get_class_name(table_name)
            file_name = os.path.join(
                service_package, class_name + 'Service.java')
            package_str = 'package ' + base_package + '.service;'
            import_str = 'import ' + base_package + '.pojo.' + class_name + ';'

            # pieces of the interface body, concatenated without separators
            interface_parts = [
                'public interface ' + class_name + 'Service {',
                '\n',
                '\tint deleteByPrimaryKey(' + pk_type_short + ' ' +
                primary_key + ');\n\n',
                '\tint insert(' + class_name + ' record);\n\n',
                '\tint insertSelective(' + class_name + ' record);\n\n',
                '\t' + class_name + ' selectByPrimaryKey(' + pk_type_short +
                ' ' + primary_key + ');\n\n',
                '\tint updateByPrimaryKeySelective(' + class_name +
                ' record);\n\n',
                '\tint updateByPrimaryKey(' + class_name + ' record);\n',
                '}',
            ]
            class_str = ''.join(interface_parts)

            # file content: package line, import line, interface,
            # separated by blank lines
            content = '\n\n'.join([package_str, import_str, class_str])
            self._write_file(content, file_name)
Exemplo n.º 18
0
 def get_sent(self, idx):
     """Return the sentence stored at position ``idx`` of ``self.sents``.

     :raises AssertionError: when ``idx`` is out of range or the stored
         item is not a Sentence instance
     """
     assert 0 <= idx < len(self.sents), (
         '{} out of sentence index'.format(idx))
     sent = self.sents[idx]
     assert isinstance(sent, Sentence), (
         'return value of get_sent must be a {} instance'.format(
             get_class_name(Sentence)))
     return sent
Exemplo n.º 19
0
 def get_coref(self, idx):
     """Return the coreference stored at position ``idx`` of ``self.corefs``.

     :raises AssertionError: when ``idx`` is out of range or the stored
         item is not a Coreference instance
     """
     assert 0 <= idx < len(self.corefs), (
         '{} out of coreference index'.format(idx))
     coref = self.corefs[idx]
     assert isinstance(coref, Coreference), (
         'return value of get_coref must be a {} instance'.format(
             get_class_name(Coreference)))
     return coref
Exemplo n.º 20
0
    def __init__(self, arg_pointer, dice_score, core, entity_salience):
        """Store the pointer, dice score, core argument and salience.

        :param arg_pointer: must be a RichTreePointer instance
        :param dice_score: stored as given, without validation
        :param core: must be a CoreArgument instance; stored as a deep copy
        :param entity_salience: an EntitySalience instance or None
        :raises AssertionError: when a typed argument fails its check
        """
        assert isinstance(arg_pointer, RichTreePointer), (
            'arg_pointer must be a {} instance'.format(
                get_class_name(RichTreePointer)))
        self.arg_pointer = arg_pointer

        self.dice_score = dice_score

        assert isinstance(core, CoreArgument), (
            'core must be a {} instance'.format(
                get_class_name(CoreArgument)))
        self.core = deepcopy(core)

        assert (entity_salience is None or
                isinstance(entity_salience, EntitySalience)), (
            'entity_salience must be a {} instance or None'.format(
                get_class_name(EntitySalience)))
        self.entity_salience = entity_salience
Exemplo n.º 21
0
 def get_mention(self, idx):
     """Return the mention stored at position ``idx`` of ``self.mentions``.

     :raises AssertionError: when ``idx`` is out of range or the stored
         item is not a Mention instance
     """
     assert 0 <= idx < len(self.mentions), (
         '{} out of mention index'.format(idx))
     mention = self.mentions[idx]
     assert isinstance(mention, Mention), (
         'return value of get_mention must be a {} instance'.format(
             get_class_name(Mention)))
     return mention
Exemplo n.º 22
0
 def add_token(self, token):
     """Tag a token with its position and append it to ``self.tokens``.

     The token's 'sent_idx' attrib is set to ``self.idx`` and its
     'token_idx' attrib to the number of tokens stored before the append.

     :raises AssertionError: when ``token`` is not a Token instance
     """
     assert isinstance(token, Token), (
         'add_token must be called with a {} instance'.format(
             get_class_name(Token)))
     token.set_attrib('sent_idx', self.idx)
     # the new token's index is the current length of the token list
     next_token_idx = len(self.tokens)
     token.set_attrib('token_idx', next_token_idx)
     self.tokens.append(token)
Exemplo n.º 23
0
 def __init__(self, pred, neg=False):
     """Store the predicate token and start with no arguments.

     :param pred: must be a ``document.Token`` instance (not None)
     :param neg: stored as given
     :raises AssertionError: when ``pred`` fails the type check
     """
     assert pred is not None and isinstance(pred, document.Token), (
         'Predicate must be a {} instance'.format(
             get_class_name(document.Token)))
     self.pred = pred
     self.neg = neg
     # arguments start out empty
     self.subj = self.obj = None
     self.pobj_list = []
Exemplo n.º 24
0
 def from_token(cls, token):
     """Build an instance of ``cls`` from a token's word, lemma and pos.

     :raises ParseTokenError: when ``token`` is not a ``document.Token``
     """
     if not isinstance(token, document.Token):
         raise ParseTokenError(
             'from_token must be called with a {} instance'.format(
                 get_class_name(document.Token)))
     return cls(token.word, token.lemma, token.pos)
Exemplo n.º 25
0
 def __init__(self, pred, subj, obj, pobj_list):
     """Validate and store the predicate and arguments of an event.

     :param pred: must be a Predicate instance
     :param subj: None or an Argument instance
     :param obj: None or an Argument instance
     :param pobj_list: (prep, pobj) pairs where every pobj must be an
         Argument; an empty prep only triggers a warning
     :raises ParseEventError: when a type check fails
     """
     if not isinstance(pred, Predicate):
         raise ParseEventError(
             'pred must be a {} instance'.format(
                 get_class_name(Predicate)))
     self.pred = pred

     if subj is not None and not isinstance(subj, Argument):
         raise ParseEventError(
             'subj must be None or a {} instance'.format(
                 get_class_name(Argument)))
     self.subj = subj

     if obj is not None and not isinstance(obj, Argument):
         raise ParseEventError(
             'obj must be None or a {} instance'.format(
                 get_class_name(Argument)))
     self.obj = obj

     # empty prepositions are tolerated but reported
     if any(prep == '' for prep, _ in pobj_list):
         warn('some of prep(s) in pobj_list are empty')
     if any(not isinstance(pobj, Argument) for _, pobj in pobj_list):
         raise ParseEventError(
             'every pobj must be a {} instance'.format(
                 get_class_name(Argument)))
     self.pobj_list = pobj_list
Exemplo n.º 26
0
 def _generate_getters(columns):
     """Return Java getter source, one line per column.

     Each column is read through its 'name' and 'type' keys; the method
     name, field name and Java type come from the ``util`` helpers.
     """
     getter_lines = []
     for column in columns:
         column_name = column['name']
         column_type = column['type']
         method_name = util.get_class_name(column_name)
         field_name = util.get_field_name(column_name)
         java_type = util.get_java_type(column_type)
         getter_lines.append(
             '\tpublic ' + java_type + ' get' + method_name +
             '() { return ' + field_name + '; }\n')
     return ''.join(getter_lines)
Exemplo n.º 27
0
 def construct(cls, doc):
     """Build a script from a ``document.Document``.

     A new instance is created from ``doc.doc_name``; every sentence is
     fed to ``read_from_sentence`` and every coreference to ``add_coref``,
     then the script is sorted and returned.

     :raises AssertionError: when ``doc`` is not a ``document.Document``
     """
     # the message names this method, not a different one
     assert isinstance(doc, document.Document), \
         'construct must be called with a {} instance'.format(
             get_class_name(document.Document))
     script = cls(doc.doc_name)
     for sent in doc.sents:
         script.read_from_sentence(sent)
     for coref in doc.corefs:
         script.add_coref(coref)
     script.sort()
     return script
Exemplo n.º 28
0
 def _generate_setters(columns):
     """Return Java setter source, one line per column.

     Each column is read through its 'name' and 'type' keys; the method
     name, field name and Java type come from the ``util`` helpers.
     """
     setter_lines = []
     for column in columns:
         column_name = column['name']
         column_type = column['type']
         method_name = util.get_class_name(column_name)
         field_name = util.get_field_name(column_name)
         java_type = util.get_java_type(column_type)
         setter_lines.append(
             '\tpublic void set' + method_name + '(' + java_type + ' ' +
             field_name + ') { this.' + field_name + ' = ' + field_name +
             '; }\n')
     return ''.join(setter_lines)
Exemplo n.º 29
0
def build_pred_wv_mapping(pred_list, model):
    """Map every predicate in ``pred_list`` to its word index in ``model``.

    Each predicate is looked up as ``pred + '-PRED'`` through
    ``model.get_word_index``.

    :raises AssertionError: when ``model`` is not a Word2VecModel or a
        lookup returns -1
    """
    assert isinstance(model, Word2VecModel), (
        'model must be a {} instance'.format(get_class_name(Word2VecModel)))

    mapping = {}
    for pred in pred_list:
        word_index = model.get_word_index(pred + '-PRED')
        # every predicate is expected to be in the vocabulary
        assert word_index != -1
        mapping[pred] = word_index
    return mapping
Exemplo n.º 30
0
 def add_mention(self, mention):
     """Tag a mention with its position and append it to ``self.mentions``.

     The mention's 'coref_idx' attrib is set to ``self.idx`` and its
     'mention_idx' attrib to the number of mentions stored before the
     append. A mention with a truthy ``rep`` becomes ``self.rep_mention``.

     :raises AssertionError: when ``mention`` is not a Mention instance
     """
     assert isinstance(mention, Mention), (
         'add_mention must be called with a {} instance'.format(
             get_class_name(Mention)))
     mention.set_attrib('coref_idx', self.idx)
     # the new mention's index is the current length of the mention list
     next_mention_idx = len(self.mentions)
     mention.set_attrib('mention_idx', next_mention_idx)
     self.mentions.append(mention)
     if mention.rep:
         self.rep_mention = mention