Exemplo n.º 1
0
    def extract_chain_feature_dicts(chains):
        """
        Generate one feature list per chain.

        `chains` is an iterable of (entity, event list) pairs. For every pair
        a list is yielded that holds `predicate_relation(entity, event)` for
        each event of that chain, in the chain's own order.

        """
        for entity, events in chains:
            features = []
            for event in events:
                features.append(predicate_relation(entity, event))
            yield features
Exemplo n.º 2
0
 def get_event_repr(entity,
                    event,
                    predicative_adjectives=False,
                    transitivity=False):
     """
     Return the representation of `event` with respect to `entity`.

     The work is delegated to `predicate_relation_with_transitivity` when
     `transitivity` is true and to `predicate_relation` otherwise. In both
     cases `predicative_adjectives` is forwarded as the helper's
     `handle_predicative` keyword argument.

     """
     # Pick the helper first so the call itself is written only once.
     if transitivity:
         build_repr = predicate_relation_with_transitivity
     else:
         build_repr = predicate_relation
     return build_repr(entity, event, handle_predicative=predicative_adjectives)
Exemplo n.º 3
0
    def extract_chain_feature_lists(chains, only_verb=False, adjectives=False):
        """
        Generate one feature list per chain.

        `chains` is an iterable of (entity, event list) pairs. When
        `only_verb` is true, each yielded list holds the `verb_lemma` of every
        event in the chain. Otherwise it holds
        `predicate_relation(entity, event, handle_predicative=adjectives)` for
        every event.

        """
        for entity, events in chains:
            if only_verb:
                # Verb-only mode never consults the entity.
                yield [ev.verb_lemma for ev in events]
                continue

            features = []
            for ev in events:
                features.append(
                    predicate_relation(entity, ev, handle_predicative=adjectives))
            yield features
 def _filter_chains(chains):
     """
     Filter the events of each chain and drop chains left with no events.

     `chains` is an iterable of (entity, event list) pairs. Events are first
     passed through `_filter_events(entity, events)`. An event is then kept
     only if its `predicate_relation(entity, event)` is in `predicates` and
     every word of `event.get_np_argument_words()` is in `arguments`
     (`_filter_events`, `predicates` and `arguments` come from the enclosing
     scope).

     Returns a list of (entity, kept events) pairs for the chains that still
     have at least one event.

     """
     surviving = []
     for entity, events in chains:
         kept = []
         for event in _filter_events(entity, events):
             # Check the predicate first; the argument words are only
             # inspected for events whose predicate is accepted.
             if predicate_relation(entity, event) not in predicates:
                 continue
             if not all(word in arguments for word in event.get_np_argument_words()):
                 continue
             kept.append(event)
         if kept:
             surviving.append((entity, kept))
     return surviving
Exemplo n.º 5
0
    def feature_iter(self):
        """
        Generate one feature list per chain of every document in the corpus.

        For each document in `self.corpus`, and each (entity, events) pair
        returned by its `get_chains()`, a list holding
        `predicate_relation(entity, event)` for every event is yielded.

        If `self.progress` is truthy it is used as the title of a progress
        bar sized to the corpus. The bar is updated with the document index
        and finished in a `finally` clause, so it is also closed when
        iteration stops early or raises.

        """
        bar = get_progress_bar(len(self.corpus), title=self.progress) if self.progress else None

        try:
            for index, doc in enumerate(self.corpus):
                if bar:
                    bar.update(index)

                for entity, chain_events in doc.get_chains():
                    features = []
                    for ev in chain_events:
                        features.append(predicate_relation(entity, ev))
                    yield features
        finally:
            if bar:
                bar.finish()
Exemplo n.º 6
0
    def do_neighbours(self, line, **kwargs):
        entities, events, line = ModelShell.parse_event_context(line)
        if line.strip():
            print "Ignoring remainder of input: %s" % line

        preds = [predicate_relation(entities[0], event) for event in events]

        # Score all events in the vocabulary
        pmis = list(
            reversed(
                sorted([(vocab_ev,
                         sum(
                             self.model.pmi(vocab_ev, context_ev)
                             for context_ev in preds))
                        for vocab_ev in self.model.event_counts.keys()],
                       key=itemgetter(1))))

        for event, score in pmis[:10]:
            if score == 0.:
                break
            print event, score
                        event.substitute_entity(event_entity, event_entity.get_head_word())
                yield event
    else:
        _filter_events = lambda entity, events: events

    if opts.threshold is not None:
        # Run over the dataset to count up predicates and arg words so we know what to filter out
        predicates = Counter()
        arguments = Counter()
        log.info("Counting event slot words to apply threshold")
        pbar = get_progress_bar(len(corpus), title="Counting")
        for doc in pbar(corpus):
            for entity, events in doc.get_chains():
                events = list(_filter_events(entity, events))
                # Collect the predicate of each event
                predicates.update([predicate_relation(entity, event) for event in events])
                # Collect all np args from the events
                args = sum([event.get_np_argument_words() for event in events], [])
                arguments.update(args)
        pbar.finish()
        # Get just the most common words
        predicates = [p for (p, cnt) in predicates.items() if cnt >= opts.threshold]
        arguments = [a for (a, cnt) in arguments.items() if cnt >= opts.threshold]
        log.info("Predicate set of %d, argument set of %d" % (len(predicates), len(arguments)))

        # Prepare a filter to get rid of any events with rare words
        def _filter_chains(chains):
            filtered_chains = []
            for entity, events in chains:
                filtered_events = [
                    event for event in _filter_events(entity, events) if