Пример #1
0
    def to_example(self, row):
        """Build an ``Example`` from one raw row and hand it to ``convert_types``.

        ``row`` is paired positionally with ``self.headers``. The relation
        label is read from ``annotated_relations`` (surrounding whitespace
        stripped) and replaced by its ``self.relation_map`` entry when one
        exists. Backticks in ``pos`` become apostrophes, and the escaped
        newline/tab sequences (a backslash followed by ``n`` or ``t``) in the
        dependency fields are expanded into the real characters.
        """
        headers = self.headers
        assert len(row) == len(headers), \
            "could not convert row to example %s\n%s" % (row, headers)

        ex = Example(**dict(zip(headers, row)))
        ex.relation = ex.annotated_relations.strip()
        ex['pos'] = ex['pos'].replace('`', "'")

        # Remap the label only when the map has an entry for it.
        if ex.relation in self.relation_map:
            ex.relation = self.relation_map[ex.relation]

        for field in ('dependency', 'dep_extra', 'dep_malt'):
            escaped = ex[field]
            ex[field] = escaped.replace("\\n", "\n").replace("\\t", "\t")

        return self.convert_types(ex)
Пример #2
0
    def to_example(self, row):
        """Turn a raw row into a type-converted ``Example``.

        The row must have exactly one value per entry in ``self.headers``.
        After construction the relation label is normalised (stripped, then
        remapped via ``self.relation_map`` if present there), backticks in
        ``pos`` are replaced by apostrophes, and the dependency fields have
        their escaped newlines and tabs expanded.
        """
        def unescape(text):
            # Expand backslash-escaped newlines and tabs into real characters.
            return text.replace("\\n", "\n").replace("\\t", "\t")

        n_expected = len(self.headers)
        assert len(row) == n_expected, (
            "could not convert row to example %s\n%s" % (row, self.headers))

        values_by_header = dict(zip(self.headers, row))
        ex = Example(**values_by_header)

        ex.relation = ex.annotated_relations.strip()
        ex['pos'] = ex['pos'].replace('`', "'")
        if ex.relation in self.relation_map:
            ex.relation = self.relation_map[ex.relation]

        for name in ('dependency', 'dep_extra', 'dep_malt'):
            ex[name] = unescape(ex[name])

        return self.convert_types(ex)
Пример #3
0
 def to_example(self, row):
     """Pair ``row`` with ``self.headers`` and return the type-converted ``Example``."""
     columns = self.headers
     assert len(row) == len(columns), \
         "could not convert row to example %s\n%s" % (row, columns)
     fields = dict(zip(columns, row))
     return self.convert_types(Example(**fields))
Пример #4
0
    def convert_types(self, ex):
        """Convert the raw fields of ``ex`` into typed values.

        Steps, in order:

        * ``lemmas`` and ``words`` are parsed with ``self.parse_array`` (called
          with ``True`` as second argument) and lower-cased token by token.
        * ``pos`` and ``ner`` are parsed with ``self.parse_array``.
        * The four span offsets are cast to ``int``.
        * ``subject`` / ``object`` are set to the space-joined words of their
          spans (end offsets are exclusive, as in a slice).
        * ``dependency`` is replaced by ``self.parse_dependency``'s result.
        * The NER tags inside each entity span are overwritten with
          ``subject_ner`` / ``object_ner``.
        * ``relation`` defaults to ``None`` when the attribute is absent.

        Returns:
            A new ``Example`` holding only the attributes whose names are in
            ``self.keep``.
        """
        for field in ['lemmas', 'words']:
            tokens = self.parse_array(ex[field], True)
            ex[field] = [w.lower() for w in tokens]
        for field in ['pos', 'ner']:
            ex[field] = self.parse_array(ex[field])
        for field in [
                'subject_begin', 'subject_end', 'object_begin', 'object_end'
        ]:
            ex[field] = int(ex[field])

        ex.subject = ' '.join(ex.words[ex.subject_begin:ex.subject_end])
        ex.object = ' '.join(ex.words[ex.object_begin:ex.object_end])
        ex.dependency = self.parse_dependency(ex.dependency, ex)

        # ``range`` instead of ``xrange``: the latter does not exist on
        # Python 3, while ``range`` iterates the same indices on both versions.
        for i in range(ex.subject_begin, ex.subject_end):
            ex.ner[i] = ex.subject_ner

        for i in range(ex.object_begin, ex.object_end):
            ex.ner[i] = ex.object_ner

        if not hasattr(ex, 'relation'):
            ex.relation = None

        kept = {k: v for k, v in ex.__dict__.items() if k in self.keep}
        return Example(**kept)
Пример #5
0
    def featurize(self, ex, add=False):
        """Turn ``ex`` into a word-sequence feature ``Example``.

        Each entity span in ``ex.words`` is replaced by a single token holding
        that entity's NER tag; the resulting sequence is optionally trimmed to
        ``self.scope`` tokens around the two entities and then mapped through
        ``self.vocab['word']``. ``add`` is forwarded to every vocabulary
        lookup.

        Raises:
            NoPathException: if one entity begins inside the other's span.
        """
        if (ex.object_begin <= ex.subject_begin < ex.object_end) or \
                (ex.subject_begin <= ex.object_begin < ex.subject_end):
            raise NoPathException(str(ex))

        # Order the two entities by where they start in the sentence.
        if ex.subject_begin < ex.object_begin:
            first, second = 'subject', 'object'
        else:
            first, second = 'object', 'subject'

        before = ex.words[:ex[first + '_begin']]
        first_tag = [ex[first + '_ner']]
        between = ex.words[ex[first + '_end']:ex[second + '_begin']]
        second_tag = [ex[second + '_ner']]
        after = ex.words[ex[second + '_end']:]

        sequence = before + first_tag + between + second_tag + after
        first_pos = len(before)
        second_pos = first_pos + len(first_tag) + len(between)

        if self.scope > 0:
            # NOTE(review): the positions reported below are not shifted by
            # ``start`` after this trim -- confirm that is what consumers expect.
            start = max(0, first_pos - self.scope)
            end = min(len(sequence), second_pos + self.scope + 1)
            sequence = sequence[start:end]

        # Lookups stay in this order because ``add=True`` may grow the vocabularies.
        relation_id = self.vocab['rel'].get(ex.relation, add=add) if ex.relation else None
        subject_ner_id = self.vocab['ner'].get(ex.subject_ner, add=add)
        object_ner_id = self.vocab['ner'].get(ex.object_ner, add=add)
        token_ids = [self.vocab['word'].get(w, add=add) for w in sequence]

        if first == 'subject':
            subject_pos, object_pos = first_pos, second_pos
        else:
            subject_pos, object_pos = second_pos, first_pos

        feat = Example(
            relation=relation_id,
            subject_ner=subject_ner_id,
            object_ner=object_ner_id,
            orig=ex,
            sequence=token_ids,
            subject_pos=subject_pos,
            object_pos=object_pos,
        )
        n_tokens = len(feat.sequence)
        ex.length = n_tokens
        feat.length = n_tokens

        return feat
Пример #6
0
 def to_example(self, row):
     """Build an ``Example`` from ``row`` and return its type-converted form.

     Values are matched positionally to ``self.headers``. In the three
     dependency fields, escaped newlines and tabs (a backslash followed by
     ``n`` or ``t``) are expanded into the real characters first.
     """
     columns = self.headers
     assert len(row) == len(columns), \
         "could not convert row to example %s\n%s" % (row, columns)

     ex = Example(**dict(zip(columns, row)))
     for field in ('dependency', 'dep_extra', 'dep_malt'):
         escaped = ex[field]
         ex[field] = escaped.replace("\\n", "\n").replace("\\t", "\t")
     return self.convert_types(ex)
Пример #7
0
    def simulate(self, max_turns=None, verbose=False):
        '''
        Run the sessions against each other and return the dialogue as an Example.

        A randomly chosen session opens. Sessions are then polled in order;
        every event produced is timestamped, passed to ``self.event_callback``,
        recorded in ``self.events``, printed, and delivered to every other
        session. The loop ends when ``self.game_over()`` is true or, if
        ``max_turns`` is set, once that many events have been produced. The
        final event is not delivered to the other sessions.
        '''
        self.events = []
        self.max_turns = max_turns
        clock = 0
        turns_taken = 0
        done = False
        self.describe_scenario()
        first_speaker = 0 if random.random() < 0.5 else 1

        while not done:
            for agent, session in enumerate(self.sessions):
                # Until the first event exists, only the chosen opener is polled.
                if turns_taken == 0 and agent != first_speaker:
                    continue
                event = session.send()
                clock += 1  # the clock advances even when nothing was sent
                if not event:
                    continue

                event.time = clock
                self.event_callback(event)
                self.events.append(event)

                if verbose:
                    print('agent=%s: session=%s, event=%s' %
                          (agent, type(session).__name__, event.to_dict()))
                else:
                    action, data = event.action, event.data
                    if action == 'message':
                        event_output = data
                    else:
                        event_output = "Action: {0}, Data: {1}".format(
                            action, data)
                    print('agent=%s, event=%s' % (agent, event_output))

                turns_taken += 1
                if self.game_over() or (max_turns and turns_taken >= max_turns):
                    done = True
                    break

                for partner, listener in enumerate(self.sessions):
                    if partner == agent:
                        continue
                    listener.receive(event)

        example_id = generate_uuid('E')
        outcome = self.get_outcome()
        if verbose:
            print('outcome: %s' % outcome)
            print('----------------')
        # TODO: add configurable names to systems and sessions
        agent_names = {str(idx): self.session_names[idx] for idx in (0, 1)}
        return Example(self.scenario, example_id, self.events, outcome,
                       example_id, agent_names)
Пример #8
0
    def featurize(self, ex, add=False):
        """Turn ``ex`` into a dependency-path feature ``Example``.

        The relation and both entity NER tags are mapped through
        ``self.vocab`` (``add`` is forwarded to each lookup), and the
        ``dependency`` feature is whatever ``DependencyParse.get_path_from_parse``
        returns for the subject and object spans.

        Raises:
            NoPathException: if ``ex`` has no dependency parse, or if the
                extracted path is empty.
        """
        if not ex.dependency:
            # no dependency parse
            raise NoPathException(str(ex))

        # Vocabulary lookups run before path extraction, as they may add entries.
        relation_id = self.vocab['rel'].get(ex.relation, add=add) if ex.relation else None
        subject_ner_id = self.vocab['ner'].get(ex.subject_ner, add=add)
        object_ner_id = self.vocab['ner'].get(ex.object_ner, add=add)

        parse = DependencyParse(ex.dependency, enhanced=True)
        path = parse.get_path_from_parse(
            ex.subject_begin, ex.subject_end, ex.object_begin, ex.object_end)

        feat = Example(
            relation=relation_id,
            subject_ner=subject_ner_id,
            object_ner=object_ner_id,
            dependency=path,
            orig=ex,
        )

        if not feat.dependency:
            # no shortest path between entities
            raise NoPathException(str(ex))
        return feat
Пример #9
0
 def corrupt(self, feat, add=False):
     """Return a corrupted deep copy of ``feat`` with one sequence item removed.

     The copy is flagged with ``corrupt = True``, loses one randomly chosen
     element of its ``sequence``, and has its relation set to the value
     returned by ``self.vocab['rel'].add('no_relation')``. ``feat`` itself
     is left untouched. The ``add`` argument is not used.

     Returns:
         The corrupted ``Example``, or ``None`` when the shortened sequence
         is empty.
     """
     corrupted = Example(**deepcopy(feat.__dict__))
     corrupted.corrupt = True

     # randomly drop a node
     tokens = corrupted.sequence
     victim = np.random.randint(0, len(tokens))
     corrupted.sequence = tokens[:victim] + tokens[victim + 1:]

     corrupted.relation = self.vocab['rel'].add('no_relation')
     corrupted.length = len(corrupted.sequence)
     if not corrupted.length:
         return None
     return corrupted
Пример #10
0
    def simulate(self, max_turns=100):
        '''
        Simulate the dialogue and return it as an Example.

        Sessions are polled in order, round after round. Every event produced
        is timestamped, recorded in ``self.events``, printed, and delivered to
        every other session; a ``'select'`` event also stores its data in
        ``self.selections`` for the sending agent. The simulation stops as
        soon as ``self.game_over()`` is true (``self.reward`` becomes 1) or at
        the end of the round in which ``max_turns`` events have been reached.

        The returned Example carries ``{'reward': self.reward}`` as outcome.
        '''
        self.events = []
        time = 0
        self.selections = [None, None]
        self.reward = 0
        num_turns = 0
        timeup = False
        while True:
            for agent, session in enumerate(self.sessions):
                event = session.send()
                time += 1  # the clock advances even when nothing was sent
                if not event:
                    continue
                event.time = time
                self.events.append(event)

                if event.action == 'select':
                    self.selections[agent] = event.data

                # A single parenthesised argument prints the same text under
                # both the Python 2 print statement and the Python 3 function.
                print('agent=%s: session=%s, event=%s' % (
                    agent, type(session).__name__, event.to_dict()))
                num_turns += 1
                if num_turns >= max_turns:
                    # Only the outer loop checks this flag, so the remaining
                    # sessions of the current round are still polled.
                    timeup = True
                for partner, other_session in enumerate(self.sessions):
                    if agent != partner:
                        other_session.receive(event)

                # Game is over when the two selections are the same
                if self.game_over():
                    self.reward = 1
                    break
            if self.game_over() or timeup:
                break

        uuid = generate_uuid('E')
        outcome = {'reward': self.reward}
        print('outcome: %s' % outcome)
        return Example(self.scenario, uuid, self.events, outcome, uuid, None)
Пример #11
0
 def get_decode_vocab(self, example: Example) -> BaseVocabEntry:
     """Return the vocabulary to decode ``example`` with.

     This is whatever ``example.get_both_ext_vocab()`` returns.
     """
     decode_vocab = example.get_both_ext_vocab()
     return decode_vocab