Exemplo n.º 1
0
    def makeTokenSets(self, is_locorg_allowed=True):
        """Build a list of typed TokenSet objects, one per accepted mention.

        Mentions tagged 'org', 'per' or 'loc' are always accepted.  Mentions
        tagged 'locorg' are accepted as-is when ``is_locorg_allowed`` is true
        and are retagged as 'loc' otherwise.  Mentions with any other tag are
        skipped.

        is_locorg_allowed - enable/disable the 'LocOrg' tag

        Returns the list of created TokenSet objects, in mention order.
        """
        if is_locorg_allowed:
            accepted = {'org', 'per', 'loc', 'locorg'}
        else:
            accepted = {'org', 'per', 'loc'}

        token_sets = []
        for mention in self.mentions:
            # fold 'locorg' into 'loc' when the combined tag is disabled
            fold = mention.tag == 'locorg' and not is_locorg_allowed
            tag = 'loc' if fold else mention.tag
            if tag not in accepted:
                continue

            # flatten the tokens of every span of the mention, in span order
            tokens = []
            for span in mention.spans:
                tokens.extend(span.tokens)
            token_set = TokenSet(tokens, tag, self.text)

            # mark each token according to the set tag and its span tag
            for span in mention.spans:
                for token in span.tokens:
                    token_set.setMark(
                        token, Tables.getMark(token_set.tag, span.tag))
            token_sets.append(token_set)

        # find and mark organizations embedded within other organizations;
        # plain 'org' sets come first, 'locorg' sets (if enabled) after them
        org_like = [ts for ts in token_sets if ts.tag == 'org']
        if is_locorg_allowed:
            org_like += [ts for ts in token_sets if ts.tag == 'locorg']
        for candidate in org_like:
            candidate.findParents(org_like)

        return token_sets
Exemplo n.º 2
0
 def makeTokenSets(self, standard, is_locorg_allowed=True):
     """Create a dictionary of typed TokenSet objects corresponding to the entities,
     using the provided standard data to tokenize the intervals.

     standard - object whose ``tokens`` are matched against each entity
         interval; a token is taken when it lies fully inside the interval
         and is not ignored
     is_locorg_allowed - when false, 'locorg' entities are retagged as 'loc'
         and the result carries no 'locorg' key

     Returns a dict mapping each tag to a list of TokenSet objects.  Every
     tag in ``self.allowed_tags`` gets a (possibly empty) list, except
     'locorg' when it is disabled.
     """

     res = {tag: [] for tag in self.allowed_tags}
     for key in self.allowed_tags:
         for interval in self.entities[key]:
             ts = TokenSet([token
                            for token in standard.tokens
                                if token.start >= interval.start
                                    and token.end <= interval.end
                                    and not token.isIgnored()],
                           key)
             if not is_locorg_allowed and key == 'locorg':
                 ts.tag = 'loc'
             # setdefault: the target bucket may be missing, e.g. when
             # 'locorg' is folded into 'loc' but 'loc' is not an allowed tag
             res.setdefault(ts.tag, []).append(ts)

     if not is_locorg_allowed:
         # 'locorg' may not be among the allowed tags at all, so tolerate
         # a missing key instead of raising KeyError
         leftover = res.pop('locorg', [])
         assert not leftover, "'locorg' token sets must be folded into 'loc'"
     return res
Exemplo n.º 3
0
    def makeTokenSets(self, standard, is_locorg_allowed=True):
        """Build a flat list of TokenSet objects, one per mention interval.

        For every tag in ``self.allowed_tags`` and every interval stored in
        ``self.mentions`` under that tag, the tokens of ``standard`` that lie
        fully inside the interval and are not ignored are collected into a
        TokenSet.  When ``is_locorg_allowed`` is false, sets built for
        'locorg' are retagged as 'loc'.

        Returns the list of created TokenSet objects.
        """
        token_sets = []
        for tag in self.allowed_tags:
            # 'locorg' sets are folded into 'loc' when the tag is disabled
            fold_into_loc = not is_locorg_allowed and tag == 'locorg'

            for interval in self.mentions[tag]:
                covered = []
                for token in standard.tokens:
                    inside = (token.start >= interval.start
                              and token.end <= interval.end)
                    if inside and not token.isIgnored():
                        covered.append(token)

                token_set = TokenSet(covered, tag, standard.text)

                # keep the source interval on the token set
                # so it can be displayed as-is later
                token_set.interval = interval

                if fold_into_loc:
                    token_set.tag = 'loc'
                token_sets.append(token_set)

        return token_sets
Exemplo n.º 4
0
    def makeTokenSets(self, standard, is_locorg_allowed=True):
        """Create a list of typed TokenSet objects corresponding to the mentions,
        using the provided standard data to tokenize the intervals.

        Each mention interval of each allowed tag yields one TokenSet made of
        the non-ignored tokens of ``standard`` contained in that interval.
        With ``is_locorg_allowed`` false, 'locorg' sets get the tag 'loc'.
        """

        def is_covered(token, interval):
            # a token belongs to the interval when it lies fully inside it
            # and is not flagged as ignored
            return (token.start >= interval.start
                    and token.end <= interval.end
                    and not token.isIgnored())

        result = []
        for tag in self.allowed_tags:
            retag = (not is_locorg_allowed) and tag == 'locorg'
            for interval in self.mentions[tag]:
                members = [tok for tok in standard.tokens
                           if is_covered(tok, interval)]
                entry = TokenSet(members, tag, standard.text)

                # remember the original interval on the token set
                # so that it can be shown unchanged later on
                entry.interval = interval

                if retag:
                    entry.tag = 'loc'
                result.append(entry)

        return result