def _find_bad_tag_and_raise_error(self, tags):
    """Work out which of ``tags`` are rejected and raise ValueError.

    Every distinct tag is submitted on its own to a throwaway
    ``parser.ExtPos`` through ``addTagConstraints``; a tag is considered
    bad when that call returns a false value.

    Args:
        tags: iterable of hashable POS tag strings to check.

    Raises:
        ValueError: unconditionally. The message lists the rejected
            tags in sorted order, separated by ", ".
    """
    # Scratch object used only for probing individual tags; it is
    # discarded when the exception propagates.
    scratch = parser.ExtPos()
    # set(tags) removes duplicates so each tag is probed exactly once.
    rejected = sorted(
        candidate for candidate in set(tags)
        if not scratch.addTagConstraints(parser.StringVector([candidate])))
    raise ValueError("Invalid POS tags (not present in the parser's "
                     "terms.txt file): %s" % ', '.join(rejected))
def _possible_tags_to_ext_pos(self, tokens, possible_tags):
    """Translate per-token tag constraints into a ``parser.ExtPos``.

    Args:
        tokens: sized sequence of tokens; only its length is used.
        possible_tags: mapping from token index to either a single tag
            (a string) or an iterable of tags. Indices absent from the
            mapping contribute an empty constraint list. A falsy value
            (e.g. ``None`` or ``{}``) yields an empty ``ExtPos``.

    Returns:
        The populated ``parser.ExtPos`` instance.

    Raises:
        ValueError: via ``_find_bad_tag_and_raise_error`` when
            ``addTagConstraints`` returns a false value for some token.
    """
    constraints = parser.ExtPos()
    if possible_tags:
        for position in range(len(tokens)):
            allowed = possible_tags.get(position, [])
            # A lone string means "exactly this tag"; wrap it so it is
            # not iterated character by character.
            allowed = [allowed] if isinstance(allowed, basestring) else allowed
            allowed = [str(tag) for tag in allowed]
            accepted = constraints.addTagConstraints(
                parser.StringVector(allowed))
            if not accepted:
                # at least one of the tags is bad -- find out which ones
                # and throw a ValueError
                self._find_bad_tag_and_raise_error(allowed)
    return constraints