def get_words_with_tag(language, words, pos_tag, tagger, separator='_'):
    """
    Returns the tokens of ``words`` that are tagged with the given tag.

    If the first element of ``words`` does not contain ``separator``, the
    whole list is treated as untagged and is run through the tagger module
    of the requested language. Otherwise every element is split into a
    (word, tag) pair via ``helpers.strings_to_tuples``. Only the first
    element is inspected to make this decision.

    >>> get_words_with_tag('en', ['cat', 'little'], 'NN', 'pos_tag_stanford')
    [('cat', 'NN')]
    >>> get_words_with_tag('fr', [(u'jouer_VB'), (u'aimer_VB'), \\
    ... (u'peut-être_RB')], 'VB', 'pos_tag_melt')
    [(u'jouer', u'VB'), (u'aimer', u'VB')]

    :param language: language id; 'en', 'fr' or 'fi'. Only consulted when
        the words still have to be tagged.
    :param words: list of words, optionally tagged <word>_<tag>
    :param pos_tag: POS-tag to select
    :param tagger: method to use for tagging (passed on for 'en' and 'fr';
        not used for 'fi')
    :param separator: the character that separates the word and the tag in
        the word list
    :return: list of the tagged tokens whose tag equals ``pos_tag``
    :raises ValueError: if the words are untagged and ``language`` is not
        one of the supported ids
    """
    if not words:
        return []

    if separator not in words[0]:
        # Untagged input: delegate to the language-specific tagger.
        if language == 'en':
            tagged = tag_en.quick_pos_tag(words, tagger=tagger)
        elif language == 'fr':
            tagged = tag_fr.quick_pos_tag(words, tagger=tagger)
        elif language == 'fi':
            tagged = tag_fi.quick_pos_tag(words)
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError on ``tagged``; fail early and clearly.
            raise ValueError(
                'Unsupported language for tagging: {0!r}'.format(language))
    else:
        # Pre-tagged input: split each "<word><separator><tag>" string.
        tagged = helpers.strings_to_tuples(words, separator)

    tagged = helpers.capitalize_nnp(tagged)

    # Tokens without a tag component (length < 2) are skipped.
    return [token for token in tagged
            if len(token) > 1 and token[1] == pos_tag]