def benchmark_polyglot_mdl():
    """Benchmark the Danish polyglot tagger and print a classification report.

    Running polyglot requires these packages:
    # Morfessor==2.0.6
    # PyICU==2.4.2
    # pycld2==0.41
    # polyglot

    Relies on names from the surrounding scope: ``sentences_tokens`` (the
    tokenised sentences to tag), ``tags_true`` (the gold tags),
    ``num_sentences`` and ``num_tokens`` (the expected totals, used both for
    the timing message and for the sanity checks below).
    """
    start = time.time()

    # The tagger does not depend on the sentence being processed, so it is
    # built once instead of once per sentence. It is still created after the
    # timer starts, so set-up cost remains part of the reported time.
    tagger = POSTagger(lang='da')

    tags_pred = []
    for tokens in sentences_tokens:
        word_list = WordList(tokens, language='da')
        # annotate() yields (word, tag) pairs; only the tag is evaluated.
        tags_pred.append([tag for _, tag in tagger.annotate(word_list)])

    print('**Polyglot model**')
    print("Made predictions on {} sentences and {} tokens in {}s".format(
        num_sentences, num_tokens, time.time() - start))

    # Sanity checks: one prediction per sentence and per token, otherwise
    # the report below would compare misaligned sequences.
    assert len(tags_pred) == num_sentences
    assert sum(len(s) for s in tags_pred) == num_tokens

    print(classification_report(tags_true, tags_pred, digits=4))
def benchmark_polyglot_mdl(corrected_output=False):
    """Benchmark the Danish polyglot POS tagger and print its accuracy.

    Running polyglot requires these packages:
    # Morfessor==2.0.6
    # PyICU==2.4.2
    # pycld2==0.41
    # polyglot

    Relies on names from the surrounding scope: ``sentences_tokens`` (the
    tokenised sentences to tag), ``tags_true`` (the gold tags),
    ``num_sentences`` / ``num_tokens`` (the expected totals),
    ``auxiliary_verbs``, ``print_speed_performance`` and ``accuracy_report``.

    Args:
        corrected_output: When true, each predicted tag is passed through
            ``udify_tag`` before scoring: ``CONJ`` becomes ``CCONJ`` and a
            ``VERB`` whose word is in ``auxiliary_verbs`` becomes ``AUX``.
            When false, the tagger's raw tags are scored.
    """

    def udify_tag(tag, word):
        """Map a predicted tag to the corrected tag used for scoring."""
        if tag == "CONJ":
            return "CCONJ"
        if tag == "VERB" and word in auxiliary_verbs:
            return "AUX"
        return tag

    start = time.time()

    # The tagger does not depend on the sentence being processed, so it is
    # built once instead of once per sentence. It is still created after the
    # timer starts, so set-up cost remains part of the reported time.
    tagger = POSTagger(lang='da')

    tags_pred = []
    for tokens in sentences_tokens:
        word_list = WordList(tokens, language='da')
        word_tag_pairs = tagger.annotate(word_list)
        # Decide once per sentence rather than once per token.
        if corrected_output:
            sentence_tags = [udify_tag(tag, word) for word, tag in word_tag_pairs]
        else:
            sentence_tags = [tag for _, tag in word_tag_pairs]
        tags_pred.append(sentence_tags)

    print('**Polyglot model' + (' (corrected output) ' if corrected_output else '') + '**')
    print_speed_performance(start, num_sentences, num_tokens)

    # Sanity checks: one prediction per sentence and per token, otherwise
    # the accuracy below would compare misaligned sequences.
    assert len(tags_pred) == num_sentences
    assert sum(len(s) for s in tags_pred) == num_tokens

    print(accuracy_report(tags_true, tags_pred), end="\n\n")
def tagOnly(
    text: str,
    lng: str,
):
    """Tag every whitespace-separated word of ``text``, line by line.

    Args:
        text: Input text; it is split into lines on ``"\\n"``.
        lng: Language code handed to ``POSTagger``.

    Returns:
        A flat list of two-element ``[word, tag]`` lists. A ``["\\n", "NL"]``
        entry is inserted before a line's tags whenever the result already
        holds at least one entry, so blank lines that precede the first
        tagged word leave no marker.

    The call also prints the text and the language code to stdout.
    """
    print("TAG=====(tagOnly)")
    print("TEXT=" + text)
    print("LNG=" + lng)

    tagger = POSTagger(lang=lng)
    result = []
    for raw_line in text.split("\n"):
        tokens = raw_line.strip().split()
        annotated = tagger.annotate(tokens)
        # Separate this line from whatever has been collected so far.
        if result:
            result.append(["\n", "NL"])
        pairs = [[word, pos] for word, pos in annotated]
        result += pairs
    return result
def pos_tag(args):
    """Tag words with their part of speech.

    Builds a ``POSTagger`` for ``args.lang`` and hands it, together with
    ``args``, to ``tag``.
    """
    tag(POSTagger(lang=args.lang), args)
def pos_tagger(self):
    """Return a new ``POSTagger`` for this object's language code."""
    language_code = self.language.code
    return POSTagger(lang=language_code)