def test_non_word(self):
    """A lemma that does not exist in the semfi database yields None.

    BUG FIX: the method was named ``tes_non_word``; unittest discovery
    only runs methods whose names start with ``test``, so this test was
    silently skipped. Renamed to the required ``test_`` prefix.
    """
    # "kisseli" is deliberately not a real Komi (kpv) noun.
    w = semfi.get_word("kisseli", "N", "kpv")
    self.assertIsNone(w)
def test_get_word(self):
    """A known Komi (kpv) noun should be found in the semfi database."""
    entry = semfi.get_word("кань", "N", "kpv")
    self.assertIsNotNone(entry)
# encoding: utf-8
from uralicNLP import semfi


def print_for(word, pos, rel, target_pos):
    """Print each word related to (word, pos) by relation `rel` in Finnish,
    keeping only related words whose POS equals `target_pos`.

    BUG FIX: the bodies used Python 2 ``print`` statements, which are
    syntax errors under Python 3 and inconsistent with the rest of the
    file; converted to ``print()`` calls. No other behavior changed.
    """
    wo = semfi.get_word(word, pos, "fin")
    ws = semfi.get_by_relation(wo, rel, "fin", sort=True)
    for w in ws:
        # word2 can be missing/None in the relation record; skip those.
        if w["word2"] is not None and w["word2"]["pos"] == target_pos:
            print(w["word2"]["word"])


#print_for("punainen","A", "amod", "N")
wo = semfi.get_word("kettu", "N", "fin")
print(semfi.get_all_relations(wo, "fin"))
#print(semfi.get_by_word_and_relation(semfi.get_word("karhu", "N", "fin"), semfi.get_word("olla", "V", "fin"), "nsubj", "fin"))
def print_for(word, pos, rel, target_pos):
    """Print each word related to (word, pos) by relation `rel` in Finnish,
    keeping only related words whose POS equals `target_pos`.

    BUG FIX: the loop body used a Python 2 ``print`` statement, which is
    a syntax error under Python 3 and inconsistent with the Python 3
    ``print()`` calls elsewhere in this file.
    """
    wo = semfi.get_word(word, pos, "fin")
    ws = semfi.get_by_relation(wo, rel, "fin", sort=True)
    for w in ws:
        # word2 can be missing/None in the relation record; skip those.
        if w["word2"] is not None and w["word2"]["pos"] == target_pos:
            print(w["word2"]["word"])
def create_verb_probabilities(usr_input):
    '''
    Uses the first input noun to find verbs that are semantically similar.
    Outputs verb candidates and their probability distribution.

    Returns (verb_candidates, probability_distribution, lemmas,
    lemma_dict, verse) when the input is two nouns and suitable verbs
    are found; otherwise prints a message and/or exits.
    '''
    lemmas = tokenize_and_lemmatize(usr_input)
    input_posses = get_pos_template(lemmas)
    # If both input words are nouns. Other alternatives are not implemented.
    if input_posses == 'NN':
        lemma_dict = {'subject': lemmas[0], 'object': lemmas[1]}
        verse = []
        # Loop through both lemmas and inflect them depending on their
        # syntactic role.
        for lemma in lemmas:
            for analysis in uralicApi.analyze(lemma, "fin"):
                # BUG FIX: the original used
                # ``analysis[0].lstrip(analysis[0].split('+')[0])``, but
                # str.lstrip(chars) strips a *character set*, not a prefix,
                # so tag characters overlapping the lemma's letters could be
                # eaten. Slice the lemma prefix off by length instead.
                lemma_part = analysis[0].split('+')[0]
                ms_desc = analysis[0][len(lemma_part):]  # e.g. "+N+Sg+Nom"
                if ms_desc[1] == 'N':
                    # Subject stays nominative; object becomes genitive.
                    if lemma == lemma_dict['subject']:
                        generated = uralicApi.generate(lemma + "+N+Sg+Nom", "fin")
                    if lemma == lemma_dict['object']:
                        generated = uralicApi.generate(lemma + "+N+Sg+Gen", "fin")
                    if len(generated) > 0:
                        verse.append(generated[0][0])
                    else:
                        print("Try with other words.")
            # If the lemma is subject, choose a verb using its word
            # relations. There's probably a better alternative for this.
            if lemma == lemma_dict['subject']:
                word = semfi.get_word(lemma, "N", "fin")
                # BUG FIX: the original wrapped this in a pointless
                # ``while True`` that either broke or exited on the first
                # pass; a plain try/except is equivalent. Also, the message
                # concatenated the Exception object onto a str, which would
                # itself raise TypeError — convert with str(e).
                try:
                    relations = semfi.get_by_relation(word, "dobj", "fin", sort=True)
                except Exception as e:
                    print(
                        "At least one of the input words was not recognized, try with other words.\n\n"
                        + str(e))
                    exit()
                verbs_and_probs = []
                for relation in relations:
                    # BUG FIX: bare ``except:`` also swallowed SystemExit/
                    # KeyboardInterrupt; narrowed to Exception. The intent
                    # (skip relations that fail generation/lookup) is kept.
                    try:
                        if relation['word2']['pos'] == 'V':
                            inflected_form = uralicApi.generate(
                                relation['word2']['word'] + "+V+Act+Ind+Prs+Sg3",
                                "fin")[0][0]
                            first_syllable = finmeter.hyphenate(
                                inflected_form).split("-")[0]
                            # Keep only disyllabic verbs with a long first
                            # syllable (metrical constraint).
                            if count_syllables(
                                    inflected_form
                            ) == 2 and not finmeter.is_short_syllable(
                                    first_syllable):
                                verbs_and_probs.append(
                                    (relation['word2']['word'],
                                     relation['word2']['frequency']))
                    except Exception:
                        pass
                # Sort the verbs by frequency (descending) and drop the top
                # 5% most frequent as well as the least-frequent half.
                verbs_and_probs = sorted(
                    verbs_and_probs, key=lambda x: x[-1],
                    reverse=True)[round((
                        (len(verbs_and_probs) / 100) * 5)):round((
                            (len(verbs_and_probs) / 100) * 50))]
                if len(verbs_and_probs) == 0:
                    print("Try with other words.")
                    exit()
                else:
                    # Normalize the frequencies into a probability
                    # distribution over the verb candidates.
                    verb_candidates, probability_distribution = map(
                        list, zip(*verbs_and_probs))
                    probability_distribution = np.array(
                        np.array(probability_distribution) /
                        sum(probability_distribution))
                    return verb_candidates, probability_distribution, lemmas, lemma_dict, verse