示例#1
0
 def test_non_word(self):
     """A word absent from the Komi (kpv) semantic DB should come back as None."""
     # NOTE(review): renamed from `tes_non_word` — the missing "t" meant
     # unittest/pytest name-based discovery silently skipped this test.
     w = semfi.get_word("kisseli", "N", "kpv")
     self.assertIsNone(w)
示例#2
0
 def test_get_word(self):
     """Looking up an existing Komi (kpv) noun should yield a word entry."""
     entry = semfi.get_word("кань", "N", "kpv")
     self.assertIsNotNone(entry)
示例#3
0
#encoding: utf-8
from uralicNLP import semfi


def print_for(word, pos, rel, target_pos):
    """Print every word related to (word, pos) via relation `rel` with POS `target_pos`.

    Looks the word up in the Finnish semantic database and walks its `rel`
    relations in sorted order, printing each related word that matches the
    requested part of speech.
    """
    wo = semfi.get_word(word, pos, "fin")
    ws = semfi.get_by_relation(wo, rel, "fin", sort=True)
    for w in ws:
        if w["word2"] is not None and w["word2"]["pos"] == target_pos:
            # Parenthesized single-argument print: valid in both Python 2
            # (prints the parenthesized expression) and Python 3; the bare
            # `print x` statement form only parses under Python 2.
            print(w["word2"]["word"])


#print_for("punainen","A", "amod", "N")
# Demo: fetch the noun "kettu" from the Finnish semantic DB and dump all of
# its recorded relations. NOTE(review): `print expr` is Python-2-only syntax,
# so this script cannot run under Python 3 as written.
wo = semfi.get_word("kettu", "N", "fin")
print semfi.get_all_relations(wo, "fin")
#print semfi.get_by_word_and_relation(semfi.get_word("karhu", "N", "fin"), semfi.get_word("olla", "V", "fin"), "nsubj", "fin")
示例#4
0
def print_for(word, pos, rel, target_pos):
    """Print every word related to (word, pos) via relation `rel` with POS `target_pos`.

    Looks the word up in the Finnish semantic database and walks its `rel`
    relations in sorted order, printing each related word that matches the
    requested part of speech.
    """
    wo = semfi.get_word(word, pos, "fin")
    ws = semfi.get_by_relation(wo, rel, "fin", sort=True)
    for w in ws:
        if w["word2"] is not None and w["word2"]["pos"] == target_pos:
            # Parenthesized single-argument print: valid in both Python 2
            # (prints the parenthesized expression) and Python 3; the bare
            # `print x` statement form only parses under Python 2.
            print(w["word2"]["word"])
示例#5
0
def create_verb_probabilities(usr_input):
    """Build a verb-candidate probability distribution from a two-noun input.

    Tokenizes and lemmatizes *usr_input*; when the input is exactly two nouns,
    inflects the first as a nominative subject and the second as a genitive
    object, then collects verbs standing in a `dobj` relation to the subject.
    Only disyllabic verbs whose first syllable is not short are kept; their
    frequencies are normalized into a probability distribution.

    Returns:
        (verb_candidates, probability_distribution, lemmas, lemma_dict, verse)
        — empty candidates/distribution/dict when the input is not two nouns.
    """
    lemmas = tokenize_and_lemmatize(usr_input)
    input_posses = get_pos_template(lemmas)
    # print("Input POSes: " + input_posses + "\n")

    # Initialized up front so the final return cannot raise NameError when the
    # input is not two nouns (previously these names were only bound inside
    # the 'NN' branch).
    lemma_dict = {}
    verse = []
    verb_candidates = []
    probability_distribution = []

    # If both input words are nouns. Other POS patterns are not implemented.
    if input_posses == 'NN':
        lemma_dict = {'subject': lemmas[0], 'object': lemmas[1]}

        # Loop through both lemmas and inflect them depending on their syntactic role
        for lemma in lemmas:
            # Reset per lemma: previously `generated` leaked from the prior
            # iteration (or was unbound) when no analysis was a noun.
            generated = []

            for analysis in uralicApi.analyze(lemma, "fin"):
                # Strip the lemma prefix by exact length. The old
                # `lstrip(prefix)` call removed *characters from a set*, not a
                # prefix, and could over-strip the morphological description.
                ms_desc = analysis[0][len(analysis[0].split('+')[0]):]
                # print("Analysis of the lemma: " + lemma + ms_desc + "\n") # FOR DEBUGGING
                if ms_desc[1] == 'N':
                    if lemma == lemma_dict['subject']:
                        generated = uralicApi.generate(lemma + "+N+Sg+Nom",
                                                       "fin")
                    if lemma == lemma_dict['object']:
                        generated = uralicApi.generate(lemma + "+N+Sg+Gen",
                                                       "fin")

            if generated:
                verse.append(generated[0][0])
            else:
                print("Try with other words.")

            # If the lemma is the subject, choose a verb using its word relations.
            if lemma == lemma_dict['subject']:
                word = semfi.get_word(lemma, "N", "fin")
                # The original `while True: try ... break except ... exit()`
                # loop never looped — both paths left it on first pass.
                try:
                    relations = semfi.get_by_relation(word,
                                                      "dobj",
                                                      "fin",
                                                      sort=True)
                except Exception as e:
                    # str(e): concatenating the Exception object itself raised
                    # TypeError right at the point the error was reported.
                    print(
                        "At least one of the input words was not recognized, try with other words.\n\n"
                        + str(e))
                    exit()

                verbs_and_probs = []
                for relation in relations:
                    try:
                        if relation['word2']['pos'] == 'V':
                            inflected_form = uralicApi.generate(
                                relation['word2']['word'] +
                                "+V+Act+Ind+Prs+Sg3", "fin")[0][0]
                            first_syllable = finmeter.hyphenate(
                                inflected_form).split("-")[0]
                            if count_syllables(
                                    inflected_form
                            ) == 2 and not finmeter.is_short_syllable(
                                    first_syllable):
                                verbs_and_probs.append(
                                    (relation['word2']['word'],
                                     relation['word2']['frequency']))
                    except (KeyError, TypeError, IndexError):
                        # Best-effort: skip relations with missing fields or
                        # forms the generator cannot produce (empty result).
                        pass

                # Sort the verbs by frequency (descending) and drop the top 5%
                # most frequent plus the least-frequent half.
                verbs_and_probs = sorted(
                    verbs_and_probs, key=lambda x: x[-1], reverse=True)[round((
                        (len(verbs_and_probs) / 100) *
                        5)):round(((len(verbs_and_probs) / 100) * 50))]
                if not verbs_and_probs:
                    print("Try with other words.")
                    exit()

                else:
                    # Normalize the frequencies and expose them as a
                    # probability distribution over the candidate verbs.
                    verb_candidates, probability_distribution = map(
                        list, zip(*verbs_and_probs))
                    probability_distribution = np.array(
                        np.array(probability_distribution) /
                        sum(probability_distribution))

    return verb_candidates, probability_distribution, lemmas, lemma_dict, verse