Exemplo n.º 1
0
def main(text, model_CWI, treshold, CWI_NER, SSSG_WSD, metode, model_SR):
    """Run the CWI -> SSSG -> SR pipeline on ``text`` and rewrite it.

    Stages (each prints its intermediate result):

    1. CWI  - ``cwi_main`` returns entries that are indexed here as
       ``entry[0]`` (a word) and ``entry[1]`` (an index into
       ``text.split()``).
    2. SSSG - ``sssg_main`` supplies replacement candidates for every entry.
    3. SR   - ``sr_main.wordRanking`` ranks the candidates; the first ranked
       candidate is kept, entries without a usable ranking are dropped.
    4. Each kept candidate is written into the whitespace-split text at the
       entry's index, conjugated with the tense recorded for the original
       word when one is available.

    Args:
        text: Input string; it is tokenised with ``str.split()``.
        model_CWI: Passed through to the ``cwi_main`` call.
        treshold: Passed through to the ``cwi_main`` call (spelling kept for
            backward compatibility).
        CWI_NER: Truthy selects ``complexWordIdentificationNER``, otherwise
            ``complexWordIdentification`` is used.
        SSSG_WSD: Truthy uses ``get_synset`` + ``get_word_synonym`` (and
            collects definitions and tenses); otherwise
            ``get_word_synonym_noWSD`` is used.
        metode: Passed through to ``sssg_main.get_synset``.
        model_SR: Passed through to ``sr_main.wordRanking``.

    Returns:
        tuple: ``(rewritten_text, definition)`` where ``definition`` is a
        list of ``"word : definition"`` strings (empty unless ``SSSG_WSD``).
    """
    definition = []

    # CWI
    if CWI_NER:
        hasil_CWI = cwi_main.complexWordIdentificationNER(
            text, model_CWI, treshold)
    else:
        hasil_CWI = cwi_main.complexWordIdentification(
            text, model_CWI, treshold)

    print("CWI ## Hasil ", hasil_CWI)

    # SSSG
    hasil_SSSG = []
    # tenses[k] belongs to hasil_SSSG[k]; None means "no tense recorded".
    tenses = []
    if SSSG_WSD:
        for word in hasil_CWI:
            try:
                tenses.append(en.verb_tense(word[0].lower()))
            except Exception:
                tenses.append("")
            synset = sssg_main.get_synset(metode, word[0], text)
            try:
                # Look the definition up once and reuse it for both outputs.
                meaning = sssg_main.get_word_definition(synset)
                print("SSSG ## Definition", word[0], ":", meaning)
                definition.append(word[0] + " : " + meaning)
            except Exception:
                print("SSSG ## Definition", word[0])

            try:
                candidates = sssg_main.get_word_synonym(synset)
            except Exception:
                # Best effort: fall back to the word itself as only candidate.
                candidates = [word[0]]
            hasil_SSSG.append([candidates, word[1]])
    else:
        for word in hasil_CWI:
            tenses.append(None)
            hasil_SSSG.append(
                [sssg_main.get_word_synonym_noWSD(word[0]), word[1]])

    print("SSSG ## Hasil", hasil_SSSG)

    # SR
    hasil_SR = []
    # Tense of each entry that survives ranking. Keeping it next to the entry
    # (instead of re-indexing ``tenses`` with a running counter) keeps words
    # and tenses aligned when an entry is dropped below.
    sr_tenses = []
    for words, tense in zip(hasil_SSSG, tenses):
        ranked = sr_main.wordRanking(model_SR, words[0])
        print(ranked, words)
        try:
            best = ranked[0][0]
        except (IndexError, KeyError, TypeError):
            # No usable ranking for this entry: leave the original word.
            continue
        hasil_SR.append([best, words[1]])
        sr_tenses.append(tense)

    print("SR ## Hasil", hasil_SR)

    # Hasil
    hasil_text = text.split()
    for (replacement, position), tense in zip(hasil_SR, sr_tenses):
        if tense is None:
            hasil_text[position] = replacement
            continue
        try:
            hasil_text[position] = en.verb_conjugate(replacement, tense=tense)
        except Exception:
            # Conjugation failed: use the candidate unchanged.
            hasil_text[position] = replacement

    hasil_text = " ".join(hasil_text)

    # Output
    print("")
    print(text)
    print(hasil_text)
    return hasil_text, definition


# main(text, model_CWI, treshold, CWI_NER, SSSG_WSD, metode, model_SR)
Exemplo n.º 2
0
 def conjugate(self, word, tense="infinitive", negate=False):
     """Conjugate ``word`` by delegating to ``verb_lib.verb_conjugate``.

     ``word``, ``tense`` and ``negate`` are forwarded positionally, in that
     order, and the library's result is returned unchanged.
     """
     conjugated = verb_lib.verb_conjugate(word, tense, negate)
     return conjugated
    def get_synonyms(self):
        """Fill ``self.synonyms`` with thesaurus entries for ``self.word``.

        Steps:
          1. Join ``self.token_sent`` back into a sentence, run spaCy on it
             and store the lemma of the token equal to ``self.word`` in
             ``self.lemma``.
          2. Request ``self.lemma`` from the words.bighugelabs.com API.
          3. Pick the entries matching ``self.pos`` ('V' verb, 'N' noun,
             'J' adjective, 'RB' adverb). For verbs ``self.tense`` is set
             from ``verb.verb_tense``. If that selection fails, every list
             in the response is used instead.
          4. Keep only the first space-separated token of each entry, drop
             duplicates (first occurrence wins) and wrap each token in a
             one-element list.
          5. When ``self.is_plural`` equals True, pluralise tokens that
             ``inflect`` reports as not plural; when ``self.tense`` is set,
             conjugate each token (tokens that fail stay unchanged).

        Reads ``self.token_sent``, ``self.word``, ``self.pos``,
        ``self.is_plural`` and ``self.tense``; writes ``self.lemma``,
        ``self.synonyms`` and ``self.tense``.

        Returns:
            None. ``self.synonyms`` is set to ``[]`` when no lemma was found
            and is left untouched when the service gave no usable answer.
        """

        def with_similar(entry):
            # "syn" extended by "sim" when the response has both.
            try:
                return entry["syn"] + entry["sim"]
            except (KeyError, TypeError):
                return entry["syn"]

        # NOTE(review): the model is loaded on every call; consider caching.
        spacy_module = spacy.load('en')

        # Tokens are separated by a space, except punctuation and tokens
        # starting with an apostrophe, which attach to the previous token.
        sentence = "".join([
            " " +
            i if not i.startswith("'") and i not in string.punctuation else i
            for i in self.token_sent
        ]).strip()

        for token in spacy_module(sentence):
            if str(token) == self.word:
                self.lemma = token.lemma_

        # Without a lemma the request URL cannot be built.
        if self.lemma is None:
            self.synonyms = []
            return

        import requests

        # Placeholder text: must be replaced by a real API key.
        THEASURUS_KEY = "Get your theasurus key from https://words.bighugelabs.com/site/api"

        r = requests.get(url='http://words.bighugelabs.com/api/2/' +
                         THEASURUS_KEY + '/' + self.lemma + '/json')
        if r.status_code != 404:
            # Parse the body once; a non-JSON body means no usable answer.
            try:
                data = r.json()
            except ValueError:
                data = None

            if isinstance(data, list):
                self.synonyms = data
            elif isinstance(data, dict):
                try:
                    if 'V' in self.pos:
                        self.synonyms = data["verb"]["syn"]
                        self.tense = verb.verb_tense(self.word)
                    elif 'N' in self.pos:
                        self.synonyms = with_similar(data["noun"])
                    elif 'J' in self.pos:
                        self.synonyms = with_similar(data["adjective"])
                    elif 'RB' in self.pos:
                        self.synonyms = with_similar(data["adverb"])
                except Exception:
                    # Selection by part of speech failed: take every list
                    # of every part of speech in the response.
                    self.synonyms = [
                        item
                        for pos in data
                        for type_ in data[pos]
                        for item in data[pos][type_]
                    ]

        # Nothing was stored (e.g. 404 or unusable body): nothing to refine.
        if self.synonyms is None:
            return

        # First token of each entry, de-duplicated in first-seen order.
        first_tokens = [entry.split(' ')[0] for entry in self.synonyms]
        self.synonyms = [[token] for token in dict.fromkeys(first_tokens)]

        # Kept as an equality test: the attribute's type is not visible here.
        if self.is_plural == True:
            p = inflect.engine()
            # singular_noun() yields False when it does not treat the word
            # as a plural; only those words are pluralised.
            self.synonyms = [
                [
                    plural.noun_plural(token)
                    if p.singular_noun(token) is False else token
                    for token in group
                ]
                for group in self.synonyms
            ]

        if self.tense is not None:
            tense_synonyms = []
            for group in self.synonyms:
                conjugated = []
                for element in group:
                    try:
                        conjugated.append(
                            verb.verb_conjugate(element,
                                                tense=self.tense,
                                                negate=False))
                    except Exception:
                        # Not conjugable: keep the token as it is.
                        conjugated.append(element)
                tense_synonyms.append(conjugated)

            self.synonyms = tense_synonyms
Exemplo n.º 4
0
    def get_synonyms(self):
        """Fill ``self.synonyms`` with thesaurus entries for ``self.word``.

        Steps:
          1. Join ``self.token_sent`` back into a sentence, run spaCy on it
             and store the lemma of the token equal to ``self.word`` in
             ``self.lemma``; stop with ``self.synonyms = []`` when no lemma
             is available.
          2. Request ``self.lemma`` from the words.bighugelabs.com API.
          3. A list response is used as is. Otherwise the entries matching
             ``self.pos`` are picked ('V' verb, 'N' noun, 'J' adjective,
             'RB' adverb); for verbs ``self.tense`` is set from
             ``verb.verb_tense``. If that selection fails, every list in the
             response is used instead.
          4. Keep only the first space-separated token of each entry, drop
             duplicates (first occurrence wins) and wrap each token in a
             one-element list.
          5. When ``self.is_plural`` equals True, pluralise tokens that
             ``inflect`` reports as not plural; when ``self.tense`` is set,
             conjugate each token (tokens that fail stay unchanged).

        Reads ``self.token_sent``, ``self.word``, ``self.pos``,
        ``self.is_plural`` and ``self.tense``; writes ``self.lemma``,
        ``self.synonyms`` and ``self.tense``.

        Returns:
            None. ``self.synonyms`` is left untouched when the service gave
            no usable answer.
        """

        def with_similar(entry):
            # "syn" extended by "sim" when the response has both.
            try:
                return entry["syn"] + entry["sim"]
            except (KeyError, TypeError):
                return entry["syn"]

        # NOTE(review): the model is loaded on every call; consider caching.
        spacy_module = spacy.load('en_core_web_sm')

        # Tokens are separated by a space, except punctuation and tokens
        # starting with an apostrophe, which attach to the previous token.
        sentence = "".join([
            " " +
            i if not i.startswith("'") and i not in string.punctuation else i
            for i in self.token_sent
        ]).strip()

        for token in spacy_module(sentence):
            if str(token) == self.word:
                self.lemma = token.lemma_

        if self.lemma is None:
            self.synonyms = []
            return

        import requests

        ##6c6bbfe357c61dcc40b628419778ebd7

        ##ce218b46b8d46a30bebc843f4da120d8

        # NOTE(review): the API key is hard-coded in the URL below. It should
        # be moved out of the source (configuration/environment) and rotated.
        r = requests.get(
            url=
            'http://words.bighugelabs.com/api/2/6c6bbfe357c61dcc40b628419778ebd7/'
            + self.lemma + '/json')
        if r.status_code != 404:
            # Parse the body once; a non-JSON body means no usable answer.
            try:
                data = r.json()
            except ValueError:
                data = None

            if isinstance(data, list):
                self.synonyms = data
            elif isinstance(data, dict):
                try:
                    if 'V' in self.pos:
                        self.synonyms = data["verb"]["syn"]
                        self.tense = verb.verb_tense(self.word)
                    elif 'N' in self.pos:
                        self.synonyms = with_similar(data["noun"])
                    elif 'J' in self.pos:
                        self.synonyms = with_similar(data["adjective"])
                    elif 'RB' in self.pos:
                        self.synonyms = with_similar(data["adverb"])
                except Exception:
                    # Selection by part of speech failed: take every list
                    # of every part of speech in the response.
                    self.synonyms = [
                        item
                        for pos in data
                        for type_ in data[pos]
                        for item in data[pos][type_]
                    ]

        # Nothing was stored (e.g. 404 or unusable body): nothing to refine.
        if self.synonyms is None:
            return

        # First token of each entry, de-duplicated in first-seen order.
        first_tokens = [entry.split(' ')[0] for entry in self.synonyms]
        self.synonyms = [[token] for token in dict.fromkeys(first_tokens)]

        # Kept as an equality test: the attribute's type is not visible here.
        if self.is_plural == True:
            p = inflect.engine()
            # singular_noun() yields False when it does not treat the word
            # as a plural; only those words are pluralised.
            self.synonyms = [
                [
                    plural.noun_plural(token)
                    if p.singular_noun(token) is False else token
                    for token in group
                ]
                for group in self.synonyms
            ]

        if self.tense is not None:
            tense_synonyms = []
            for group in self.synonyms:
                conjugated = []
                for element in group:
                    try:
                        conjugated.append(
                            verb.verb_conjugate(element,
                                                tense=self.tense,
                                                negate=False))
                    except Exception:
                        # Not conjugable: keep the token as it is.
                        conjugated.append(element)
                tense_synonyms.append(conjugated)

            self.synonyms = tense_synonyms