Exemplo n.º 1
0
def convert_postag(complex_word, candidates):
    """Inflect substitution candidates so they match the form of ``complex_word``.

    The complex word is tagged with ``NLP.pos_tag`` and the tag is reduced to
    a coarse class by ``get_type``:

    * ``"NN"``  -- a candidate is pluralized when the complex word is tagged
      ``NNS`` and the candidate is not, and singularized when the complex
      word is tagged ``NN`` and the candidate is tagged ``NNS``.
    * ``"ADJ"`` -- a candidate is turned into a comparative / superlative
      when the complex word is tagged ``JJR`` / ``JJS`` and the candidate
      is not.
    * ``"VB"``  -- a candidate whose first reported tense differs from the
      complex word's first reported tense is re-conjugated to that tense.
    * anything else -- ``candidates`` is returned unchanged.

    Args:
        complex_word: The word that is going to be replaced.
        candidates: Iterable of candidate replacement words.

    Returns:
        A ``set`` of adjusted candidates for nouns, adjectives and verbs.
        The original ``candidates`` object is returned as-is when the word
        class is not handled, or when ``tenses`` reports nothing for a verb.
    """
    specific_tag = NLP.pos_tag(complex_word)[0][1]
    generic_tag = get_type(specific_tag)
    final_candidates = set()

    if generic_tag == "NN":  # Nouns
        for candidate in candidates:
            candidate_tag = NLP.pos_tag(candidate)[0][1]
            if specific_tag == "NNS" and candidate_tag != "NNS":
                candidate = pluralize(candidate)
            elif specific_tag == "NN" and candidate_tag == "NNS":
                candidate = singularize(candidate)
            final_candidates.add(candidate)

    elif generic_tag == "ADJ":  # Adjectives
        for candidate in candidates:
            candidate_tag = NLP.pos_tag(candidate)[0][1]
            if specific_tag == "JJR" and candidate_tag != "JJR":
                candidate = comparative(candidate)
            elif specific_tag == "JJS" and candidate_tag != "JJS":
                candidate = superlative(candidate)
            final_candidates.add(candidate)

    elif generic_tag == "VB":  # Verbs
        complex_tenses = tenses(complex_word)
        if len(complex_tenses) < 1:
            return candidates

        # The candidate check below reads the tense name as ``[0][0]`` of the
        # ``tenses`` result, so the complex word's tense name has to be
        # extracted the same way.  Comparing the whole result against a name
        # such as "past" never matched, so no candidate was ever conjugated.
        complex_tense = complex_tenses[0][0]

        for candidate in candidates:
            candidate_tenses = tenses(candidate)
            if (len(candidate_tenses) > 0
                    and candidate_tenses[0][0] != complex_tense):
                if complex_tense == "past":
                    candidate = conjugate(candidate, tense=PAST)
                elif complex_tense == "present":
                    candidate = conjugate(candidate, tense=PRESENT)
                elif complex_tense == "future":
                    candidate = conjugate(candidate, tense=FUTURE)
                elif complex_tense == "infinitive":
                    candidate = conjugate(candidate, tense=INFINITIVE)
            final_candidates.add(candidate)

    else:
        # Unhandled word class: hand the caller's collection back untouched.
        final_candidates = candidates

    return final_candidates
def detect_line_tense(poem):
    """Return the overall tense detected for each line of ``poem`` that has a verb.

    For every line the last token whose tag starts with ``"V"`` is kept as
    that line's verb (lines containing an apostrophe are first passed through
    ``replace_contractions``).  For every line that produced a verb, the tense
    names reported by ``tenses`` are handed to ``detect_overall_tense``; an
    ``IndexError`` raised there yields ``''`` for that line.

    Lines without any verb contribute nothing, so the returned list can be
    shorter than ``poem``.
    """
    # Pass 1: pick the last verb of each line ('' when there is none).
    last_verbs = []
    for raw_line in poem:
        text = replace_contractions(raw_line) if "'" in raw_line else raw_line
        verb = ""
        for token, pos in tag(text, tokenize=True):
            if pos.startswith("V"):
                verb = str(token)
        last_verbs.append(verb)

    # Pass 2: resolve one overall tense per verb that was found.
    detected = []
    for verb in last_verbs:
        if verb:
            tense_names = [entry[0] for entry in tenses(verb)]
            try:
                overall = detect_overall_tense(tense_names)
            except IndexError:
                overall = ''
            detected.append(overall)

    return detected
        def write_hypo(parent, count, list_of_neighbors):
            """Label each neighbor with the relation that links it to ``parent``.

            For every neighbor that has at least one WordNet synset, the first
            synset and a handful of inflection helpers are queried, and the
            first relation (in the order listed below) whose result contains
            ``parent`` decides the suffix appended to the neighbor.

            Args:
                parent: Value looked up (with ``in``) in each relation result.
                count: Stored as the first element of the tuple created for
                    ``word`` the first time a labelled neighbor is recorded.
                list_of_neighbors: Words to classify.

            Returns:
                A dict mapping ``word`` to ``(count, [labelled neighbors])``;
                empty when no neighbor could be linked to ``parent``.

            NOTE(review): ``word`` is not defined in this function; it is
            presumably a variable of the enclosing scope -- verify.
            """
            return_dict = {}

            for neighbor in list_of_neighbors:
                synsets = wordnet.synsets(neighbor)
                if len(synsets) == 0:
                    continue
                first_synset = synsets[0]

                # Ordered (suffix, members) pairs: the first pair whose
                # members contain `parent` wins.  Every entry is evaluated
                # up front, in this order.
                # The suffix names the neighbor's form: if `parent` is the
                # neighbor's singular, the neighbor itself is the plural.
                # NOTE(review): the lexeme / tenses / suggest results are
                # wrapped in a one-element list, so `parent` is compared
                # with the helper's whole return value rather than with its
                # items -- confirm this is intended.
                relations = (
                    ("[SYNO]", first_synset.synonyms),
                    ("[HYPER]", first_synset.hypernyms()),
                    ("[HYPO]", first_synset.hyponyms()),
                    ("[HOLO]", first_synset.holonyms()),
                    ("[MERO]", first_synset.meronyms()),
                    ("[PLURAL]", [singularize(neighbor)]),
                    ("[SINGULAR]", [pluralize(neighbor)]),
                    ("[COMPA]", [comparative(neighbor)]),
                    ("[SUPERLA]", [superlative(neighbor)]),
                    ("[LEMMA]", [lemma(neighbor)]),
                    ("[LEXEME]", [lexeme(neighbor)]),
                    ("[TENSE]", [tenses(neighbor)]),
                    ("[MISPELL]", [suggest(neighbor)]),
                )
                suffix = next(
                    (name for name, members in relations
                     if parent in members),
                    None)
                if suffix is None:
                    continue

                # setdefault creates the (count, []) entry on first use,
                # replacing the former bare `except:` around the lookup.
                return_dict.setdefault(word, (count, []))[1].append(
                    str(neighbor) + suffix)

            return return_dict
Exemplo n.º 4
0
    def post_process(self, tokens: List[str]):
        """Apply surface-level fix-ups to a list of generated tokens.

        Each token is inspected together with its neighbours and rewritten
        by the first matching rule:

        * the first token is capitalised;
        * the tokens of ``1 th to last`` before ``last`` become ``''``;
        * ``a``/``A`` becomes ``an``/``An`` when the next token starts with
          a character in ``self.vocals``;
        * ``into between`` becomes ``in between``;
        * ``th`` after a token ending in 1 / 2 / 3 becomes ``st``/``nd``/``rd``;
        * after ``to``, a token ending in "ed"/"ing" or whose first reported
          tense is past is lemmatized as a verb;
        * after ``in`` (or after ``from`` when the token has a past tense),
          and when the token two positions back is one of a fixed list of
          verbs, the token is replaced by an "-ing" form;
        * after ``could/would/did`` + ``not``/``n't`` the token is lemmatized
          as a verb.

        Args:
            tokens: The token strings of one sentence.

        Returns:
            A new list with one (possibly empty) string per input token.
            An empty input yields an empty list.
        """
        if not tokens:
            # zip_longest below would still yield one all-empty triple for
            # an empty input, and capitalising it raised IndexError.
            return []

        lemmatizer = nltk.stem.wordnet.WordNetLemmatizer()
        result = []
        for i, (prev_token, token, next_token) in \
                enumerate(zip_longest([""] + tokens[:-1], tokens, tokens[1:], fillvalue="")):
            prev_prev_token = tokens[i - 2] if i >= 2 else ""
            # capitalise (an empty first token has nothing to capitalise)
            if i == 0 and token:
                token = token[0].upper() + token[1:]
            if token == '1' and next_token == 'th':
                try:
                    if tokens[i + 2] == 'to' and tokens[i + 3] == 'last':
                        token = ''
                except IndexError:
                    pass
            if next_token == 'last' and token == 'to' and prev_token == 'th' and prev_prev_token == '1':
                token = ''
            if (token == 'a' or token == 'A'
                ) and len(next_token) > 0 and next_token[0] in self.vocals:
                token = token + "n"
            elif token == 'into' and next_token == 'between':
                token = 'in'
            elif token == 'th' and prev_token.endswith('1'):
                token = 'st'
                try:
                    if next_token == 'to' and tokens[i + 2] == 'last':
                        token = ''
                except IndexError:
                    pass
            elif token == 'th' and prev_token.endswith('2'):
                token = 'nd'
            elif token == 'th' and prev_token.endswith('3'):
                token = 'rd'
            elif prev_token == 'to' \
                    and (token.endswith("ed") or token.endswith("ing") or
                         tenses(token) and tenses(token)[0][0] == 'past'):
                token = lemmatizer.lemmatize(token, 'v')
            # `and` binds tighter than `or`: the past-tense requirement only
            # applies after 'from'.  Parentheses make that explicit.
            # NOTE(review): confirm 'in' is meant to skip the tense check.
            elif prev_token == 'in' or (prev_token == 'from' and any(
                    t[0] == 'past' for t in tenses(token))):
                # VERY HACKY
                if (prev_prev_token in [
                        'refrained', "refused", "prohibited", "prevented",
                        "hindered"
                ] or (prev_token == "in" and prev_prev_token == 'succeed')):
                    token = lemmatizer.lemmatize(token, 'v')
                    try:
                        token = pattern_en.verbs[token][5]
                    except Exception:
                        # Best-effort lookup failed: build the "-ing" form
                        # by hand instead.
                        if not token.endswith("ing"):
                            base = lemmatizer.lemmatize(token)
                            # Drop a single trailing "e" ("make" -> "making")
                            # but keep "ee" and two-letter words ("see" ->
                            # "seeing", "be" -> "being").  Splitting on the
                            # last "e" anywhere in the word used to truncate
                            # words such as "keep" to "ke".
                            if (len(base) > 2 and base.endswith("e")
                                    and not base.endswith("ee")):
                                base = base[:-1]
                            token = base + "ing"

            elif prev_token in ('not',
                                "n't") and prev_prev_token in ("could",
                                                               "would", "did"):
                token = lemmatizer.lemmatize(token, 'v')
            result.append(token)
        return result
Exemplo n.º 5
0
def _subtree_text(word):
    """Return the text of ``word``'s subtree, lower-cased except for NNP/NNPS tokens."""
    return ''.join(
        w.text_with_ws.lower() if w.tag_ not in ('NNP', 'NNPS')
        else w.text_with_ws
        for w in word.subtree).strip()


def pass2act(doc, rec=False):
    """Rewrite the passive sentences of ``doc`` in the active voice.

    ``doc`` is parsed with ``nlp`` and processed sentence by sentence.  From
    the dependency labels of each sentence the passive subject, agent, root
    verb, auxiliaries, adverbs, particle, preposition phrase, adverbial
    clause and complement clause are collected.  A sentence for which no
    passive subject or no agent was found is copied unchanged.  Otherwise
    the parts are re-assembled as
    ``agent aux adverb verb particle subject adverb advcl prep xcomp``
    followed by the sentence's punctuation.

    Args:
        doc: Text to convert.
        rec: True when called recursively on a complement clause; in that
            case punctuation is not captured from the clause and the result
            is not capitalised.

    Returns:
        The converted text; every sentence is followed by a single space.
    """
    parse = nlp(doc)
    newdoc = ''
    for sent in parse.sents:

        # Init parts of sentence to capture:
        subjpass = ''
        subj = ''
        verb = ''
        verbtense = ''
        adverb = {'bef': '', 'aft': ''}
        part = ''
        prep = ''
        agent = ''
        aplural = False
        advcltree = None
        # Placeholder '.' tokens so the sliding window over the auxiliaries
        # below always has predecessors to look at.
        aux = list(list(nlp('. .').sents)[0])  # start with 2 'null' elements
        xcomp = ''
        punc = '.'

        # Analyse dependency tree:
        for word in sent:
            if word.dep_ == 'advcl' and word.head.dep_ in ('ROOT', 'auxpass'):
                advcltree = word.subtree
            if word.dep_ == 'nsubjpass' and word.head.dep_ == 'ROOT':
                subjpass = _subtree_text(word)
            if word.dep_ == 'nsubj':
                subj = _subtree_text(word)
                if (word.head.dep_ == 'auxpass'
                        and word.head.head.dep_ == 'ROOT'):
                    subjpass = subj
            if (word.dep_ in ('advmod', 'npadvmod', 'oprd')
                    and word.head.dep_ == 'ROOT'):
                # Adverbs seen before the root verb go in front of it,
                # later ones behind it.
                if verb == '':
                    adverb['bef'] = _subtree_text(word)
                else:
                    adverb['aft'] = _subtree_text(word)
            if word.dep_ == 'auxpass' and word.head.dep_ == 'ROOT':
                if not subjpass:
                    subjpass = subj
            if (word.dep_ in ('aux', 'auxpass', 'neg')
                    and word.head.dep_ == 'ROOT'):
                aux += [word]
            if word.dep_ == 'ROOT':
                verb = word.text
                if word.tag_ == 'VB':
                    verbtense = en.INFINITIVE
                elif word.tag_ == 'VBD':
                    verbtense = en.PAST
                elif word.tag_ == 'VBG':
                    verbtense = en.PRESENT
                    # NOTE(review): this value is overwritten with None
                    # before the auxiliaries are processed below, so it
                    # currently has no effect -- confirm whether intended.
                    verbaspect = en.PROGRESSIVE
                elif word.tag_ == 'VBN':
                    verbtense = en.PAST
                else:
                    try:
                        verbtense = en.tenses(word.text)[0][0]
                    except IndexError:
                        pass
            if word.dep_ == 'prt' and word.head.dep_ == 'ROOT':
                part = _subtree_text(word)
            if word.dep_ == 'prep' and word.head.dep_ == 'ROOT':
                prep = _subtree_text(word)
            if (word.dep_.endswith('obj') and word.head.dep_ == 'agent'
                    and word.head.head.dep_ == 'ROOT'):
                # Like _subtree_text, but an apposition is followed by ', '.
                agent = ''.join(
                    w.text + ', ' if w.dep_ == 'appos' else (
                        w.text_with_ws.lower() if w.tag_ not in (
                            'NNP', 'NNPS') else w.text_with_ws)
                    for w in word.subtree).strip()
                aplural = word.tag_ in ('NNS', 'NNPS')
            if (word.dep_ in ('xcomp', 'ccomp', 'conj')
                    and word.head.dep_ == 'ROOT'):
                xcomp = _subtree_text(word)
                that = xcomp.startswith('that')
                # Convert the embedded clause too; restore a leading 'that'
                # if the recursive call dropped it.
                xcomp = pass2act(xcomp, True).strip(' .')
                if not xcomp.startswith('that') and that:
                    xcomp = 'that ' + xcomp
            if word.dep_ == 'punct' and not rec:
                if word.text != '"':
                    punc = word.text

        # exit if not passive:
        if subjpass == '':
            newdoc += str(sent) + ' '
            continue

        # if no agent is found, the sentence is left untouched:
        if agent == '':
            newdoc += str(sent) + ' '
            continue

        # invert nouns:
        agent = nouninv(agent)
        subjpass = nouninv(subjpass)

        # Conjugation of the auxiliaries and the main verb:
        auxstr = ''
        num = en.SINGULAR if not aplural or agent in ('he',
                                                      'she') else en.PLURAL
        aux.append(aux[0])  # trailing placeholder for the look-ahead slot
        verbaspect = None
        # Sliding window: a is the current auxiliary, p / pp the one and two
        # before it, n the one after it.
        for (pp, p, a, n) in zip(aux, aux[1:], aux[2:], aux[3:]):
            if a.lemma_ == '.':
                continue

            if a.lemma_ == 'not':
                if p.lemma_ == 'be':
                    if n.lemma_ == 'be':
                        # NOTE(review): `a` is the 'not' token here; an empty
                        # tenses() result would raise IndexError -- confirm
                        # whether `n.text` was meant.
                        verbtense = en.tenses(a.text)[0][0]
                        auxstr += en.conjugate('be',
                                               tense=en.tenses(p.text)[0][0],
                                               number=num) + ' '
                        verbaspect = en.PROGRESSIVE
                    else:
                        auxstr += en.conjugate('do',
                                               tense=en.tenses(p.text)[0][0],
                                               number=num) + ' '
                        verbtense = en.INFINITIVE
                auxstr += 'not '
            elif a.lemma_ == 'be':
                if p.lemma_ == 'be':
                    verbtense = en.tenses(a.text)[0][0]
                    auxstr += en.conjugate(
                        'be', tense=en.tenses(a.text)[0][0], number=num) + ' '
                    verbaspect = en.PROGRESSIVE
                elif p.tag_ == 'MD':
                    verbtense = en.INFINITIVE
            elif a.lemma_ == 'have':
                # After a modal, conjugate 'have' with the plural number.
                # The original line used `==` here, so the result was
                # computed and thrown away.
                have_num = en.PLURAL if p.tag_ == 'MD' else num
                auxstr += en.conjugate(
                    'have', tense=en.tenses(a.text)[0][0],
                    number=have_num) + ' '
                if n.lemma_ == 'be':
                    verbaspect = en.PROGRESSIVE
                    verbtense = en.tenses(n.text)[0][0]
            else:
                auxstr += a.text_with_ws
        auxstr = auxstr.lower().strip()

        if verbaspect:
            verb = en.conjugate(verb, tense=verbtense, aspect=verbaspect)
        else:
            verb = en.conjugate(verb, tense=verbtense)

        # Rebuild the adverbial clause; a progressive verb in it is turned
        # into a 'which ...' clause in the main verb's tense.
        advcl = ''
        if advcltree:
            for w in advcltree:
                if w.pos_ == 'VERB' and en.tenses(
                        w.text)[0][4] == en.PROGRESSIVE:
                    advcl += 'which ' + en.conjugate(
                        w.text, tense=en.tenses(verb)[0][0]) + ' '
                else:
                    advcl += w.text_with_ws

        # Join the non-empty parts in active-voice order.
        newsent = ' '.join(
            filter(None, [
                agent, auxstr, adverb['bef'], verb, part, subjpass,
                adverb['aft'], advcl, prep, xcomp
            ])) + punc
        if not rec:
            newsent = newsent[0].upper() + newsent[1:]
        newdoc += newsent + ' '
    return newdoc