Exemplos de getInflection em Python, exemplos de lemminflect.getInflection em Python

Exemplo n.º 1

0

Exibir arquivo

def tense_of_verb(verb_str):
    """
    Classify the tense of a single verb form.

    Auxiliary and modal verbs are recognized first, by a case-insensitive lookup in a
    fixed list. Any other word is lemmatized as a verb, and the word (compared as given,
    i.e. case-sensitively) is looked up in the lemma's inflections for 'VBD', then 'VBP',
    then 'VBZ'; the first tag whose inflections contain the word wins.

    @param verb_str: a str containing a verb
    @return: a tuple t. t[0] is 'AUX' for an auxiliary verb, '?' if no tense matched, and
        otherwise 'VBD', 'VBP' or 'VBZ' (the Penn Treebank P.O.S. tags for past tense,
        non-3rd person present tense and 3rd person present tense). t[1] is verb_str
        itself in the 'AUX' case and the lemma in every other case.
    """
    auxiliaries = (
        'am', 'is', 'are', 'was', 'were', 'have', 'has', 'had', 'do', 'does',
        'did', 'will', 'would', 'shall', 'should', 'may', 'might', 'must',
        'can', 'could', 'ought',
    )
    if verb_str.lower() in auxiliaries:
        return ('AUX', verb_str)

    base_form = getLemma(verb_str, upos='VERB')[0]
    # Order matters: a form shared by several tags is reported under the first match.
    for penn_tag in ('VBD', 'VBP', 'VBZ'):
        if verb_str in getInflection(base_form, tag=penn_tag):
            return (penn_tag, base_form)
    return ('?', base_form)

Exemplo n.º 2

0

Exibir arquivo

 def testProperNouns(self):
     """Proper-noun inflection via getInflection and via the spaCy ``inflect`` extension."""
     # (lemma, tag, inflect_oov, expected forms); 'Axxlaskan' is a made-up word
     # used for the inflect_oov=True cases.
     cases = (
         ('Alaskan', 'NN', False, ()),
         ('Alaskan', 'NNP', False, ('Alaskan', )),
         ('Alaskan', 'NNPS', False, ('Alaskans', )),
         ('Axxlaskan', 'NNP', True, ('Axxlaskan', )),
         ('Axxlaskan', 'NNPS', True, ('Axxlaskans', )),
     )
     for lemma, tag, use_oov, expected in cases:
         forms = lemminflect.getInflection(lemma, tag, inflect_oov=use_oov)
         self.assertEqual(len(forms), len(expected))
         if expected:
             self.assertEqual(forms[0], expected[0])

     # lemmatize with lemminflect
     lemminflect.Inflections().setUseInternalLemmatizer(True)
     spacy_cases = (
         ('The Alaskan went South.', False, 'Alaskans'),
         ('The Axxlaskan went South.', True, 'Axxlaskans'),
     )
     for sentence, use_oov, plural in spacy_cases:
         # Index 1 is the noun following 'The'
         token = self.nlp(sentence)[1]
         self.assertEqual(token._.inflect('NNPS', inflect_oov=use_oov), plural)

Exemplo n.º 3

0

Exibir arquivo

 def testOverrides(self):
     """Check that an entry in the overrides dictionary takes precedence in getInflection.

     Along the way it also checks that the lemma APIs emit a log record and return
     an empty result when called with the upos value 'X', and that the spaCy
     ``lemma`` extension returns 'I' for the token 'I'.
     """
     # Run the inflection system once to make sure the overrides are loaded
     # (lazy loading), and verify the un-overridden result while doing so.
     self.assertEqual(lemminflect.getInflection('watch', 'VBD'),
                      ('watched', ))
     orig_dict = lemminflect.Inflections().overrides_dict
     with self.assertLogs():
         lemmas = lemminflect.getLemma('WORD', 'X')
     self.assertEqual(lemmas, ())
     with self.assertLogs():
         lemmas = lemminflect.getAllLemmas('WORD', 'X')
     self.assertEqual(lemmas, {})
     with self.assertLogs():
         lemmas = lemminflect.getAllLemmasOOV('WORD', 'X')
     self.assertEqual(lemmas, {})
     token = self.nlp('I')[0]
     self.assertEqual(token._.lemma(), 'I')
     # Hack the code to replace the overrides dictionary
     lemminflect.Inflections().overrides_dict = {
         'watch': {
             'VBD': ('xxx', )
         }
     }
     try:
         inflections = lemminflect.getInflection('watch',
                                                 'VBD',
                                                 inflect_oov=False)
         self.assertEqual(inflections, ('xxx', ))
     finally:
         # Always put the original dictionary back, so a failing assertion
         # cannot leak the fake override into other tests.
         lemminflect.Inflections().overrides_dict = orig_dict

Exemplo n.º 4

0

Exibir arquivo

 def testGetInflection04(self):
     """'watch' as an adjective: empty unless OOV inflection is enabled; VBD still works."""
     adjective_forms = lemminflect.getAllInflections('watch', 'ADJ')
     self.assertEqual(adjective_forms, {})

     # Same lemma and tag, differing only in whether OOV rules may be applied
     for use_oov, expected in ((False, ()), (True, ('watch', ))):
         forms = lemminflect.getInflection('watch', 'JJ', inflect_oov=use_oov)
         self.assertEqual(forms, expected)

     past_tense = lemminflect.getInflection('watch', 'VBD')
     self.assertEqual(past_tense, ('watched', ))

Exemplo n.º 5

0

Exibir arquivo

Arquivo: InflectionRulesTests.py Projeto: stjordanis/LemmInflect

 def testGetInflectionOOV(self):
     """Inflection of made-up ('xx'-prefixed) lemmas with OOV rules off and on."""
     # With inflect_oov=False the made-up lemma yields no forms at all
     for tag in ('NN', 'NNS'):
         self.assertEqual(lemminflect.getInflection('xxbike', tag, inflect_oov=False), ())

     # (lemma, tag, expected forms) with inflect_oov=True; the trailing labels
     # are carried over unchanged from the original test.
     oov_cases = (
         ('xxbike',    'NN',  ('xxbike',)),        # reg
         ('xxbike',    'NNS', ('xxbikes',)),       # reg
         ('xxbaggy',   'JJR', ('xxbaggier',)),     # reg
         ('xxclean',   'RBS', ('xxcleanest',)),    # reg
         ('xxformat',  'VBG', ('xxformatting',)),  # regd
         ('xxbacklog', 'VBD', ('xxbacklogged',)),  # regd
         ('xxgenesis', 'NNS', ('xxgeneses',)),     # glreg
         ('xxalumus',  'NNS', ('xxalumi',)),       # glreg
     )
     for lemma, tag, expected in oov_cases:
         self.assertEqual(lemminflect.getInflection(lemma, tag, inflect_oov=True), expected)

Exemplo n.º 6

0

Exibir arquivo

def match_pronoun_present(verb_str: str, pronoun_str: str) -> str:
    """
    Re-inflect a present-tense verb so that it agrees with a pronoun.

    The verb is split into its affirmative part and its negation, the affirmative
    part is lemmatized and re-inflected with the tag mapped to the pronoun, and the
    negation is merged back in. The original verb is returned unchanged when it is
    a modal verb, when lemmatization does not give exactly one lemma, or when the
    inflection lookup gives no form or more than two forms (the last two cases
    also log a warning).

    Raises ValueError if the lower-cased pronoun has no tag mapping.
    This function should only be used for present tense.
    """
    pronoun_str = pronoun_str.lower()
    if pronoun_str not in __pronoun_to_verb_upenn_dict:
        raise ValueError(
            'Unexpected value for pronoun "{}"'.format(pronoun_str))

    affirmative, negation = split_verb_negation(verb_str)
    if is_modal_verb(affirmative):
        return verb_str

    lemmas = __collapse_lemma_list(getLemma(affirmative, "VERB"))
    if len(lemmas) != 1:
        message = 'WARNING: Ambigous or no lemma for "{}". Output was {}. Keeping original verb.'
        logging.warning(message.format(verb_str, lemmas))
        return verb_str
    lemma = lemmas[0]

    target_tag = __pronoun_to_verb_upenn_dict[pronoun_str]
    forms = getInflection(lemma, target_tag)
    form_count = len(forms)
    if form_count == 0 or form_count > 2:
        message = 'WARNING: Ambigous or no inflection list for lemma "{}" from verb "{}". Output was {}. Keeping original verb.'
        logging.warning(message.format(lemma, verb_str, forms))
        return verb_str

    # With two forms, the first is used for 'i' and the second for every other pronoun.
    if form_count == 2 and pronoun_str != 'i':
        chosen = forms[1]
    else:
        chosen = forms[0]
    return merge_verb_negation(chosen, negation)

Exemplo n.º 7

0

Exibir arquivo

Arquivo: lexical_en.py Projeto: DavidHeineman/lexi-server

 def filter_out_tense(self, sent, so, eo, candidates):
     """Return candidates whose stem differs from the target word's and from earlier candidates'.

     The target word is ``sent[so:eo]``. Candidates are visited in order; one whose
     stem was already seen (including the target's own stem) is dropped. A kept
     candidate is appended unchanged, except when ``tag_for_lemmatizer`` returns
     None for it: it is then lemmatized and re-inflected to the target word's POS
     tag. If that raises IndexError, the candidate is kept unchanged and a debug
     message is logged.
     """
     target = sent[so:eo]
     target_tag = nltk.pos_tag([target])[0][1]
     seen_stems = [self.ps.stem(target)]
     kept = []
     for candidate in candidates:
         stem = self.ps.stem(candidate)
         if stem in seen_stems:
             continue
         seen_stems.append(stem)
         try:
             lemmatizer_tag = self.tag_for_lemmatizer(candidate)
             if lemmatizer_tag is not None:
                 kept.append(candidate)
             else:
                 # NOTE(review): re-inflection only happens when the lemmatizer tag is
                 # None, and that None is then passed as `pos` - this looks inverted;
                 # confirm the intended condition against tag_for_lemmatizer.
                 lemma = self.lem.lemmatize(candidate, pos=lemmatizer_tag)
                 kept.append(getInflection(lemma, tag=target_tag)[0])
         except IndexError:
             # Lemminflect does not support all POS tags - lemminflect.readthedocs.io/en/latest/tags/
             kept.append(candidate)
             logger.debug(
                 "ERROR: Lemminflect cannot convert {} with type {}, skipping"
                 .format(candidate, target_tag))
     return kept

Exemplo n.º 8

0

Exibir arquivo

Arquivo: inflection.py Projeto: inthescales/lyres-dictionary

def inflect(string, mode):
    """Inflect the bracketed word(s) of a space-separated phrase according to *mode*.

    In a multi-word phrase only words written as ``[word]`` are processed (the
    brackets are removed); a single-word phrase is always processed. Each processed
    word is first passed to ``override_inflection``; if that returns a value, the
    value is returned immediately as the result of the whole call. Otherwise the
    word is replaced by the first lemminflect inflection for the tag mapped to
    *mode*. Modes without a mapping (such as "inf") leave the word as it is.

    Empty tokens, which come from an empty input string or from consecutive
    spaces, are passed through untouched rather than raising IndexError.

    Returns the words joined back together with single spaces.
    """
    # Penn Treebank tag requested from lemminflect for each supported mode.
    mode_to_tag = {
        "ppart": 'VBN',
        "part": 'VBG',
        "3sg": 'VBZ',
        "sg": 'NN',
        "pl": 'NNS',
        "mass": 'NN',
        "singleton": 'NN',
    }

    words = string.split(" ")
    for i, word in enumerate(words):
        if not word:
            # Nothing to inflect; indexing word[0] here used to raise IndexError.
            continue
        if word.startswith("[") and word.endswith("]"):
            words[i] = word[1:-1]
        elif len(words) > 1:
            continue

        # Local checking for forms 3rd party library does wrong
        override = override_inflection(words[i], mode)
        if override is not None:
            return override

        tag = mode_to_tag.get(mode)
        if tag is not None:
            words[i] = lemminflect.getInflection(words[i], tag=tag)[0]

    return " ".join(words)

Exemplo n.º 9

0

Exibir arquivo

Arquivo: InflectionTests.py Projeto: bjascob/LemmInflect

 def testGetInflection03(self):
     """getAllInflections for 'watch': all tags, verb tags only, and no adjective forms."""
     noun_forms = {'NNS': ('watches', 'watch'), 'NN': ('watch',)}
     verb_forms = {
         'VBD': ('watched',),
         'VBG': ('watching',),
         'VBZ': ('watches',),
         'VB': ('watch',),
         'VBP': ('watch',),
     }
     # Without a upos filter both the noun and the verb entries are returned
     self.assertEqual(lemminflect.getAllInflections('watch'), {**noun_forms, **verb_forms})
     self.assertEqual(lemminflect.getAllInflections('watch', 'VERB'), verb_forms)
     self.assertEqual(lemminflect.getInflection('watch', 'VBD'), verb_forms['VBD'])
     self.assertEqual(lemminflect.getAllInflections('watch', 'ADJ'), {})

Exemplo n.º 10

0

Exibir arquivo

Arquivo: words.py Projeto: phueb/Zorro

def get_legal_words(tag: str,
                    second_tag: Optional[str] = None,  # also counterbalance list of other word forms (e.g. plural)
                    seed: int = configs.Data.seed,
                    exclude: Optional[Tuple[str, ...]] = None,
                    verbose: bool = False,
                    ) -> Union[List[str], List[Tuple[str, str]]]:
    """Load the legal words for *tag* and return a counterbalanced subset of them.

    Words are read from ``<tag>.csv`` in the legal-words directory, keeping rows
    whose ``is_legal`` column is truthy and dropping anything listed in *exclude*.
    When *second_tag* is given, a second form is derived for every word (a plural
    for 'NNP', a lemminflect inflection for tags starting with 'VB'), and only
    pairs whose second form is in ``vocab`` and differs from the first form are
    kept. The surviving words (and second forms, if any) are handed to
    ``find_counterbalanced_subset``, whose result is returned.

    Raises AttributeError for any other non-None *second_tag*, and AssertionError
    if no word pair survives the filtering.
    """
    print(f'Obtaining counterbalanced subset of legal words with tag={tag} and second_tag={second_tag}')

    # words with the requested tag, in file order, restricted to legal rows
    legal_table = pd.read_csv(configs.Dirs.legal_words / f'{tag}.csv')
    legal_mask = legal_table['is_legal'].astype(bool).tolist()
    candidates = legal_table['word'][legal_mask].tolist()

    if exclude:
        candidates = [word for word in candidates if word not in exclude]

    if second_tag is None:
        first_forms, second_forms = candidates, None
    else:
        # derive the 2nd form of every candidate
        if second_tag == 'NNP':
            pluralizer = inflect.engine()
            derived = [pluralizer.plural(word) for word in candidates]
        elif second_tag.startswith('VB'):
            # getInflection requires a lemma as input
            lemmas = [getLemma(word, upos='VERB')[0] for word in candidates]
            derived = [getInflection(lemma, tag=second_tag)[0] for lemma in lemmas]
        else:
            raise AttributeError('Invalid arg to second_tag')

        # keep a pair only if its 2nd form is in vocab and is not identical to the 1st form
        first_forms, second_forms = [], []
        for w1, w2 in zip(candidates, derived):
            if w2 not in vocab or w2 == w1:
                continue
            first_forms.append(w1)
            second_forms.append(w2)
            if verbose:
                print(f'Included {w1:<12} and {w2:<12}')
        assert first_forms
        assert second_forms

    # find subset of words such that their total corpus frequencies are approx equal across corpora
    sample_size = configs.Data.tag2num_words[tag]
    return find_counterbalanced_subset(first_forms,
                                       min_size=sample_size,
                                       max_size=sample_size+100,
                                       second_forms=second_forms,
                                       seed=seed,
                                       verbose=verbose,
                                       )

Exemplo n.º 11

0

Exibir arquivo

Arquivo: verb_inflection.py Projeto: nymwa/arteraro

def sample_verb(tag_list, source_tag, source):
    """Pick a random inflection of *source* for a randomly chosen tag other than *source_tag*.

    A tag is drawn from *tag_list* with *source_tag* removed, and *source* is
    inflected for it. If that gives no forms, all OOV verb inflections of
    *source* (across every tag) are used as the candidate pool instead.

    Returns one candidate string chosen at random, or None when there is no
    candidate. ``rd.choice`` raises IndexError if *tag_list* contains no tag
    other than *source_tag*.
    """
    other_tags = [tag for tag in tag_list if tag != source_tag]
    tag = rd.choice(other_tags)
    # getInflection returns a tuple, so emptiness must be tested by truthiness;
    # comparing against [] is never true and left the fallback unreachable.
    candidates = list(getInflection(source, tag))
    if not candidates:
        # The OOV result maps tag -> tuple of forms; flatten it so that a single
        # word is drawn below (dict views cannot be indexed by rd.choice).
        oov_forms = getAllInflectionsOOV(source, upos='VERB')
        candidates = [form for forms in oov_forms.values() for form in forms]
    return rd.choice(candidates) if candidates else None

Exemplo n.º 12

0

Exibir arquivo

Arquivo: recast_tbdense_rte.py Projeto: sidsvash26/temporal_nli

def inflection(pred_lemma, pred_pos, pred_word):
    """Return the gerund (VBG) form of a verb predicate, or the original word.

    *pred_word* is returned unchanged when *pred_pos* is not "VERB", and also when
    the lemma already ends in "ing" (to cater to errors in the lemma). Otherwise
    the first VBG inflection of *pred_lemma* is returned.
    """
    if pred_pos != "VERB":
        return pred_word
    # Check the lemma before inflecting: the inflection is not needed in this
    # case, and looking it up first could raise for a lemma with no VBG form.
    if pred_lemma.lower().endswith("ing"):
        return pred_word
    return getInflection(pred_lemma, tag='VBG')[0]

Exemplo n.º 13

0

Exibir arquivo

 def testUPOSLog(self):
     """Calls with the tag/upos value 'X' must log something and return an empty result."""
     # (API under test, the empty value it is expected to return)
     checks = (
         (lemminflect.getInflection, ()),
         (lemminflect.getAllInflections, {}),
         (lemminflect.getAllInflectionsOOV, {}),
     )
     for api_call, empty_result in checks:
         with self.assertLogs():
             result = api_call('WORD', 'X')
         self.assertEqual(result, empty_result)

     # The spaCy extension hands the token text back unchanged for 'X'
     first_token = self.nlp('testing')[0]
     self.assertEqual(first_token._.inflect('X'), 'testing')

Exemplo n.º 14

0

Exibir arquivo

Arquivo: inflector.py Projeto: databill86/replaCy

    def inflect_lemma(self, lemma, tag=None, pos=None):
        """Return the inflections of *lemma* as a flat list.

        With a truthy *tag*, only the forms for that tag are returned. Otherwise
        the forms of every tag reported for *pos* (which may be None) are
        concatenated in the order the inflection dictionary yields them.
        """
        if tag:
            # tag based
            return list(lemminflect.getInflection(lemma, tag=tag))

        # pos based, can be None too
        forms_by_tag = lemminflect.getAllInflections(lemma, upos=pos)
        return [form for forms in forms_by_tag.values() for form in forms]

Exemplo n.º 15

0

Exibir arquivo

Arquivo: bite_wordpiece.py Projeto: salesforce/bite

 def convert_tokens_to_string(self, tokens):
     """Join tokens into a string, merging wordpieces and applying inflection markers.

     A token starting with '##' (and longer than two characters) is glued onto
     the previous word, or starts a new word if there is none. A token found in
     ``self.inflection_tokens`` is not emitted; instead the previous word is
     replaced by its first inflection for the tag spelled inside the marker
     (the marker minus its first and last character), if there is one. An
     inflection marker with no preceding word is ignored.

     Returns the resulting words joined by single spaces.
     """
     result = []
     for token in tokens:
         # combine wordpiece tokens
         if len(token) > 2 and token.startswith('##'):
             if result:
                 result[-1] += token[2:]
             else:
                 result.append(token[2:])
             continue
         if token in self.inflection_tokens:
             # Guard on `result` rather than on the token position: several
             # leading markers used to hit result[-1] on an empty list.
             if result:
                 inflected = getInflection(result[-1], tag=token[1:-1])
                 if inflected:
                     result[-1] = inflected[0]
         else:
             result.append(token)
     return ' '.join(result)

Exemplo n.º 16

0

Exibir arquivo

def get_lemminflect(token):
    """Collect inflections of a token's lemma under the other tags of its POS.

    Returns an empty list when the token's POS is not in REPLACE_POS. Otherwise,
    for every tag listed for that POS in POS_TO_TAGS except the token's own tag,
    the lemma is inflected and each form that differs from the token text
    (ignoring case) is collected. Duplicates are removed; the order of the
    returned list is that of a set and therefore not meaningful.
    """
    surface, lemma, own_tag, upos = token.text, token.lemma_, token.tag_, token.pos_
    if upos not in REPLACE_POS:
        return []

    variants = {
        form
        for other_tag in POS_TO_TAGS[upos]
        if other_tag != own_tag
        for form in getInflection(lemma, tag=other_tag)
        if form.lower() != surface.lower()
    }
    return list(variants)

Exemplo n.º 17

0

Exibir arquivo

Arquivo: base_inflect.py Projeto: salesforce/bite

    def detokenize(self,
                   tokens: List[str],
                   as_list: bool = False) -> Union[str, List[str]]:
        """Turn a token sequence back into words, applying inflection markers.

        Each token is first mapped through ``self.reverse_single_char_map`` when
        it has an entry there. A token found in ``self.inflection_tokens`` is not
        emitted; instead the previous word is replaced by its first inflection for
        the tag spelled inside the marker (the marker minus its first and last
        character), if there is one. An inflection marker with no preceding word
        is ignored.

        Returns the list of words when *as_list* is true; otherwise the words
        joined by ``self.detokenizer`` when ``self.pretok_type`` is 'moses', or by
        single spaces in every other case.
        """
        result = []
        for token in tokens:
            # map single-character stand-ins back to the token they represent
            if token in self.reverse_single_char_map:
                token = self.reverse_single_char_map[token]
            if token in self.inflection_tokens:
                # Guard on `result` rather than on the token position: several
                # leading markers used to hit result[-1] on an empty list.
                if result:
                    inflected = getInflection(result[-1], tag=token[1:-1])
                    if inflected:
                        result[-1] = inflected[0]
            else:
                result.append(token)

        if as_list:
            # Allow users to detokenize using their own detokenizers
            return result
        if self.pretok_type == 'moses':
            return self.detokenizer.detokenize(result)
        return ' '.join(result)

Exemplo n.º 18

0

Exibir arquivo

Arquivo: verb_inflection.py Projeto: nymwa/arteraro

    def __call__(self, sent, index):
        """Replace the word at ``sent[index]`` with a randomly chosen different inflection.

        The source word is the token's ``org`` if set, else its ``lemma``. A tag
        other than the token's own is drawn from ``self.tag_list`` and the
        lower-cased word is inflected for it; if that gives no forms, all OOV verb
        inflections of the word are used as the candidate pool. When a candidate
        is found it is written to ``org`` (title-cased if the source word was),
        an auxiliary token may be attached in front of it, and the token is passed
        through ``self.add_history``.

        Returns *sent*, modified in place.
        """
        # get word
        if sent[index].org is not None:
            word = sent[index].org
        else:
            word = sent[index].lemma

        # get cand
        cand = None
        source = word.lower()
        if source != '':
            source_tag = sent[index].tag
            tag_list = [tag for tag in self.tag_list if tag != source_tag]
            tag = rd.choice(tag_list)
            # getInflection returns a tuple, so emptiness must be tested by
            # truthiness; comparing against [] is never true and left the
            # fallback unreachable.
            cand_list = list(getInflection(source, tag))
            if not cand_list:
                # The OOV result maps tag -> tuple of forms; flatten it so that
                # a single word is drawn (dict views cannot be indexed by rd.choice).
                oov_forms = getAllInflectionsOOV(source, upos='VERB')
                cand_list = [form for forms in oov_forms.values() for form in forms]
            if cand_list:
                cand = rd.choice(cand_list)

        # replace to cand
        if cand is not None:
            if word.istitle():
                cand = cand.title()
            sent[index].org = cand
            if ((index >= 1 and sent[index - 1].pos != 'AUX')
                    and (index >= 2 and sent[index - 2].pos != 'AUX')
                    and self.sampler() < self.aux_ratio
                ):  # if no AUX directly precedes and the tag is VBG or VBN, insert a form of "have (been)" right before
                if tag == 'VBG':
                    sent[index].addition.append(
                        EnToken(index=sent[index].index - 0.25,
                                org=self.vbg_sampler()))
                elif tag == 'VBN':
                    sent[index].addition.append(
                        EnToken(index=sent[index].index - 0.25,
                                org=self.vbn_sampler()))
            sent[index] = self.add_history(sent[index])
        return sent

Exemplo n.º 19

0

Exibir arquivo

Arquivo: Tenses.py Projeto: nicklogin/DisGen

 def inflect(self, tag):
     """Switch this object to *tag* and refresh its text from the lemma's first inflection.

     ``text_with_ws`` is rebuilt as the new text followed by ``self.whitespace``.
     Raises IndexError if lemminflect returns no form for the tag.
     """
     self.tag = tag
     forms = lemminflect.getInflection(self.lemma, tag)
     self.text = forms[0]
     self.text_with_ws = self.text + self.whitespace

Exemplo n.º 20

0

Exibir arquivo

def sample_cand(tag_list, source_tag, source):
    """Return a random inflection of *source* for a random tag other than *source_tag*.

    ``rd.choice`` raises IndexError when no other tag is available or when the
    chosen tag yields no inflection.
    """
    other_tags = list(filter(lambda candidate_tag: candidate_tag != source_tag, tag_list))
    chosen_tag = rd.choice(other_tags)
    return rd.choice(getInflection(source, chosen_tag))

Exemplo n.º 21

0

Exibir arquivo

Arquivo: app.py Projeto: aolney/LemmInflect

def api_getInflection():
    """Inflect the lemma described by the request's JSON body and return the result as JSON.

    The body must provide the keys 'lemma', 'tag' and 'inflect_oov'; a missing
    key raises KeyError.
    """
    payload = request.json
    lemma = payload['lemma']
    tag = payload['tag']
    use_oov = payload['inflect_oov']
    return jsonify(getInflection(lemma, tag, use_oov))

Exemplo n.º 22

0

Exibir arquivo

import json
import codecs

# Load the lemmatised subject-verb-object triples from a JSON file; the
# 'utf-8-sig' codec drops a leading byte-order mark if the file has one.
with codecs.open('svo_triples_lemmatised.txt', 'r', 'utf-8-sig') as json_file:
    svo_triples = json.load(json_file)

# Skip the first three entries.
# NOTE(review): the reason is not visible here (header rows? known-bad data?) - confirm.
svo_triples = svo_triples[3:]

# Output buckets, one per sentence variant. Only pos_sentences is filled in the
# code visible here.
pos_sentences = []
neg_sentences = []
passive_sentences = []
swapped_sentences = []

# NOTE(review): `lem` is not imported in the visible code; presumably an alias
# for lemminflect (e.g. `import lemminflect as lem`) - confirm.
for triple in svo_triples:
    # Each triple is indexed as (subject, verb, object). With inflect_oov=False
    # the lookups below are compared against the empty tuple to detect "no form".
    triple_subject = lem.getInflection(triple[0], tag='NNS', inflect_oov=False)
    triple_verb = lem.getInflection(triple[1], tag='VBD', inflect_oov=False)
    # The base (VB) and past-participle (VBN) forms are computed but not used in
    # the code visible here.
    triple_negative_verb = lem.getInflection(triple[1],
                                             tag='VB',
                                             inflect_oov=False)
    triple_passive_verb = lem.getInflection(triple[1],
                                            tag='VBN',
                                            inflect_oov=False)
    triple_object = lem.getInflection(triple[2], tag='NNS', inflect_oov=False)
    # Build a sentence only when subject, verb and object all have a form.
    if triple_subject != () and triple_verb != () and triple_object != ():
        #print((triple_subject,triple_verb,triple_object))
        #print(triple)
        # "<Plural subject> <past-tense verb> <plural object>."
        pos_sentences += [
            triple_subject[0].capitalize() + " " + triple_verb[0] + " " +
            triple_object[0] + "."
        ]

Exemplo n.º 23

0

Exibir arquivo

Arquivo: documents.py Projeto: MosesMendoza/text-inflector

async def postInflection(word: Word, pos: PartOfSpeech = Body(...)):
  """Inflect the text of *word* for the tag of *pos* and return it under the key "inflection"."""
  forms = getInflection(word.text, pos.tag)
  return {"inflection": forms}

Exemplo n.º 24

0

Exibir arquivo

def inflection(pred_lemma, pred_pos, pred_word):
    """Return the first VBG inflection of *pred_lemma* for a verb, else *pred_word* unchanged."""
    if pred_pos != "VERB":
        return pred_word
    gerund_forms = getInflection(pred_lemma, tag='VBG')
    return gerund_forms[0]

Exemplo n.º 25

0

Exibir arquivo

 def testGetInflection02(self):
     """Singular and plural noun forms of 'squirrel'."""
     expected_by_tag = (
         ('NN', ('squirrel', )),
         ('NNS', ('squirrels', 'squirrel')),
     )
     for tag, expected in expected_by_tag:
         self.assertEqual(lemminflect.getInflection('squirrel', tag), expected)