Exemplo n.º 1
0
def WriteAdjective(outfile, degree, lemma, adjective, standard=None):
  """Write a single ``Adj[...]`` grammar rule line to ``outfile``.

  Args:
    outfile: Object with a ``write`` method; receives the rule line.
    degree: Value formatted into the ``DEG`` feature.
    lemma: Object whose ``synset.name`` fills the ``SNS`` feature and
      which is handed to ``base.GetCompoundCount`` for ``FRQ``.
    adjective: Value handed to ``base.LemmaToTerminals`` to produce the
      right-hand side of the rule.
    standard: Truthy emits ``+std``; falsy (the default) emits ``-std``.
  """
  std_sign = '-'
  if standard:
    std_sign = '+'

  rule = 'Adj[DEG=%s,SNS="%s",FRQ=%d,%sstd] -> %s\n' % (
      degree,
      lemma.synset.name,
      base.GetCompoundCount(lemma),
      std_sign,
      base.LemmaToTerminals(adjective),
  )
  outfile.write(rule)
Exemplo n.º 2
0
def WriteProperNoun(outfile, lemma, gender):
    """Write a singular ``PrpN[...]`` grammar rule line for ``lemma``.

    Nothing is written when ``ShouldBeIgnored(lemma)`` is true.

    Args:
        outfile: Object with a ``write`` method; receives the rule line.
        lemma: Object providing ``name`` and ``synset.name``.
        gender: Value for the ``SEX`` feature; when falsy the feature is
            left out of the rule entirely.
    """
    if ShouldBeIgnored(lemma):
        return

    terminals = base.LemmaToTerminals(lemma.name)
    # The frequency is the compound count minus one; the reason for the
    # offset is not visible in this function.
    frequency = base.GetCompoundCount(lemma) - 1
    sense = lemma.synset.name

    if gender:
        rule = 'PrpN[NUM=sg,SNS="%s",SEX=%s,FRQ=%d] -> %s\n' % (
            sense, gender, frequency, terminals)
    else:
        rule = 'PrpN[NUM=sg,SNS="%s",FRQ=%d] -> %s\n' % (
            sense, frequency, terminals)
    outfile.write(rule)
Exemplo n.º 3
0
def WriteNoun(outfile, lemma, gender, number, name_override=None):
    """Write a ``Noun[...]`` grammar rule line for ``lemma``.

    Nothing is written when ``ShouldBeIgnored`` returns true for the
    override (if one is given) or otherwise for the lemma.

    Args:
        outfile: Object with a ``write`` method; receives the rule line.
        lemma: Object providing ``name`` and ``synset.name``; also passed
            to ``base.GetCompoundCount`` for the ``FRQ`` feature.
        gender: Value for the ``SEX`` feature; when falsy the feature is
            left out of the rule entirely.
        number: Value formatted into the ``NUM`` feature.
        name_override: When truthy, used instead of ``lemma.name`` as the
            input to ``base.LemmaToTerminals``.
    """
    # NOTE(review): a truthy name_override is handed to ShouldBeIgnored in
    # place of the lemma itself -- confirm that ShouldBeIgnored accepts
    # whatever type callers pass as name_override.
    ignore_subject = name_override if name_override else lemma
    if ShouldBeIgnored(ignore_subject):
        return

    surface = name_override if name_override else lemma.name
    terminals = base.LemmaToTerminals(surface)
    frequency = base.GetCompoundCount(lemma)
    sense = lemma.synset.name

    if gender:
        rule = 'Noun[NUM=%s,SNS="%s",SEX=%s,FRQ=%s] -> %s\n' % (
            number, sense, gender, frequency, terminals)
    else:
        rule = 'Noun[NUM=%s,SNS="%s",FRQ=%s] -> %s\n' % (
            number, sense, frequency, terminals)
    outfile.write(rule)
Exemplo n.º 4
0
def _GetVerbClassesWithChildren():
    """Map each VerbNet class id to the class ids at or below it.

    A class id with more than one '-' (``name-number-a-b``) is registered
    under ``name-number``, ``name-number-a`` and under its own full id.
    Every other class id is registered only under itself.

    Returns:
        A ``collections.defaultdict(set)``; looking up an unknown class id
        yields an empty set.
    """
    with_children = collections.defaultdict(set)
    for cls in nltk.corpus.verbnet.classids():
        if cls.count('-') > 1:
            name, number, suffix = cls.split('-', 2)
            ancestor = name + '-' + number
            for part in suffix.split('-'):
                with_children[ancestor].add(cls)
                ancestor += '-' + part
            # After the loop ``ancestor`` has been rebuilt into ``cls``.
            with_children[ancestor].add(cls)
        else:
            with_children[cls].add(cls)
    return with_children


def _GetMemberLemmas(member):
    """Return the WordNet lemmas to use for one VerbNet MEMBER element.

    When the member's ``wn`` attribute lists sense keys ('?' characters
    are stripped first), each key is resolved with ``lemma_from_key``.
    Otherwise the verb lemmas for the member's ``name`` are looked up and
    kept only if there is at most one of them.
    """
    sense_keys = member.attrib['wn'].replace('?', '').split()
    if sense_keys:
        return [
            nltk.corpus.wordnet.lemma_from_key(key + '::')
            for key in sense_keys
        ]

    lemmas = nltk.corpus.wordnet.lemmas(member.attrib['name'], 'v')
    if len(lemmas) > 1:
        # More than one candidate and no explicit sense key: emit nothing.
        return []
    return lemmas


def GetVerbRules(patterns):
    """Build the verb grammar rules for the given frame patterns.

    Args:
        patterns: Iterable of ``(index, pattern, classes)`` triples, where
            ``classes`` is an iterable of VerbNet class ids. The
            ``pattern`` element is not used here.

    Returns:
        A list of strings, one per (pattern class, member lemma, verb
        form), each produced by ``VERB_TEMPLATE % (form, index, cls,
        synset, count, conjugated_verb)``. ``cls`` is the class id taken
        from ``patterns``, not the subclass the member was found in.
        Verbs missing from the conjugation table produce no rules.
    """
    # Close the file deterministically instead of leaking the handle.
    # Only unpickle files you trust: pickle can execute arbitrary code.
    # NOTE(review): opened in the default mode, as before; Python 3
    # requires mode 'rb' for pickle.load -- confirm the target version.
    with open(VERBS_LIST) as verbs_file:
        conjugation = pickle.load(verbs_file)

    with_children = _GetVerbClassesWithChildren()
    rules = []

    for index, _pattern, classes in patterns:
        for cls in classes:
            for frame_cls in with_children[cls]:
                frame = nltk.corpus.verbnet.vnclass(frame_cls)
                for member in frame.findall('MEMBERS/MEMBER'):
                    verb = member.attrib['name']
                    lemmas = _GetMemberLemmas(member)
                    # The membership test does not depend on the lemma,
                    # so do it once per verb rather than once per lemma.
                    if verb not in conjugation:
                        continue
                    for lemma in lemmas:
                        synset = lemma.synset.name
                        count = base.GetCompoundCount(lemma)
                        for form, conjugated_verb in zip(
                                VERB_FORMS, conjugation[verb]):
                            conjugated_verb = base.LemmaToTerminals(
                                conjugated_verb)
                            args = (form, index, cls, synset, count,
                                    conjugated_verb)
                            rules.append(VERB_TEMPLATE % args)

    return rules
Exemplo n.º 5
0
def ShouldBeIgnored(lemma):
    """Return True when ``lemma`` should be left out of the output.

    A lemma is ignored when any of the following holds, checked in order:

    * ``IGNORE_PROPER_MULTIWORD`` is set and the name contains '_';
    * ``IGNORE_UNCOMMON`` is set and ``base.GetCompoundCount(lemma)`` is
      falsy;
    * the name is listed in ``EXCEPTIONS``;
    * the name is exactly one character long.
    """
    if IGNORE_PROPER_MULTIWORD and '_' in lemma.name:
        return True
    if IGNORE_UNCOMMON and not base.GetCompoundCount(lemma):
        return True
    if lemma.name in EXCEPTIONS:
        return True
    return len(lemma.name) == 1