Пример #1
0
def WriteAdjective(outfile, degree, lemma, adjective, standard=None):
  """Write a single ``Adj`` grammar production to ``outfile``.

  Args:
    outfile: Object with a ``write`` method that receives the rule text.
    degree: Value placed in the ``DEG`` feature of the rule.
    lemma: Object whose ``synset.name`` fills the ``SNS`` feature and which
      is passed to ``base.GetCompoundCount`` to obtain the ``FRQ`` value.
    adjective: Surface form converted to the right-hand side of the rule
      by ``base.LemmaToTerminals``.
    standard: Truthy values yield the ``+std`` feature, anything else
      (including the default ``None``) yields ``-std``.
  """
  sense = lemma.synset.name
  frequency = base.GetCompoundCount(lemma)
  if standard:
    std_sign = '+'
  else:
    std_sign = '-'
  right_hand_side = base.LemmaToTerminals(adjective)
  rule = 'Adj[DEG=%s,SNS="%s",FRQ=%d,%sstd] -> %s\n' % (
      degree, sense, frequency, std_sign, right_hand_side)
  outfile.write(rule)
Пример #2
0
def WriteRules(outfile):
    """Write one ``PrpN`` production per entry of the NLTK names corpus.

    Names from ``male.txt`` are tagged ``m`` and written first, followed by
    names from ``female.txt`` tagged ``f``. Every name is lower-cased before
    use. The ``FRQ`` feature is the negated result of
    ``Ambiguity(name, gender)``.

    Args:
        outfile: Object with a ``write`` method that receives the rules.
    """
    # Both files are read completely before any rule is emitted.
    tagged_names = []
    for sex, filename in (('m', 'male.txt'), ('f', 'female.txt')):
        for raw_name in nltk.corpus.names.read(filename):
            tagged_names.append((sex, raw_name.lower()))

    for gender, name in tagged_names:
        penalty = Ambiguity(name, gender)
        terminals = base.LemmaToTerminals(name)
        rule = 'PrpN[NUM=sg,SEX=%s,FRQ=%d] -> %s\n' % (
            gender, -penalty, terminals)
        outfile.write(rule)
Пример #3
0
def WriteProperNoun(outfile, lemma, gender):
    """Write a singular ``PrpN`` production for ``lemma``.

    Nothing is written when ``ShouldBeIgnored(lemma)`` is truthy.

    Args:
        outfile: Object with a ``write`` method that receives the rule.
        lemma: Object providing ``name`` (turned into terminals) and
            ``synset.name`` (the ``SNS`` feature); also passed to
            ``base.GetCompoundCount``.
        gender: When truthy it is emitted as the ``SEX`` feature; when
            falsy the ``SEX`` feature is left out entirely.
    """
    if ShouldBeIgnored(lemma):
        return

    terminals = base.LemmaToTerminals(lemma.name)
    # FRQ is the compound count lowered by one.
    frequency = base.GetCompoundCount(lemma) - 1
    sense = lemma.synset.name

    if gender:
        rule = 'PrpN[NUM=sg,SNS="%s",SEX=%s,FRQ=%d] -> %s\n' % (
            sense, gender, frequency, terminals)
    else:
        rule = 'PrpN[NUM=sg,SNS="%s",FRQ=%d] -> %s\n' % (
            sense, frequency, terminals)
    outfile.write(rule)
Пример #4
0
def WriteNoun(outfile, lemma, gender, number, name_override=None):
    """Write a ``Noun`` production for ``lemma``.

    Args:
        outfile: Object with a ``write`` method that receives the rule.
        lemma: Object providing ``name`` and ``synset.name``; also passed to
            ``base.GetCompoundCount`` for the ``FRQ`` feature.
        gender: When truthy it is emitted as the ``SEX`` feature; when
            falsy the ``SEX`` feature is left out entirely.
        number: Value placed in the ``NUM`` feature.
        name_override: Optional replacement for ``lemma.name``. When truthy
            it is used both for the ``ShouldBeIgnored`` check (instead of
            the lemma itself) and for the terminals of the rule.
    """
    checked = name_override or lemma
    if ShouldBeIgnored(checked):
        return

    surface_form = name_override or lemma.name
    terminals = base.LemmaToTerminals(surface_form)
    frequency = base.GetCompoundCount(lemma)
    sense = lemma.synset.name

    if gender:
        rule = 'Noun[NUM=%s,SNS="%s",SEX=%s,FRQ=%s] -> %s\n' % (
            number, sense, gender, frequency, terminals)
    else:
        rule = 'Noun[NUM=%s,SNS="%s",FRQ=%s] -> %s\n' % (
            number, sense, frequency, terminals)
    outfile.write(rule)
Пример #5
0
def WriteRules(outfile):
    """Write grammar productions for every pickled conjunction record.

    Each record loaded from ``CONJUNCTIONS_PATH`` is indexed as follows:
    element ``0`` is the conjunction text, elements ``1:-2`` are flags,
    element ``-2`` is the semantics value and element ``-1`` is the series
    flag. Flags and the series flag are used as indexes into ``'-+'``, so a
    falsy/0 value becomes ``'-'`` and a truthy/1 value becomes ``'+'``.

    A conjunction whose text contains ``'...'`` is split into two parts and
    written as two ``PAIR_TEMPLATE`` rules (the first marked ``'+'``, the
    second ``'-'``); any other conjunction is written as one
    ``SINGLE_TEMPLATE`` rule.

    Args:
        outfile: Object with a ``write`` method that receives the rules.

    Raises:
        ValueError: If a paired conjunction does not split into exactly two
            parts around ``'...'``.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector. The open mode is
    # deliberately left unchanged from the original call.
    # NOTE(review): pickle must only be used on trusted data files.
    with open(CONJUNCTIONS_PATH) as pickled:
        conjunctions = pickle.load(pickled)

    for conjunction in conjunctions:
        # Spaces are replaced by underscores before any further processing.
        name = conjunction[0].replace(' ', '_')
        semantics = conjunction[-2]
        is_series = '-+'[conjunction[-1]]
        flags = tuple('-+'[i] for i in conjunction[1:-2])
        if '...' in name:
            # The full (unsplit) name identifies the pair in both rules.
            group = name
            first, second = [i.strip() for i in name.split('...')]

            first_terminals = base.LemmaToTerminals(first)
            second_terminals = base.LemmaToTerminals(second)

            common_args = (group, semantics) + flags + (is_series, )

            outfile.write(PAIR_TEMPLATE % (common_args +
                                           ('+', first_terminals)))
            outfile.write(PAIR_TEMPLATE % (common_args +
                                           ('-', second_terminals)))
        else:
            terminals = base.LemmaToTerminals(name)
            args = (semantics, ) + flags + (is_series, terminals)
            outfile.write(SINGLE_TEMPLATE % args)
Пример #6
0
def GetVerbRules(patterns):
    """Build verb grammar rules for the given frame patterns.

    For every ``(index, pattern, classes)`` entry, each listed VerbNet class
    is expanded to itself plus its subclasses, and every member verb of
    those classes that has an entry in the pickled conjugation table
    yields one rule per (WordNet lemma, verb form) combination.

    Args:
        patterns: Iterable of ``(index, pattern, classes)`` triples. Only
            ``index`` and ``classes`` are used here; ``classes`` is an
            iterable of VerbNet class ids.

    Returns:
        A list of strings produced by formatting ``VERB_TEMPLATE`` with
        ``(form, index, cls, synset_name, count, terminals)``, where ``cls``
        is the class id taken from ``patterns`` (not the subclass id).
    """
    # Use a context manager so the file handle is closed deterministically.
    # The open mode is deliberately left unchanged from the original call.
    # NOTE(review): pickle must only be used on trusted data files.
    with open(VERBS_LIST) as pickled:
        conjugation = pickle.load(pickled)
    rules = []

    # Map each class id (and each of its dash-separated ancestor prefixes)
    # to the set of class ids that extend it, itself included. Ids with at
    # most one dash have no ancestors and map only to themselves.
    with_children = collections.defaultdict(set)
    for cls in nltk.corpus.verbnet.classids():
        if cls.count('-') > 1:
            name, number, suffix = cls.split('-', 2)
            base_class = name + '-' + number

            for part in suffix.split('-'):
                with_children[base_class].add(cls)
                base_class += '-' + part
            with_children[base_class].add(cls)
        else:
            with_children[cls].add(cls)

    for index, pattern, classes in patterns:
        for cls in classes:
            for frame_cls in with_children[cls]:
                frame_cls = nltk.corpus.verbnet.vnclass(frame_cls)
                for member in frame_cls.findall('MEMBERS/MEMBER'):
                    verb = member.attrib['name']
                    # '?' markers are dropped from the listed sense keys.
                    lemmas = member.attrib['wn'].replace('?', '').split()
                    if lemmas:
                        lemmas = [
                            nltk.corpus.wordnet.lemma_from_key(i + '::')
                            for i in lemmas
                        ]
                    else:
                        # Without listed senses, fall back to WordNet, but
                        # only when the verb has at most one verb lemma.
                        lemmas = nltk.corpus.wordnet.lemmas(verb, 'v')
                        if len(lemmas) > 1:
                            lemmas = []

                    # This check does not depend on the lemma, so do it once
                    # per member rather than once per lemma.
                    if verb not in conjugation:
                        continue
                    conjugated_forms = conjugation[verb]

                    for lemma in lemmas:
                        synset = lemma.synset.name
                        count = base.GetCompoundCount(lemma)
                        for form, conjugated_verb in zip(
                                VERB_FORMS, conjugated_forms):
                            conjugated_verb = base.LemmaToTerminals(
                                conjugated_verb)
                            args = (form, index, cls, synset, count,
                                    conjugated_verb)
                            rules.append(VERB_TEMPLATE % args)

    return rules
Пример #7
0
def HandleLex(node, *_):
    """Convert a node's ``value`` attribute into a list of terminals.

    Every ``'[+be]'`` marker is removed from ``node.attrib['value']``; the
    remainder is split on whitespace and each word is passed through
    ``base.LemmaToTerminals``.

    Args:
        node: Object exposing an ``attrib`` mapping with a ``'value'`` key.
        *_: Extra positional arguments are accepted and ignored.

    Returns:
        A list with one converted entry per word, or an empty list when
        nothing but whitespace remains after removing the marker.
    """
    # A bare split() discards leading/trailing whitespace and returns [] for
    # an empty or all-whitespace string, so no separate emptiness check is
    # needed.
    words = node.attrib['value'].replace('[+be]', '').split()
    return [base.LemmaToTerminals(word) for word in words]