Пример #1
0
def main(args):

    for input_str in args.input:
        fsa = make_linear_fsa(input_str)
        cfg = make_grammar(fsa)
        parser = Earley(cfg, fsa)
        forest = parser.do('[S]', '[GOAL]')
        if not forest:
            print 'NO PARSE FOUND'
            continue
        new_rules = []
        for rule in forest:
            if len(rule.rhs) > 1 and all(map(is_nonterminal, rule.rhs)):
                new_rules.append(
                    Rule(rule.lhs, reversed(rule.rhs), rule.log_prob))
        [forest.add(rule) for rule in new_rules]
        print '# FOREST'
        print forest
        print

        if args.show_permutations:
            counts = count_derivations(forest, '[GOAL]')
            total = 0
            for p, n in sorted(counts['p'].iteritems(), key=lambda (k, v): k):
                print p, n
                total += n
            print len(counts['p'].keys()), total
def main(args):

    for input_str in args.input:
        fsa = make_linear_fsa(input_str)
        cfg = make_grammar(fsa)
        parser = Earley(cfg, fsa)
        forest = parser.do('[S]', '[GOAL]')
        if not forest:
            print 'NO PARSE FOUND'
            continue
        new_rules = []
        for rule in forest:
            if len(rule.rhs) > 1 and all(map(is_nonterminal, rule.rhs)):
                new_rules.append(Rule(rule.lhs, reversed(rule.rhs), rule.log_prob))
        [forest.add(rule) for rule in new_rules]
        print '# FOREST'
        print forest
        print

        if args.show_permutations:
            counts = count_derivations(forest, '[GOAL]')
            total = 0
            for p, n in sorted(counts['p'].iteritems(), key=lambda (k, v): k):
                print p, n
                total += n
            print len(counts['p'].keys()), total
Пример #3
0
def main(args):
    wcfg = WCFG(read_grammar_rules(args.grammar))
    #print 'GRAMMAR'
    #print wcfg

    for input_str in args.input:
        wfsa = make_linear_fsa(input_str)
        #print 'FSA'
        #print wfsa
        parser = Earley(wcfg, wfsa)
        forest = parser.do('[S]', '[GOAL]')
        if not forest:
            print 'NO PARSE FOUND'
            continue
        new_rules = []
        for rule in forest:
            if len(rule.rhs) > 1 and all(map(is_nonterminal, rule.rhs)):
                new_rules.append(
                    Rule(rule.lhs, reversed(rule.rhs), rule.log_prob))
        [forest.add(rule) for rule in new_rules]
        print '# FOREST'
        print forest
        print

        if args.show_permutations:
            print '# PERMUTATIONS'
            counts = count_derivations(forest, '[GOAL]')
            total = 0
            for p, n in sorted(counts['p'].iteritems(), key=lambda (k, v): k):
                print 'permutation=(%s) derivations=%d' % (' '.join(
                    str(i) for i in p), n)
                total += n
            print 'permutations=%d derivations=%d' % (len(
                counts['p'].keys()), total)
            print
Пример #4
0
Файл: main.py Проект: Rigeru/NLP
def earley(sentence):
    """Parse *sentence* with an Earley parser loaded from rules.txt,
    printing the parser states and tracing the recovered root.
    """
    # Renamed the local from `earley` so it no longer shadows this function.
    parser = Earley()
    # parser.readCFGRules(FilePath.ROOT + "rules_small.txt")  # smaller debug grammar
    parser.readCFGRules(FilePath.ROOT + "rules.txt")
    # parser.showRules()
    parser.parse(sentence)
    parser.showStates()
    print("")
    parser.traceRoot()
Пример #5
0
def test_predict():
    """predict(0) should add the situation (S -> a, 0, 0) drawn from S's rule."""
    earley = Earley('a')
    earley.rules_list = [Rule('U', 'S'), Rule('S', 'a')]
    earley.predict(0)

    # Idiom: any() over a generator replaces the manual found-flag loop.
    expected = Situation('S', 'a', 0, 0)
    assert any(sit == expected for sit in earley.situations_dict[0])
Пример #6
0
def test_scan():
    """scan(0, 'a') should advance (S -> a, 0, 0) into set 1 as (S -> a, 0, 1)."""
    earley = Earley('a')
    earley.rules_list = [Rule('U', 'S'), Rule('S', 'a')]
    earley.situations_dict[0].add(Situation('S', 'a', 0, 0))
    earley.scan(0, 'a')

    # Idiom: any() over a generator replaces the manual found-flag loop.
    expected = Situation('S', 'a', 0, 1)
    assert any(sit == expected for sit in earley.situations_dict[1])
Пример #7
0
def test_intersection_weights():
    # Load the grammar
    grammar = "/home/cxiao/pcfg_sampling/examples/cfg"
    grammarfmt = "bar"
    wcfg = load_grammar(grammar, grammarfmt, transform=float)
    # Construct the wdfsa
    wfsa = WDFSA()
    for word in wcfg.terminals:
        wfsa.add_arc(0, 0, make_terminal(word), 0.0)
    wfsa.add_arc(0, 0, make_terminal('dog'), -0.5)
    wfsa.make_initial(0)
    wfsa.make_final(0)
    # Intersection
    parser = Earley(wcfg, wfsa)
    forest = parser.do('[S]', '[GOAL]')
    if forest.get('[NN,0-0]')[1].log_prob == -1.7039:
        print "Succeed, the earley intersection correctly changes the weight for a unigram automata"
Пример #8
0
def get_forest(input_str, wcfg):
    """Intersect *input_str* (as a linear FSA) with *wcfg* via Earley and
    return the forest, augmented with reversed permutation rules.

    Returns the string 'NO PARSE FOUND' when the intersection is empty.
    NOTE(review): the string sentinel mixes return types; kept as-is for
    backward compatibility with existing callers.
    """
    wfsa = make_linear_fsa(input_str)

    parser = Earley(wcfg, wfsa)
    forest = parser.do('[S]', '[GOAL]')
    if not forest:
        return 'NO PARSE FOUND'

    # Collect first: mutating `forest` while iterating it is unsafe; then
    # add with a plain loop (not a side-effect list comprehension).
    new_rules = [Rule(rule.lhs, reversed(rule.rhs), rule.log_prob)
                 for rule in forest
                 if len(rule.rhs) > 1 and all(map(is_nonterminal, rule.rhs))]
    for rule in new_rules:
        forest.add(rule)

    return forest
Пример #9
0
def initialise(wcfg, wfsa, root, goal, intersection):
    """
    Calculate a first derivation based on a simpler (thus smaller/faster) version of the grammar
    Thereby determining the initial conditions.
    Only applicable with the 'milos' grammar format, i.e. non-terminals have the form: '[P1234*2_1]'
    """
    smaller = WCFG([])

    logging.debug('Creating a smaller grammar for initial conditions...')
    for line in wcfg:
        if 0 < permutation_length(line.lhs) <= 2:
            smaller.add(line)
        elif line.lhs == root or line.lhs == '[UNK]':
            smaller.add(line)

    if intersection == 'nederhof':
        init_parser = Nederhof(smaller, wfsa)
    elif intersection == 'earley':
        init_parser = Earley(smaller, wfsa)
    else:
        raise NotImplementedError('I do not know this algorithm: %s' %
                                  intersection)

    logging.debug('Init Parsing...')
    init_forest = init_parser.do(root, goal)

    if not init_forest:
        print 'NO PARSE FOUND'
        return {}
    else:
        logging.debug('Forest: rules=%d', len(init_forest))

        logging.debug('Init Topsorting...')
        # sort the forest
        sorted_nodes = top_sort(init_forest)

        # calculate the inside weight of the sorted forest
        logging.debug('Init Inside...')
        init_inside_prob = inside(init_forest, sorted_nodes)

        logging.debug('Init Sampling...')
        gen_sampling = GeneralisedSampling(init_forest, init_inside_prob)
        init_d = gen_sampling.sample(goal)

    return get_conditions(init_d)
Пример #10
0
def main():
    """Interactive driver: read a word and a grammar from stdin, then
    report whether the Earley recogniser accepts the word.
    """
    word = input('Введите распознаваемое слово\n')
    recognizer = Earley(word)

    rule_count = int(input('Введите количество правил в грамматике: '))
    print('Введите правила грамматики в формате S -> aB')
    for i in range(rule_count):
        # Each rule line is "LHS -> RHS"; the arrow token is discarded.
        parts = input(f'Правило {i + 1} из {rule_count}: ').split()
        if len(parts) != 3:
            print("Неправильное правило")
            exit(1)
        recognizer.add_rule(Rule(parts[0], parts[2]))
    # Augment the grammar with the start rule S# -> S.
    recognizer.add_rule(Rule('S#', 'S'))

    print("YES\n" if recognizer.get_answer() else "NO\n")
Пример #11
0
def test_final_weights():
    """Smoke-check that the Earley and Nederhof intersections agree on the
    final-state weight stored on the [GOAL] rules.

    Prints a success message instead of asserting, so a silent run means
    the checks did NOT pass.
    """
    # Load the grammar
    # NOTE(review): hard-coded absolute path — only runs on the author's machine.
    grammar = "/home/cxiao/pcfg_sampling/examples/cfg"
    grammarfmt = "bar"
    wcfg = load_grammar(grammar, grammarfmt, transform=float)
    # Construct the wdfsa
    sentence = "the dog barks"
    wfsa = make_linear_fsa(sentence)
    # Intersection
    parser1 = Earley(wcfg, wfsa)
    forest1 = parser1.do('[S]', '[GOAL]')
    parser2 = Nederhof(wcfg, wfsa)
    forest2 = parser2.do('[S]', '[GOAL]')
    # Both parsers should default the final weight to 0.0 (the log-semiring
    # identity). Exact float equality is acceptable here because the values
    # compared are directly assigned constants, not accumulated sums.
    if forest1.get('[GOAL]')[0].log_prob == forest2.get(
            '[GOAL]')[0].log_prob == 0.0:
        print "Succeed, default final weight is 0.0 in log semiring"
    # Override the final weight on the FSA's last state and re-intersect:
    # both parsers must pick up the new -0.5 weight.
    wfsa.make_final(len(sentence.split()), -0.5)
    parser1 = Earley(wcfg, wfsa)
    forest1 = parser1.do('[S]', '[GOAL]')
    parser2 = Nederhof(wcfg, wfsa)
    forest2 = parser2.do('[S]', '[GOAL]')
    if forest1.get('[GOAL]')[0].log_prob == forest2.get(
            '[GOAL]')[0].log_prob == -0.5:
        print "Succeed, change final weight to -0.5 in log semiring"
Пример #12
0
from earley import Earley
from bottomup import BottomUp
from topdown import TopDown
import nltk
import matplotlib.pyplot as plt

# Benchmark script (Python 2): time three chart parsers over ATIS sentences.
sentences = nltk.data.load('grammars/large_grammars/atis_sentences.txt')
sentences = nltk.parse.util.extract_test_sentences(sentences)
#sentences.sort(lambda x,y: cmp(len(x[0]), len(y[0])))
#print sentences
# Parallel arrays: one entry per benchmarked sentence.
len_array = []
edges_array = []
time_earley_array = []
time_topdown_array = []
time_bottomup_array = []
for i in range(8):
    print "Iteration--->", i
    testsentence = sentences[i][0]
    len_array.append(len(testsentence))
    ep = Earley()
    # NOTE(review): ep.parse's return value is appended to edges_array —
    # presumably an edge count; confirm against the Earley implementation.
    edges_array.append(ep.parse(testsentence))
    time_earley_array.append(ep.time(testsentence))
    bu = BottomUp()
    time_bottomup_array.append(bu.time(testsentence))
    td = TopDown()
    time_topdown_array.append(td.time(testsentence))

# NOTE(review): edge counts and bottom-up times are collected above but
# never plotted — only the Earley (red) and top-down (blue) curves appear.
plt.plot(len_array, time_earley_array, 'r--', len_array, time_topdown_array,
         'b--')
plt.show()
Пример #13
0
from earley import Earley
from bottomup import BottomUp
from topdown import TopDown
import nltk
import matplotlib.pyplot as plt

# Benchmark script (Python 2, tab-indented): time three chart parsers over
# ATIS sentences.
sentences = nltk.data.load('grammars/large_grammars/atis_sentences.txt')
sentences = nltk.parse.util.extract_test_sentences(sentences)
#sentences.sort(lambda x,y: cmp(len(x[0]), len(y[0])))
#print sentences
# Parallel arrays: one entry per benchmarked sentence.
len_array = []
edges_array = []
time_earley_array = []
time_topdown_array = []
time_bottomup_array = []
for i in range(8):
	print "Iteration--->", i
	testsentence = sentences[i][0]
	len_array.append(len(testsentence))
	ep = Earley()
	# NOTE(review): ep.parse's return value is appended to edges_array —
	# presumably an edge count; confirm against the Earley implementation.
	edges_array.append(ep.parse(testsentence))
	time_earley_array.append(ep.time(testsentence))
	bu = BottomUp()
	time_bottomup_array.append(bu.time(testsentence))
	td = TopDown()
	time_topdown_array.append(td.time(testsentence))

# NOTE(review): edge counts and bottom-up times are collected above but
# never plotted — only the Earley (red) and top-down (blue) curves appear.
plt.plot(len_array, time_earley_array, 'r--', len_array, time_topdown_array, 'b--')
plt.show()
Пример #14
0
def test_get_answer():
    """The recogniser accepts/rejects words against small test grammars."""
    # Table-driven form of the original three scenarios.
    cases = [
        ('a', [Rule('U', 'S'), Rule('S', 'a')], True),
        ('ab', [Rule('U', 'S'), Rule('S', 'aA'), Rule('A', 'b')], True),
        ('ac', [Rule('U', 'S'), Rule('S', 'aA'), Rule('A', 'b')], False),
    ]
    for word, rules, accepted in cases:
        earley = Earley(word)
        earley.rules_list = rules
        assert bool(earley.get_answer()) is accepted
Пример #15
0
from earley import Earley

# CLI entry point (Python 2): earleyParse a sentence against a JSON grammar.
if __name__ == '__main__':
  import sys
  import json
  
  # Require a grammar file and a sentence; otherwise print usage and exit.
  if len(sys.argv) < 3:
    print 'Program needs to be invoked like this:\n'
    print 'python earleyParser.py grammar.json "sentence to parse"\n'
    print 'you can turn on the debug mode changing the debug variable to 1'
    print 'to see the whole process'
    print '\nExamples of phrases the program recognises:'
    print '--->I want to know about the subjects'
    print '--->tell me about the content'
    print '--->about prizing'
    print '--->enrollment'
    print '--->tell me about subjects'
    sys.exit()

  # NOTE(review): grammar_file is never closed — a `with open(...)` block
  # would be safer.
  grammar_file = open(sys.argv[1])
  words = sys.argv[2].split(' ')
  grammar = json.load(grammar_file)
  
  e = Earley()
  # Set debug to 1 to trace the parse step by step (see usage text above).
  debug = 0
  chart = e.earleyParse(words, grammar,debug)

Пример #16
0
def exact_sample(wcfg,
                 wfsa,
                 root='[S]',
                 goal='[GOAL]',
                 n=1,
                 intersection='nederhof'):
    """
    Sample a derivation given a wcfg and a wfsa, with exact sampling, a
    form of MC-sampling
    """
    samples = []

    if intersection == 'nederhof':
        parser = Nederhof(wcfg, wfsa)
        logging.info('Using Nederhof parser')
    elif intersection == 'earley':
        parser = Earley(wcfg, wfsa)
        logging.info('Using Earley parser')
    else:
        raise NotImplementedError('I do not know this algorithm: %s' %
                                  intersection)

    logging.debug('Parsing...')
    forest = parser.do(root, goal)

    if not forest:
        print 'NO PARSE FOUND'
        return False
    else:

        logging.debug('Forest: rules=%d', len(forest))

        logging.debug('Topsorting...')
        # sort the forest
        sorted_nodes = top_sort(forest)

        # calculate the inside weight of the sorted forest
        logging.debug('Inside...')
        inside_prob = inside(forest, sorted_nodes)

        gen_sampling = GeneralisedSampling(forest, inside_prob)

        logging.debug('Sampling...')
        it = 0
        while len(samples) < n:
            it += 1
            if it % 10 == 0:
                logging.info('%d/%d', it, n)

            # retrieve a random derivation, with respect to the inside weight distribution
            d = gen_sampling.sample(goal)

            samples.append(d)

        counts = Counter(tuple(d) for d in samples)
        for d, n in counts.most_common():
            score = sum(r.log_prob for r in d)
            prob = math.exp(score - inside_prob[goal])
            print '# n=%s estimate=%s prob=%s score=%s' % (
                n, float(n) / len(samples), prob, score)
            tree = make_nltk_tree(d)
            inline_tree = inlinetree(tree)
            print inline_tree, "\n"