Пример #1
0
def main(args):

    for input_str in args.input:
        fsa = make_linear_fsa(input_str)
        cfg = make_grammar(fsa)
        parser = Earley(cfg, fsa)
        forest = parser.do('[S]', '[GOAL]')
        if not forest:
            print 'NO PARSE FOUND'
            continue
        new_rules = []
        for rule in forest:
            if len(rule.rhs) > 1 and all(map(is_nonterminal, rule.rhs)):
                new_rules.append(
                    Rule(rule.lhs, reversed(rule.rhs), rule.log_prob))
        [forest.add(rule) for rule in new_rules]
        print '# FOREST'
        print forest
        print

        if args.show_permutations:
            counts = count_derivations(forest, '[GOAL]')
            total = 0
            for p, n in sorted(counts['p'].iteritems(), key=lambda (k, v): k):
                print p, n
                total += n
            print len(counts['p'].keys()), total
def main(args):

    for input_str in args.input:
        fsa = make_linear_fsa(input_str)
        cfg = make_grammar(fsa)
        parser = Earley(cfg, fsa)
        forest = parser.do('[S]', '[GOAL]')
        if not forest:
            print 'NO PARSE FOUND'
            continue
        new_rules = []
        for rule in forest:
            if len(rule.rhs) > 1 and all(map(is_nonterminal, rule.rhs)):
                new_rules.append(Rule(rule.lhs, reversed(rule.rhs), rule.log_prob))
        [forest.add(rule) for rule in new_rules]
        print '# FOREST'
        print forest
        print

        if args.show_permutations:
            counts = count_derivations(forest, '[GOAL]')
            total = 0
            for p, n in sorted(counts['p'].iteritems(), key=lambda (k, v): k):
                print p, n
                total += n
            print len(counts['p'].keys()), total
Пример #3
0
def main(args):
    wcfg = WCFG(read_grammar_rules(args.grammar))
    #print 'GRAMMAR'
    #print wcfg

    for input_str in args.input:
        wfsa = make_linear_fsa(input_str)
        #print 'FSA'
        #print wfsa
        parser = Earley(wcfg, wfsa)
        forest = parser.do('[S]', '[GOAL]')
        if not forest:
            print 'NO PARSE FOUND'
            continue
        new_rules = []
        for rule in forest:
            if len(rule.rhs) > 1 and all(map(is_nonterminal, rule.rhs)):
                new_rules.append(
                    Rule(rule.lhs, reversed(rule.rhs), rule.log_prob))
        [forest.add(rule) for rule in new_rules]
        print '# FOREST'
        print forest
        print

        if args.show_permutations:
            print '# PERMUTATIONS'
            counts = count_derivations(forest, '[GOAL]')
            total = 0
            for p, n in sorted(counts['p'].iteritems(), key=lambda (k, v): k):
                print 'permutation=(%s) derivations=%d' % (' '.join(
                    str(i) for i in p), n)
                total += n
            print 'permutations=%d derivations=%d' % (len(
                counts['p'].keys()), total)
            print
Пример #4
0
Файл: main.py Проект: Rigeru/NLP
def earley(sentence):
    """Parse *sentence* with an Earley parser loaded from rules.txt,
    printing the parser states and tracing the recovered root.
    """
    # Renamed the local from `earley` so it no longer shadows this function.
    parser = Earley()
    # parser.readCFGRules(FilePath.ROOT + "rules_small.txt")  # smaller debug grammar
    parser.readCFGRules(FilePath.ROOT + "rules.txt")
    # parser.showRules()
    parser.parse(sentence)
    parser.showStates()
    print("")
    parser.traceRoot()
Пример #5
0
def test_predict():
    """predict(0) should add the situation (S -> a, 0, 0) drawn from S's rule."""
    earley = Earley('a')
    earley.rules_list = [Rule('U', 'S'), Rule('S', 'a')]
    earley.predict(0)

    # Idiom: any() over a generator replaces the manual found-flag loop.
    expected = Situation('S', 'a', 0, 0)
    assert any(sit == expected for sit in earley.situations_dict[0])
Пример #6
0
def test_scan():
    """scan(0, 'a') should advance (S -> a, 0, 0) into set 1 as (S -> a, 0, 1)."""
    earley = Earley('a')
    earley.rules_list = [Rule('U', 'S'), Rule('S', 'a')]
    earley.situations_dict[0].add(Situation('S', 'a', 0, 0))
    earley.scan(0, 'a')

    # Idiom: any() over a generator replaces the manual found-flag loop.
    expected = Situation('S', 'a', 0, 1)
    assert any(sit == expected for sit in earley.situations_dict[1])
Пример #7
0
def test_intersection_weights():
    # Load the grammar
    grammar = "/home/cxiao/pcfg_sampling/examples/cfg"
    grammarfmt = "bar"
    wcfg = load_grammar(grammar, grammarfmt, transform=float)
    # Construct the wdfsa
    wfsa = WDFSA()
    for word in wcfg.terminals:
        wfsa.add_arc(0, 0, make_terminal(word), 0.0)
    wfsa.add_arc(0, 0, make_terminal('dog'), -0.5)
    wfsa.make_initial(0)
    wfsa.make_final(0)
    # Intersection
    parser = Earley(wcfg, wfsa)
    forest = parser.do('[S]', '[GOAL]')
    if forest.get('[NN,0-0]')[1].log_prob == -1.7039:
        print "Succeed, the earley intersection correctly changes the weight for a unigram automata"
Пример #8
0
def get_forest(input_str, wcfg):
    """Intersect *input_str* (as a linear FSA) with *wcfg* via Earley and
    return the forest, augmented with reversed permutation rules.

    Returns the string 'NO PARSE FOUND' when the intersection is empty.
    NOTE(review): the string sentinel mixes return types; kept as-is for
    backward compatibility with existing callers.
    """
    wfsa = make_linear_fsa(input_str)

    parser = Earley(wcfg, wfsa)
    forest = parser.do('[S]', '[GOAL]')
    if not forest:
        return 'NO PARSE FOUND'

    # Collect first: mutating `forest` while iterating it is unsafe; then
    # add with a plain loop (not a side-effect list comprehension).
    new_rules = [Rule(rule.lhs, reversed(rule.rhs), rule.log_prob)
                 for rule in forest
                 if len(rule.rhs) > 1 and all(map(is_nonterminal, rule.rhs))]
    for rule in new_rules:
        forest.add(rule)

    return forest
Пример #9
0
def initialise(wcfg, wfsa, root, goal, intersection):
    """
    Calculate a first derivation based on a simpler (thus smaller/faster) version of the grammar
    Thereby determining the initial conditions.
    Only applicable with the 'milos' grammar format, i.e. non-terminals have the form: '[P1234*2_1]'
    """
    smaller = WCFG([])

    logging.debug('Creating a smaller grammar for initial conditions...')
    for line in wcfg:
        if 0 < permutation_length(line.lhs) <= 2:
            smaller.add(line)
        elif line.lhs == root or line.lhs == '[UNK]':
            smaller.add(line)

    if intersection == 'nederhof':
        init_parser = Nederhof(smaller, wfsa)
    elif intersection == 'earley':
        init_parser = Earley(smaller, wfsa)
    else:
        raise NotImplementedError('I do not know this algorithm: %s' %
                                  intersection)

    logging.debug('Init Parsing...')
    init_forest = init_parser.do(root, goal)

    if not init_forest:
        print 'NO PARSE FOUND'
        return {}
    else:
        logging.debug('Forest: rules=%d', len(init_forest))

        logging.debug('Init Topsorting...')
        # sort the forest
        sorted_nodes = top_sort(init_forest)

        # calculate the inside weight of the sorted forest
        logging.debug('Init Inside...')
        init_inside_prob = inside(init_forest, sorted_nodes)

        logging.debug('Init Sampling...')
        gen_sampling = GeneralisedSampling(init_forest, init_inside_prob)
        init_d = gen_sampling.sample(goal)

    return get_conditions(init_d)
Пример #10
0
def main():
    """Interactive driver: read a word and a grammar from stdin, then
    report whether the Earley recogniser accepts the word.
    """
    word = input('Введите распознаваемое слово\n')
    recognizer = Earley(word)

    rule_count = int(input('Введите количество правил в грамматике: '))
    print('Введите правила грамматики в формате S -> aB')
    for i in range(rule_count):
        # Each rule line is "LHS -> RHS"; the arrow token is discarded.
        parts = input(f'Правило {i + 1} из {rule_count}: ').split()
        if len(parts) != 3:
            print("Неправильное правило")
            exit(1)
        recognizer.add_rule(Rule(parts[0], parts[2]))
    # Augment the grammar with the start rule S# -> S.
    recognizer.add_rule(Rule('S#', 'S'))

    print("YES\n" if recognizer.get_answer() else "NO\n")
Пример #11
0
def test_final_weights():
    """Smoke-check that the Earley and Nederhof intersections agree on the
    final-state weight stored on the [GOAL] rules.

    Prints a success message instead of asserting, so a silent run means
    the checks did NOT pass.
    """
    # Load the grammar
    # NOTE(review): hard-coded absolute path — only runs on the author's machine.
    grammar = "/home/cxiao/pcfg_sampling/examples/cfg"
    grammarfmt = "bar"
    wcfg = load_grammar(grammar, grammarfmt, transform=float)
    # Construct the wdfsa
    sentence = "the dog barks"
    wfsa = make_linear_fsa(sentence)
    # Intersection
    parser1 = Earley(wcfg, wfsa)
    forest1 = parser1.do('[S]', '[GOAL]')
    parser2 = Nederhof(wcfg, wfsa)
    forest2 = parser2.do('[S]', '[GOAL]')
    # Both parsers should default the final weight to 0.0 (the log-semiring
    # identity). Exact float equality is acceptable here because the values
    # compared are directly assigned constants, not accumulated sums.
    if forest1.get('[GOAL]')[0].log_prob == forest2.get(
            '[GOAL]')[0].log_prob == 0.0:
        print "Succeed, default final weight is 0.0 in log semiring"
    # Override the final weight on the FSA's last state and re-intersect:
    # both parsers must pick up the new -0.5 weight.
    wfsa.make_final(len(sentence.split()), -0.5)
    parser1 = Earley(wcfg, wfsa)
    forest1 = parser1.do('[S]', '[GOAL]')
    parser2 = Nederhof(wcfg, wfsa)
    forest2 = parser2.do('[S]', '[GOAL]')
    if forest1.get('[GOAL]')[0].log_prob == forest2.get(
            '[GOAL]')[0].log_prob == -0.5:
        print "Succeed, change final weight to -0.5 in log semiring"
Пример #12
0
from earley import Earley
from bottomup import BottomUp
from topdown import TopDown
import nltk
import matplotlib.pyplot as plt

# Benchmark script (Python 2): time three chart parsers over ATIS sentences.
sentences = nltk.data.load('grammars/large_grammars/atis_sentences.txt')
sentences = nltk.parse.util.extract_test_sentences(sentences)
#sentences.sort(lambda x,y: cmp(len(x[0]), len(y[0])))
#print sentences
# Parallel arrays: one entry per benchmarked sentence.
len_array = []
edges_array = []
time_earley_array = []
time_topdown_array = []
time_bottomup_array = []
for i in range(8):
    print "Iteration--->", i
    testsentence = sentences[i][0]
    len_array.append(len(testsentence))
    ep = Earley()
    # NOTE(review): ep.parse's return value is appended to edges_array —
    # presumably an edge count; confirm against the Earley implementation.
    edges_array.append(ep.parse(testsentence))
    time_earley_array.append(ep.time(testsentence))
    bu = BottomUp()
    time_bottomup_array.append(bu.time(testsentence))
    td = TopDown()
    time_topdown_array.append(td.time(testsentence))

# NOTE(review): edge counts and bottom-up times are collected above but
# never plotted — only the Earley (red) and top-down (blue) curves appear.
plt.plot(len_array, time_earley_array, 'r--', len_array, time_topdown_array,
         'b--')
plt.show()
Пример #13
0
from earley import Earley
from bottomup import BottomUp
from topdown import TopDown
import nltk
import matplotlib.pyplot as plt

# Benchmark script (Python 2, tab-indented): time three chart parsers over
# ATIS sentences.
sentences = nltk.data.load('grammars/large_grammars/atis_sentences.txt')
sentences = nltk.parse.util.extract_test_sentences(sentences)
#sentences.sort(lambda x,y: cmp(len(x[0]), len(y[0])))
#print sentences
# Parallel arrays: one entry per benchmarked sentence.
len_array = []
edges_array = []
time_earley_array = []
time_topdown_array = []
time_bottomup_array = []
for i in range(8):
	print "Iteration--->", i
	testsentence = sentences[i][0]
	len_array.append(len(testsentence))
	ep = Earley()
	# NOTE(review): ep.parse's return value is appended to edges_array —
	# presumably an edge count; confirm against the Earley implementation.
	edges_array.append(ep.parse(testsentence))
	time_earley_array.append(ep.time(testsentence))
	bu = BottomUp()
	time_bottomup_array.append(bu.time(testsentence))
	td = TopDown()
	time_topdown_array.append(td.time(testsentence))

# NOTE(review): edge counts and bottom-up times are collected above but
# never plotted — only the Earley (red) and top-down (blue) curves appear.
plt.plot(len_array, time_earley_array, 'r--', len_array, time_topdown_array, 'b--')
plt.show()
Пример #14
0
def test_get_answer():
    """The recogniser accepts/rejects words against small test grammars."""
    # Table-driven form of the original three scenarios.
    cases = [
        ('a', [Rule('U', 'S'), Rule('S', 'a')], True),
        ('ab', [Rule('U', 'S'), Rule('S', 'aA'), Rule('A', 'b')], True),
        ('ac', [Rule('U', 'S'), Rule('S', 'aA'), Rule('A', 'b')], False),
    ]
    for word, rules, accepted in cases:
        earley = Earley(word)
        earley.rules_list = rules
        assert bool(earley.get_answer()) is accepted
Пример #15
0
from earley import Earley

# CLI entry point (Python 2): earleyParse a sentence against a JSON grammar.
if __name__ == '__main__':
  import sys
  import json
  
  # Require a grammar file and a sentence; otherwise print usage and exit.
  if len(sys.argv) < 3:
    print 'Program needs to be invoked like this:\n'
    print 'python earleyParser.py grammar.json "sentence to parse"\n'
    print 'you can turn on the debug mode changing the debug variable to 1'
    print 'to see the whole process'
    print '\nExamples of phrases the program recognises:'
    print '--->I want to know about the subjects'
    print '--->tell me about the content'
    print '--->about prizing'
    print '--->enrollment'
    print '--->tell me about subjects'
    sys.exit()

  # NOTE(review): grammar_file is never closed — a `with open(...)` block
  # would be safer.
  grammar_file = open(sys.argv[1])
  words = sys.argv[2].split(' ')
  grammar = json.load(grammar_file)
  
  e = Earley()
  # Set debug to 1 to trace the parse step by step (see usage text above).
  debug = 0
  chart = e.earleyParse(words, grammar,debug)

Пример #16
0
def exact_sample(wcfg,
                 wfsa,
                 root='[S]',
                 goal='[GOAL]',
                 n=1,
                 intersection='nederhof'):
    """
    Sample a derivation given a wcfg and a wfsa, with exact sampling, a
    form of MC-sampling
    """
    samples = []

    if intersection == 'nederhof':
        parser = Nederhof(wcfg, wfsa)
        logging.info('Using Nederhof parser')
    elif intersection == 'earley':
        parser = Earley(wcfg, wfsa)
        logging.info('Using Earley parser')
    else:
        raise NotImplementedError('I do not know this algorithm: %s' %
                                  intersection)

    logging.debug('Parsing...')
    forest = parser.do(root, goal)

    if not forest:
        print 'NO PARSE FOUND'
        return False
    else:

        logging.debug('Forest: rules=%d', len(forest))

        logging.debug('Topsorting...')
        # sort the forest
        sorted_nodes = top_sort(forest)

        # calculate the inside weight of the sorted forest
        logging.debug('Inside...')
        inside_prob = inside(forest, sorted_nodes)

        gen_sampling = GeneralisedSampling(forest, inside_prob)

        logging.debug('Sampling...')
        it = 0
        while len(samples) < n:
            it += 1
            if it % 10 == 0:
                logging.info('%d/%d', it, n)

            # retrieve a random derivation, with respect to the inside weight distribution
            d = gen_sampling.sample(goal)

            samples.append(d)

        counts = Counter(tuple(d) for d in samples)
        for d, n in counts.most_common():
            score = sum(r.log_prob for r in d)
            prob = math.exp(score - inside_prob[goal])
            print '# n=%s estimate=%s prob=%s score=%s' % (
                n, float(n) / len(samples), prob, score)
            tree = make_nltk_tree(d)
            inline_tree = inlinetree(tree)
            print inline_tree, "\n"