def initialise(wcfg, wfsa, root, goal, intersection):
    """
    Calculate a first derivation based on a simpler (thus smaller/faster)
    version of the grammar, thereby determining the initial conditions.

    Only applicable with the 'milos' grammar format, i.e. non-terminals have
    the form: '[P1234*2_1]'

    :param wcfg: iterable of grammar rules; each rule must expose ``lhs``
    :param wfsa: automaton handed to the intersection parser
    :param root: root symbol; rules rewriting it are always kept
    :param goal: goal symbol passed to the parser and to the sampler
    :param intersection: name of the intersection algorithm, either
        'nederhof' or 'earley'
    :return: the result of ``get_conditions`` on the sampled derivation, or
        an empty dict when the reduced grammar yields no parse
    :raises NotImplementedError: if ``intersection`` is not a known algorithm
    """
    smaller = WCFG([])

    logging.debug('Creating a smaller grammar for initial conditions...')
    for line in wcfg:
        # Keep rules whose lhs has a permutation length of 1 or 2, plus every
        # rule rewriting the root symbol or the '[UNK]' symbol.
        if (0 < permutation_length(line.lhs) <= 2
                or line.lhs == root
                or line.lhs == '[UNK]'):
            smaller.add(line)

    if intersection == 'nederhof':
        init_parser = Nederhof(smaller, wfsa)
    elif intersection == 'earley':
        init_parser = Earley(smaller, wfsa)
    else:
        raise NotImplementedError('I do not know this algorithm: %s' % intersection)

    logging.debug('Init Parsing...')
    init_forest = init_parser.do(root, goal)

    if not init_forest:
        # Parenthesised single-argument form prints the same text on
        # Python 2 and is also valid syntax on Python 3.
        print('NO PARSE FOUND')
        return {}

    logging.debug('Forest: rules=%d', len(init_forest))

    logging.debug('Init Topsorting...')
    # sort the forest
    sorted_nodes = top_sort(init_forest)

    # calculate the inside weight of the sorted forest
    logging.debug('Init Inside...')
    init_inside_prob = inside(init_forest, sorted_nodes)

    logging.debug('Init Sampling...')
    gen_sampling = GeneralisedSampling(init_forest, init_inside_prob)
    init_d = gen_sampling.sample(goal)

    return get_conditions(init_d)
def make_grammar(fsa):
    """
    Build a WCFG over the symbols of `fsa`.

    The grammar holds the rules '[S]' -> '[X]' and '[X]' -> '[X]' '[X]',
    followed by one rule '[X]' -> word for each symbol yielded by
    ``fsa.itersymbols()``. Every rule is created with the value 0.0.

    :param fsa: object exposing ``itersymbols()``
    :return: the populated WCFG
    """
    grammar = WCFG()
    # Structural rules first, then one lexical rule per symbol.
    rules = [
        Rule('[S]', ['[X]'], 0.0),
        Rule('[X]', ['[X]', '[X]'], 0.0),
    ]
    rules.extend(Rule('[X]', [word], 0.0) for word in fsa.itersymbols())
    for rule in rules:
        grammar.add(rule)
    return grammar