Example #1
 def test_uniformsampler2(self):
     grammar = cfg.load_from_file("../data/cfgs/count1.cfg")
     sampler = uniformsampler.UniformSampler(grammar, 10)
     #sampler.dump()
     self.assertEqual(sampler.get("S", 0), 1)
     self.assertEqual(sampler.get("S", 1), 2)
     self.assertEqual(sampler.get("S", 2), 1)
     self.assertEqual(sampler.get("S", 3), 0)
     self.assertEqual(sampler.get("S", 5), 0)
Example #2
 def test_smart_infix(self):
     grammar = cfg.load_from_file("../data/cfgs/abab2.cfg")
     # sample from the strings that don't have w1 as a yield of this nonterminal
     w1 = ("a1", "b1")
     nonterminal = "O"
     ig = grammar.infix_grammar_without_nt(w1, nonterminal)
     self.assertTrue(len(ig.nonterminals) > 0)
     sampler = uniformsampler.UniformSampler(ig, 20)
     self.assertEqual(sampler.get_total(3), 2)
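get_total(n) presumably aggregates the per-nonterminal counts over the grammar's start symbols. A one-line guess at the relationship, assuming the start_set attribute that appears in later examples:

def get_total(sampler, grammar, n):
    return sum(sampler.get(s, n) for s in grammar.start_set)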
Example #3
    def test_fkp_strong_1nt(self):
        """
        Test finding strings for a given NT.
        """
        grammar = cfg.load_from_file("../data/cfgs/abab2.cfg")
        parser = earleyparser.EarleyParser(grammar)
        sampler = uniformsampler.UniformSampler(grammar, 100)
        k = 2
        n = 5
        nonterminal = "O"
Example #4
 def test_uniformsampler(self):
     grammar = cfg.load_from_file("../data/cfgs/cfg1.cfg")
     sampler = uniformsampler.UniformSampler(grammar, 20)
     self.assertEqual(sampler.get("S", 0), 0)
     self.assertEqual(sampler.get("S", 1), 1)
     self.assertEqual(sampler.get("S", 2), 0)
     self.assertEqual(sampler.get("S", 3), 2)
     self.assertEqual(sampler.get("S", 5), 4)
     #print "about to sample."
     tree = sampler.sample(5)
     self.assertEqual(tree.width(), 5)
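width() presumably returns the length of the tree's yield, hence 5 for a tree sampled at length 5. A sketch for a hypothetical tree encoded as nested (label, child, ...) tuples with bare terminals at the leaves:

def width(tree):
    if not isinstance(tree, tuple):
        return 1  # a terminal leaf contributes one symbol to the yield
    return sum(width(child) for child in tree[1:])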
Example #5
    def test_intersection1(self):
        grammar = cfg.load_from_file("../data/cfgs/cfg1.cfg")
        prefix = ("ax", )
        pg = grammar.prefix_grammar(prefix)
        #print "Dump prefix grammar"
        #pg.dump()
        self.assertTrue("S" in pg.nonterminals)
        sampler = uniformsampler.UniformSampler(pg, 10)

        #print "Dumping intersected sampler"
        #sampler.dump()
        self.assertEqual(sampler.get("S", 0), 0)
Example #6
 def test_context_sampler1(self):
     grammar = cfg.load_from_file("../data/cfgs/cfg6.cfg")
     us = uniformsampler.UniformSampler(grammar, 5)
     cs = uniformsampler.ContextSampler(grammar, us, 5)
     self.assertEqual(cs.index["B"][0], 0)
     self.assertEqual(cs.index["S"][0], 1)
     self.assertEqual(cs.index["B"][2], 3)
     context0 = cs.sample_context("S", 0)
     self.assertEqual(context0, ((), ()))
     for i in xrange(100):
         l, r = cs.sample_context("B", 2)
         #print l, "---", r
         self.assertEqual(len(l) + len(r), 2)
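The index assertions suggest cs.index[nt][w] counts the distinct contexts (l, r) of nt with len(l) + len(r) == w. Given such counts, uniform sampling can pick a width by inverse transform and then a context of that width uniformly; a minimal sketch of the width step (an illustration, not the library's code):

import random

def sample_width(counts):
    # counts[w] = number of contexts of total width w; must not be all zero
    total = sum(counts)
    r = random.randrange(total)
    for w, c in enumerate(counts):
        if r < c:
            return w
        r -= c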
Example #7
def test_one_fkp_exact(grammar, nyields):
	"""
	See if this has the exact 1-fkp.
	"""
	sampler = uniformsampler.UniformSampler(grammar, max_substring_length)
	result = dict()
	for nt in grammar.nonterminals:
		w = test_one_fkp_nt_exact(grammar, sampler, nt, nyields)
		if w:
			result[nt] = w
		else:
			logging.info("failed on %s" % nt)
			return False
	return result
Example #8
def test_strong_fkp_full(grammar, k):
	"""
	Main entry point for the primal tester.
	Returns False on failure, otherwise a map from nonterminals to k-tuples of strings.
	"""
	result = dict()
	parser = earleyparser.EarleyParser(grammar)
	sampler = uniformsampler.UniformSampler(grammar, max_substring_length)
	ncontexts = 25
	for nt in grammar.nonterminals:
		r = test_strong_fkp_nt(grammar, parser, sampler, nt, k, ncontexts, stop=True)
		if r:
			result[nt] = r
		else:
			return False
	return result
Example #9
def count_one_fcp_exact(grammar, ncontexts):
	"""
	See if this has the exact 1-fcp, using ncontexts contexts per nonterminal.
	"""
	sampler = uniformsampler.UniformSampler(grammar, max_substring_length)
	contextsampler = uniformsampler.ContextSampler(grammar, sampler, max_context_length)
	result = dict()
	#ok = True
	for nt in grammar.nonterminals:
		if nt in grammar.start_set and len(grammar.start_set) == 1:
			result[nt] = ((), ())
		else:
			c = test_one_fcp_nt_exact(grammar, sampler, contextsampler, nt, ncontexts)
			if c:
				result[nt] = c
	return result
Example #10
def test_strong_fcp_full(grammar, k):
	"""
	Main entry point for the dual tester.

	Method:
	"""
	result = dict()
	parser = earleyparser.EarleyParser(grammar)
	sampler = uniformsampler.UniformSampler(grammar, max_substring_length)
	contextsampler = uniformsampler.ContextSampler(grammar, sampler, max_context_length)
	ncontexts = 25
	for nt in grammar.nonterminals:
		r = test_strong_fcp_nt(grammar, parser, sampler, contextsampler, nt, k, ncontexts, stop=True)
		if r:
			print "nt", nt, r
			result[nt] = r
		else:
			print "Fail ", nt
			return False
	return result
Example #11
def test_one_fkp_nt_string_inexact(grammar, nonterminal, w, ncontexts):
	"""
	See if this nonterminal is characterised by this single substring
	which is one of its yields.

	First
	"""
	ig = grammar.infix_grammar_without_nt(w, nonterminal)
	if ig.is_empty():
		return True
	# it is nonempty, so sample from it
	sampler = uniformsampler.UniformSampler(ig, max_substring_length)
	samples = sampler.multiple_sample(ncontexts, ncontexts ** 2)
	contexts = []
	for lwr in set(samples):
		# extract the contexts
		# for each context test if it is a context of the nonterminal.
		for context in extract(lwr, w):
			contexts.append(context)
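extract(lwr, w) presumably yields one (left, right) context per occurrence of w as a contiguous infix of the sampled string lwr, so that left + w + right == lwr. A sketch under that assumption:

def extract(lwr, w):
    lwr, w = tuple(lwr), tuple(w)
    for i in range(len(lwr) - len(w) + 1):
        if lwr[i:i + len(w)] == w:
            yield (lwr[:i], lwr[i + len(w):])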
Example #12
def pick_some_inside_strings(igrammar, context, nsamples):
	"""
	Pick some strings generated by this grammar, which is the
	intersection with a context automaton.

	Method: construct sampler; fiddle about a bit to get the right length.
	Sample and then snip off.
	"""
	(left,right) = context
	ll = len(left)
	lr = len(right)
	l = ll + lr
	sampler = uniformsampler.UniformSampler(igrammar, l + max_substring_length)
	# this will not generate any strings of length less than l
	raw = list(set(sampler.multiple_sample(nsamples, nsamples * 2)))
	#print context
	#print "RAW", raw
	result = [x[ll:len(x) - lr] for x in raw]
	#print result
	return result
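A tiny worked example of the "snip off" step: with context (("a",), ("b",)) and a sampled yield ("a", "x", "y", "b"), slicing away the context leaves the inside string ("x", "y").

left, right = ("a",), ("b",)
x = ("a", "x", "y", "b")
assert x[len(left):len(x) - len(right)] == ("x", "y")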
Example #13
def sample_from_substring(grammar, w, n):
	"""
	Return a set of up to n short contexts of this substring.

	If we can't find enough, return what we have and log a warning.
	"""
	infixgrammar = grammar.infix_grammar(w)
	
	sampler = uniformsampler.UniformSampler(infixgrammar, len(w) + max_context_length)
	total = 0
	counts = []
	lengths = []
	for i in xrange(len(w), len(w) + max_context_length):
		c = sampler.get_total(i)
		counts.append(c)
		lengths.append(i)
		total += c
		if total > n * n:
			break
	else:
		# we are in some situation where the number of strings is low.
		logging.warning("Sparse contexts")
	max_attempts = n * n
	result = set()
	distribution = [float(x) / total for x in counts]  # float() avoids integer division under Python 2
	clengths = numpy.random.choice(lengths, max_attempts, p=distribution )
	for l in clengths:
		lwr = sampler.sample(l).collectYield()
		for context in extract(lwr, w):
			result.add(context)
		if len(result) >= n:
			return result
	else:
		logging.warning("Didn't find all of the contexts.")
		return result
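A self-contained illustration of the length-sampling step above: lengths are drawn in proportion to how many strings of each length exist, so a length with zero strings is never chosen. The counts here are made up for the demonstration:

import numpy

counts = [2.0, 8.0, 0.0, 10.0]   # strings available at each length
lengths = [3, 4, 5, 6]
total = sum(counts)
distribution = [c / total for c in counts]
print(numpy.random.choice(lengths, 5, p=distribution))  # never draws length 5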
Example #14
factory = generatecfg.CnfFactory()
factory.number_nonterminals = 10
factory.number_terminals = 100
factory.number_binary_productions = 30
factory.number_lexical_productions = 100

number_grammars = 5
max_length = 20
samples_per_length = 10
for g in xrange(number_grammars):
    print "Grammar ", g
    x = []
    y = []
    grammar = factory.make_grammar()

    us = uniformsampler.UniformSampler(grammar, max_length)
    for l in xrange(1, max_length + 1):
        density = us.density(l)
        x.append(l)
        y.append(density)
    plt.plot(x, y, "bx-")
    x = []
    y = []
    for l in xrange(1, max_length + 1):
        density = us.string_density(l, samples_per_length)
        x.append(l)
        y.append(density)
    plt.plot(x, y, "ro-")

plt.xlabel('Length')
plt.ylabel('Density')
Example #15
args = parser.parse_args()


if args.seed:
	random.seed(args.seed)
	numpy.random.seed(args.seed)

verbose = False
result_dict = {}
target_pcfg = wcfg.load_wcfg_from_file(args.input)
target_ambiguity = target_pcfg.estimate_ambiguity(samples=args.samples, maxlength=args.maxlength)
result_dict["ambiguity"] = target_ambiguity
print("Target grammar ambiguity H( tree | word): %e" % target_ambiguity)
## Now try string density using a sensible approach.
us = uniformsampler.UniformSampler(target_pcfg, args.length)
sd = us.string_density(args.length, args.samples)
print("String density: %e" % sd)
result_dict["string density"] = sd


naivesd = target_pcfg.estimate_string_density(args.length, args.samples)
print("Naive String density: %e" % naivesd)
result_dict["naive string density"] = naivesd

try:
	asymptotic_wcfg = wcfg.load_wcfg_from_file(args.output)


	## Check to see if the kernel is in fact identifiable
	ol = oracle_learner.OracleLearner(target_pcfg)
Example #16
 def test_density1(self):
     grammar = cfg.load_from_file("../data/cfgs/cfg1.cfg")
     sampler = uniformsampler.UniformSampler(grammar, 10)
     density = 4.0 / (5**5)
     self.assertAlmostEqual(sampler.density(5), density)
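density(l) is presumably the number of strings of length l that the grammar generates divided by the number of all length-l strings over the terminal alphabet. Reading the numbers off the tests, cfg1.cfg would have a 5-symbol terminal alphabet (an assumption, not checked against the grammar file) and exactly 4 strings of length 5, which matches sampler.get("S", 5) == 4 in Example #4:

density = 4.0 / 5 ** 5  # = 0.00128: 4 grammar strings out of 5**5 possible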