for i in range(data_size): # how many in this set set_size = weighted_sample( range(1,10+1), probs=[7187, 1484, 593, 334, 297, 165, 151, 86, 105, 112] ) # get the objects in the current set s = set(sample_sets_of_objects(set_size, all_objects)) # sample according to the target if random() < ALPHA: r = WORDS[len(s)-1] else: r = weighted_sample( WORDS ) # and append the sampled utterance data.append(FunctionData(input=[s], output=r)) # convert to "FunctionData" and store return data # compute a string describing the behavior of this knower-level def get_knower_pattern(ne): out = '' resp = [ ne(set(sample_sets_of_objects(n, all_objects))) for n in xrange(1, 10)] return ''.join([str(word_to_number[x]) if (x is not None and x is not 'undef') else 'U' for x in resp]) # ============================================================================================================ # All objects -- not very exciting #here this is really just a dummy -- one type of object, which is replicated in sample_sets_of_objects all_objects = make_all_objects(shape=['duck']) # all possible data sets on 10 objects all_possible_data = [ ('', set(sample_sets_of_objects(n, all_objects))) for n in xrange(1,10) ]
# And finally, add the primitives
for s in SHAPES:
    grammar.add_rule('BOOL', 'is_shape_', ['x', q(s)], FEATURE_WEIGHT)
for c in COLORS:
    grammar.add_rule('BOOL', 'is_color_', ['x', q(c)], FEATURE_WEIGHT)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Data
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from LOTlib.DataAndObjects import FunctionData, make_all_objects
from LOTlib.Miscellaneous import sample_one

all_objects = make_all_objects(shape=SHAPES, color=COLORS)

# Generator for data
# Just import some defaults
from LOTlib.Examples.NAND.TargetConcepts import TargetConcepts


def make_data(N=20, f=TargetConcepts[0]):
    """
    Generate N labeled data points for concept learning.

    Each point samples one object uniformly from `all_objects` and labels it
    with the target concept `f`, at observation reliability alpha=0.90.
    """
    data = []
    for _ in xrange(N):
        o = sample_one(all_objects)
        data.append(FunctionData(input=[o], output=f(o), alpha=0.90))
    return data
from LOTlib.DataAndObjects import FunctionData, make_all_objects
from LOTlib.Miscellaneous import sample_one
from Grammar import SHAPES, COLORS

# ------------------------------------------------------------------
# Set up the objects
# ------------------------------------------------------------------

all_objects = make_all_objects(shape=SHAPES, color=COLORS)

# ------------------------------------------------------------------
# Generator for data
# ------------------------------------------------------------------

# Just import some defaults
from LOTlib.Examples.NAND.TargetConcepts import TargetConcepts


def make_data(N=20, f=TargetConcepts[0]):
    """
    Generate N labeled data points for concept learning.

    Each point samples one object uniformly from `all_objects` and labels it
    with the target concept `f`, at observation reliability alpha=0.90.
    """
    data = []
    for _ in xrange(N):
        o = sample_one(all_objects)
        data.append(FunctionData(input=[o], output=f(o), alpha=0.90))
    return data
# get the objects in the current set s = set(sample_sets_of_objects(set_size, all_objects)) # sample according to the target if random() < alpha: r = WORDS[len(s) - 1] else: r = weighted_sample(WORDS) # and append the sampled utterance data.append(FunctionData(input=[s], output=r, alpha=alpha)) return data # here this is really just a dummy -- one type of object, which is replicated in sample_sets_of_objects all_objects = make_all_objects(shape=["duck"]) # all possible data sets on 10 objects all_possible_data = [("", set(sample_sets_of_objects(n, all_objects))) for n in xrange(1, 10)] # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Grammar # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ from LOTlib.Grammar import Grammar from LOTlib.Miscellaneous import q # The priors here are somewhat hierarchical by type in generation, tuned to be a little more efficient # (but the actual RR prior does not care about these probabilities) grammar = Grammar()
with open('HypothesisSpace.pkl', 'r') as f: hypotheses = list(pickle.load(f)) print "# Loaded hypotheses: ", len(hypotheses) ''' # For now, we'll just sample from the prior hypotheses = set([RationalRulesLOTHypothesis(grammar=grammar, maxnodes=100) for _ in xrange(20)]) for h in hypotheses: print h from LOTlib.DataAndObjects import make_all_objects objects = make_all_objects(size=['miniature', 'intermediate', 'colossal'], color=['cinnabar', 'viridian', 'cerulean'], shape=['rhombus', 'pentagon', 'dodecahedron']) data = make_data(dataset=['A', 'B']) L = [[h.compute_likelihood(dp) for h in hypotheses] for dp in data] # Store the likelihoods for visualization with open('Viz/Likelihoods_' + MODEL + '.csv', 'w') as f: lines = [] for l in L: lines.extend('\n'.join([str(x) for x in l])) lines.extend('\n') f.writelines(lines) # We'll use this to simulate the humans def human(objs, attr=['color'], value=['cinnabar'], n=200, groups=1): nyes = []
# Decide which rules to use which_rules = [r for r in grammar if r.nt not in ['START']] counts, sig2idx, prior_offset = create_counts(grammar, hypotheses, which_rules=which_rules) print "# Computed counts for each hypothesis & nonterminal" # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Load the human data # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ from LOTlib.DataAndObjects import make_all_objects objects = make_all_objects(size=['small', 'medium', 'large'], color=['red', 'green', 'blue'], shape=['square', 'triangle', 'circle']) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # The data that learners observed # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ data = [FunctionData(input=[Obj(size='small', color='green', shape='square')], output=True, alpha=0.99), FunctionData(input=[Obj(size='large', color='red', shape='triangle')], output=False, alpha=0.99)] # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Human data, we'll simulate on all objects # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ NYes = [] NNo = []
range(1, 10 + 1), probs=[7187, 1484, 593, 334, 297, 165, 151, 86, 105, 112]) # get the objects in the current set s = set(sample_sets_of_objects(set_size, all_objects)) # sample according to the target if random() < alpha: r = WORDS[len(s) - 1] else: r = weighted_sample(WORDS) # and append the sampled utterance data.append(FunctionData(input=[s], output=r, alpha=alpha)) return data #here this is really just a dummy -- one type of object, which is replicated in sample_sets_of_objects all_objects = make_all_objects(shape=['duck']) # all possible data sets on 10 objects all_possible_data = [('', set(sample_sets_of_objects(n, all_objects))) for n in xrange(1, 10)] # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Grammar # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ from LOTlib.Grammar import Grammar from LOTlib.Miscellaneous import q # The priors here are somewhat hierarchical by type in generation, tuned to be a little more efficient # (but the actual RR prior does not care about these probabilities)
from LOTlib.GrammarInference.Precompute import create_counts trees = [h.value for h in hypotheses] nt2counts, sig2idx, prior_offset = create_counts(grammar, trees, log=None) print "# Computed counts for each hypothesis & nonterminal" # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Load the human data # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ from LOTlib.DataAndObjects import make_all_objects objects = make_all_objects(size=['small', 'medium', 'large'], color=['red', 'green', 'blue'], shape=['square', 'triangle', 'circle']) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # The data that learners observed # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ data = [FunctionData(input=[Obj(size='small', color='green', shape='square')], output=True, alpha=0.99), FunctionData(input=[Obj(size='large', color='red', shape='triangle')], output=False, alpha=0.99)] # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Human data, we'll simulate on all objects # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ NYes = [] NNo = []
print "# Loaded hypotheses: ", len(hypotheses) ''' # For now, we'll just sample from the prior hypotheses = set([ RationalRulesLOTHypothesis(grammar=grammar, maxnodes=100) for _ in xrange(20) ]) for h in hypotheses: print h from LOTlib.DataAndObjects import make_all_objects objects = make_all_objects(size=['miniature', 'intermediate', 'colossal'], color=['cinnabar', 'viridian', 'cerulean'], shape=['rhombus', 'pentagon', 'dodecahedron']) data = make_data(dataset=['A', 'B']) L = [[h.compute_likelihood(dp) for h in hypotheses] for dp in data] # Store the likelihoods for visualization with open('Viz/Likelihoods_' + MODEL + '.csv', 'w') as f: lines = [] for l in L: lines.extend('\n'.join([str(x) for x in l])) lines.extend('\n') f.writelines(lines) # We'll use this to simulate the humans def human(objs, attr=['color'], value=['cinnabar'], n=200, groups=1): nyes = []