def cold_start_single_run(data, model_class, exp_params, model_params):
    """Run a single cold-start experiment (function to be sent to picloud).

    A model is built from ``model_class(**model_params)``, conditioned on
    ``data["observations"]`` and asked to predict ``data["missing"]``.  A
    burn-in sampling phase is followed by a collection phase; the collected
    samples are averaged per link and scored by AUC against the truth.

    Args:
        data: Mapping with the keys ``"observations"`` and ``"missing"``.
        model_class: Callable producing an object that offers
            ``create_RIPL()``, ``observe_data()``, ``set_predictions()`` and a
            ``RIPL`` attribute.
        exp_params: Mapping with the keys ``"n_samples"``,
            ``"intermediate_iter"``, ``"max_burn_time"`` and
            ``"max_sample_time"``.
        model_params: Keyword arguments forwarded to ``model_class``.

    Returns:
        dict with the keys ``"predictions"`` (per-link mean over samples),
        ``"ess"``, ``"AUC"``, ``"runtime"`` (``time.clock()`` difference),
        ``"max_memory"`` (largest ``memory()`` reading taken after burn-in)
        and ``"n_samples"`` (``samples.shape[1]``).
    """
    start = time.clock()

    # Create the model and condition it on the observations.
    model = model_class(**model_params)
    model.create_RIPL()
    model.observe_data(data["observations"])
    truth, missing_links = model.set_predictions(data["missing"])

    def run_phase(max_runtime):
        # Burn-in and collection differ only in their time budget.
        return sample.collect_n_samples(
            model.RIPL,
            n=exp_params["n_samples"],
            mh_iter=exp_params["intermediate_iter"],
            ids=missing_links,
            max_runtime=max_runtime,
            verbose=True,
            callback=return_AUC,
            callback_kwargs={"truth": truth},
        )

    # Burn in; the output of this phase is intentionally discarded.
    run_phase(exp_params["max_burn_time"])

    # Collect samples.
    max_memory = memory()
    mcmc_output = run_phase(exp_params["max_sample_time"])
    samples = mcmc_output["samples"]
    n_samples = samples.shape[1]
    sample_ess = mcmc_output["ess"]
    # Fix: the original evaluated max(...) here and threw the result away, so
    # the reading taken right after sampling never reached the report.
    max_memory = max(max_memory, memory())

    # Score samples: average over the sample axis, then pair with the truth.
    predictions = list(samples.mean(axis=1))
    AUC = ROCData(list(zip(truth, predictions))).auc()
    max_memory = max(max_memory, memory())

    return {
        "predictions": predictions,
        "ess": sample_ess,
        "AUC": AUC,
        "runtime": time.clock() - start,
        "max_memory": max(max_memory, memory()),
        "n_samples": n_samples,
    }
def IRM(fold=1, burn=50, n_samples=100, mh_iter=1000, D=1, verbose=True):
    """Fit a D-component relational model to one fold of the high school data.

    For each of the ``D`` components a CRP assigns nodes to classes and a
    memoised Beta(0.5, 0.5) draw gives a probability per class pair; the link
    probability ``p-friends`` is the product of the component probabilities.
    Training links are observed in both directions, test links are predicted,
    and the mean prediction over the collected samples is scored by AUC.

    Args:
        fold: Index substituted into ``../../data/hs/hs_%dof5.mat``.
        burn: Not read by this function.
            NOTE(review): burn-in below is sized by ``n_samples`` instead -
            confirm whether that is intended.
        n_samples: ``n`` passed to ``sample.collect_n_samples`` for both the
            burn-in and the collection phase.
        mh_iter: ``mh_iter`` passed to ``sample.collect_n_samples``.
        D: Number of components multiplied together in ``p-friends``.
        verbose: Print progress and the final AUC when true.

    Returns:
        dict with the keys ``'truth'``, ``'predictions'``, ``'samples'``,
        ``'ess'``, ``'AUC'`` and ``'runtime'``.
    """
    # Start timing
    t0 = time.clock()

    # The engine module itself serves as the RIPL; wipe any earlier session.
    ripl = venture_engine
    ripl.clear()

    # Load the requested fold of the high school data set.
    mat = scipy.io.loadmat("../../data/hs/hs_%dof5.mat" % fold, squeeze_me=True)
    observed = list(zip(mat['train_i'].flat, mat['train_j'].flat, mat['train_v'].flat))
    missing = list(zip(mat['test_i'].flat, mat['test_j'].flat, mat['test_v'].flat))

    for d in range(D):
        # CRP with its own concentration parameter
        ripl.assume('alpha-%d' % d, parse('(uniform-continuous 0.0001 2.0)'))
        ripl.assume('cluster-crp-%d' % d, parse('(CRP/make alpha-%d)' % d))
        # Class assignment lookup
        ripl.assume('node->class-%d' % d,
                    parse('(mem (lambda (nodes) (cluster-crp-%d)))' % d))
        # Class interaction probability lookup
        ripl.assume('classes->parameters-%d' % d,
                    parse('(mem (lambda (class1 class2) (beta 0.5 0.5)))'))

    # Relation probability: product of the per-component probabilities.
    factors = [
        '(classes->parameters-%d (node->class-%d node1) (node->class-%d node2))' % (d, d, d)
        for d in range(D)
    ]
    ripl.assume('p-friends',
                parse('(lambda (node1 node2) (* ' + ' '.join(factors) + '))'))

    # Relation evaluation function
    ripl.assume('friends',
                parse('(lambda (node1 node2) (bernoulli (p-friends node1 node2)))'))

    # Tell Venture about observations - doubling up for symmetry without
    # using min and max.
    for (i, j, v) in observed:
        if verbose:
            # Single-argument print(...) is also valid as a Python 2 print statement.
            print('Observing (%3d, %3d) = %d' % (i, j, v))
        outcome = 'true' if v else 'false'
        ripl.observe(parse('(friends %d %d)' % (i, j)), outcome)
        ripl.observe(parse('(friends %d %d)' % (j, i)), outcome)

    # Tell Venture that we want to predict P(unobserved link).
    truth = []
    missing_links = []
    for (i, j, v) in missing:
        if verbose:
            print('Predicting (%3d, %3d) = %d' % (i, j, v))
        truth.append(int(v))
        missing_links.append(ripl.predict(parse('(p-friends %d %d)' % (i, j))))

    def draw():
        # One sampling phase over the first element of every predict result.
        return sample.collect_n_samples(
            ripl,
            n=n_samples,
            mh_iter=mh_iter,
            ids=[an_id for (an_id, _) in missing_links],
            max_runtime=1800,
            verbose=verbose,
        )

    # Burn in (output discarded), then collect samples.
    draw()
    mcmc_output = draw()
    samples = mcmc_output['samples']
    sample_ess = mcmc_output['ess']

    if verbose:
        print('Fold complete')

    # Average over the sample axis and score against the held-out truth.
    predictions = list(samples.mean(axis=1))
    AUC = ROCData(list(zip(truth, predictions))).auc()

    if verbose:
        print('AUC = %f' % AUC)

    return {
        'truth': truth,
        'predictions': predictions,
        'samples': samples,
        'ess': sample_ess,
        'AUC': AUC,
        'runtime': time.clock() - t0,
    }
def IRM(fold=1, burn=5, n_samples=10, mh_iter=100, verbose=True,
        class_prob_mode='logistic-3'):
    """Run a relational model on one fold of the high school data set.

    Five CRPs sharing one ``alpha`` provide node->class lookups; the selected
    ``class_prob_mode`` decides how ``p-friends`` is built from them.  Training
    links are observed in both directions and test links are predicted.  The
    sampler is then advanced one sample at a time, first ``burn`` times and
    then ``n_samples`` times, printing the AUC of every single-sample
    prediction (this print does not depend on ``verbose``).

    Args:
        fold: Index substituted into ``../../data/hs/hs_%dof5.mat``.
        burn: Number of single-sample burn-in rounds.
        n_samples: Number of single-sample rounds after burn-in.
        mh_iter: ``mh_iter`` passed to ``sample.collect_n_samples``.
        verbose: Print observation/prediction progress when true.
        class_prob_mode: One of ``'beta'``, ``'logistic'``, ``'logistic-2'``
            or ``'logistic-3'``.  This was previously a constant fixed to
            ``'logistic-3'`` inside the function, which left the other
            branches unreachable; the default keeps the old behaviour.

    Returns:
        The string ``'Complete!'``.

    Raises:
        ValueError: If ``class_prob_mode`` is not one of the supported modes.
    """
    # Number of additive weight components for the multi-component modes.
    logistic_components = {'logistic-2': 2, 'logistic-3': 3}
    if (class_prob_mode not in ('beta', 'logistic')
            and class_prob_mode not in logistic_components):
        # Fail before touching the engine: an unknown mode would otherwise
        # leave 'p-friends' undefined.
        raise ValueError('Unknown class_prob_mode: %r' % (class_prob_mode,))

    # The engine module itself serves as the RIPL; wipe any earlier session.
    ripl = venture_engine
    ripl.clear()

    # Load the requested fold of the high school data set.
    data = scipy.io.loadmat("../../data/hs/hs_%dof5.mat" % fold, squeeze_me=True)
    observed = list(zip(data['train_i'].flat, data['train_j'].flat, data['train_v'].flat))
    missing = list(zip(data['test_i'].flat, data['test_j'].flat, data['test_v'].flat))

    # Helper function: logistic built from 'power' with base 2.71828.
    ripl.assume('logistic', parse('(lambda (x) (/ 1 (+ 1 (power 2.71828 (- 0 x)))))'))

    # Instantiate five CRPs sharing one concentration parameter.  All five
    # are always created, regardless of how many the chosen mode uses.
    ripl.assume('alpha', parse('(uniform-continuous 0.0001 2.0)'))
    for k in range(1, 6):
        ripl.assume('cluster-crp-%d' % k, parse('(CRP/make alpha)'))

    # Create class assignment lookup functions.
    for k in range(1, 6):
        ripl.assume('node->class-%d' % k,
                    parse('(mem (lambda (nodes) (cluster-crp-%d)))' % k))

    # Create the link probability function for the selected mode.
    if class_prob_mode == 'beta':
        ripl.assume('classes->parameters',
                    parse('(mem (lambda (class1 class2) (beta 0.5 0.5)))'))
        ripl.assume('p-friends',
                    parse('(lambda (node1 node2) (classes->parameters (node->class-1 node1) (node->class-1 node2)))'))
    elif class_prob_mode == 'logistic':
        ripl.assume('bias', parse('(normal 0 2)'))
        ripl.assume('sigma', parse('1'))
        ripl.assume('classes->parameters',
                    parse('(mem (lambda (class1 class2) (logistic (+ bias (normal 0 sigma)))))'))
        ripl.assume('p-friends',
                    parse('(lambda (node1 node2) (classes->parameters (node->class-1 node1) (node->class-1 node2)))'))
    else:
        # 'logistic-2' / 'logistic-3': a shared bias plus one memoised normal
        # weight per component, pushed through the logistic.
        ripl.assume('bias', parse('(normal 0 2)'))
        terms = []
        for k in range(1, logistic_components[class_prob_mode] + 1):
            ripl.assume('sigma-%d' % k, parse('1'))
            ripl.assume('classes->weights-%d' % k,
                        parse('(mem (lambda (class1 class2) (normal 0 sigma-%d)))' % k))
            terms.append(
                '(classes->weights-%d (node->class-%d node1) (node->class-%d node2))'
                % (k, k, k))
        ripl.assume('p-friends',
                    parse('(lambda (node1 node2) (logistic (+ bias '
                          + ' '.join(terms) + ')))'))

    # Create relation evaluation function.
    ripl.assume('friends',
                parse('(lambda (node1 node2) (bernoulli (p-friends node1 node2)))'))

    # Tell Venture about observations, in both directions.
    for (i, j, v) in observed:
        if verbose:
            # Single-argument print(...) is also valid as a Python 2 print statement.
            print('Observing (%3d, %3d) = %d' % (i, j, v))
        outcome = 'true' if v else 'false'
        ripl.observe(parse('(friends %d %d)' % (i, j)), outcome)
        ripl.observe(parse('(friends %d %d)' % (j, i)), outcome)

    # Tell Venture that we want to predict P(unobserved link).
    truth = []
    missing_links = []
    for (i, j, v) in missing:
        if verbose:
            print('Predicting (%3d, %3d) = %d' % (i, j, v))
        truth.append(int(v))
        missing_links.append(ripl.predict(parse('(p-friends %d %d)' % (i, j))))

    link_ids = [an_id for (an_id, _) in missing_links]

    def sample_and_report(rounds):
        # Draw one sample per round and print the AUC of that sample alone.
        for _ in range(rounds):
            mcmc_output = sample.collect_n_samples(
                ripl, n=1, mh_iter=mh_iter, ids=link_ids,
                max_runtime=600, verbose=verbose)
            predictions = list(mcmc_output['samples'].mean(axis=1))
            print(ROCData(list(zip(truth, predictions))).auc())

    # Burn in, then collect samples; both phases report identically.
    sample_and_report(burn)
    sample_and_report(n_samples)

    if verbose:
        print('Fold complete')

    return 'Complete!'
def get_samples(void_argument):
    """Build the D-component relational model, sample it and score by AUC.

    The model settings (``D``, ``alpha``), the data (``observed``,
    ``missing``) and the sampler settings (``n_samples``, ``mh_iter``,
    ``max_runtime``, ``verbose``) are not parameters: they are read from the
    scope in which this function is defined.

    Args:
        void_argument: Not read by this function.

    Returns:
        dict with the keys ``'predictions'``, ``'ess'``, ``'AUC'`` and
        ``'runtime'``.  The truth and the raw samples are deliberately not
        part of the result.
    """
    # TODO - does this import statement have to be here when using cloud?
    import venture_engine

    # Start timing
    t0 = time.clock()

    # The engine module itself serves as the RIPL; wipe any earlier session.
    ripl = venture_engine
    ripl.clear()

    for d in range(D):
        # CRP with the fixed concentration 'alpha' written into the program
        ripl.assume('cluster-crp-%d' % d, parse('(CRP/make %f)' % alpha))
        # Class assignment lookup
        ripl.assume('node->class-%d' % d,
                    parse('(mem (lambda (nodes) (cluster-crp-%d)))' % d))
        # Class interaction probability lookup
        ripl.assume('classes->parameters-%d' % d,
                    parse('(mem (lambda (class1 class2) (beta (+ 0.5 (gamma 1.0 1.0)) (+ 0.5 (gamma 1.0 1.0)))))'))

    # Relation probability: product of the per-component probabilities.
    factors = [
        '(classes->parameters-%d (node->class-%d node1) (node->class-%d node2))' % (d, d, d)
        for d in range(D)
    ]
    ripl.assume('p-friends',
                parse('(lambda (node1 node2) (* ' + ' '.join(factors) + '))'))

    # Relation evaluation function
    ripl.assume('friends',
                parse('(lambda (node1 node2) (bernoulli (p-friends node1 node2)))'))

    # Tell Venture about observations - doubling up for symmetry without
    # using min and max.
    for (i, j, v) in observed:
        if verbose:
            # Single-argument print(...) is also valid as a Python 2 print statement.
            print('Observing (%3d, %3d) = %d' % (i, j, v))
        outcome = 'true' if v else 'false'
        ripl.observe(parse('(friends %d %d)' % (i, j)), outcome)
        ripl.observe(parse('(friends %d %d)' % (j, i)), outcome)

    # Tell Venture that we want to predict P(unobserved link).
    truth = []
    missing_links = []
    for (i, j, v) in missing:
        if verbose:
            print('Predicting (%3d, %3d) = %d' % (i, j, v))
        truth.append(int(v))
        missing_links.append(ripl.predict(parse('(p-friends %d %d)' % (i, j))))

    def draw():
        # One sampling phase; each phase gets half of the runtime budget.
        return sample.collect_n_samples(
            ripl,
            n=n_samples,
            mh_iter=mh_iter,
            ids=[an_id for (an_id, _) in missing_links],
            max_runtime=max_runtime/2,
            verbose=verbose,
        )

    # Burn in (output discarded), then collect samples.
    draw()
    mcmc_output = draw()
    samples = mcmc_output['samples']
    sample_ess = mcmc_output['ess']

    if verbose:
        print('Fold complete')

    # Average over the sample axis and score against the held-out truth.
    predictions = list(samples.mean(axis=1))
    AUC = ROCData(list(zip(truth, predictions))).auc()

    if verbose:
        print('AUC = %f' % AUC)

    return {
        'predictions': predictions,
        'ess': sample_ess,
        'AUC': AUC,
        'runtime': time.clock() - t0,
    }