# Learn an SPN for one dataset and report its validation/test log-likelihood.
#
# Parameters:
#   name      -- dataset identifier, passed to learn.load_data.
#   t         -- passed through as the third argument of gens.learn_spn and
#                echoed in the summary line.
#   cp        -- optional; when not None it overwrites knobs.cluster_penalty.
#   verbosity -- 0..3 selects ERROR / WARNING / INFO / DEBUG logging; values
#                outside that range are clamped to the nearest end.
#
# Returns the tuple (net, vld_llh, tst_llh).
#
# Side effects: calls logging.basicConfig, mutates attributes of the `knobs`
# module, and prints a one-line summary to stdout.
def run(name, t, cp=None, verbosity=1):
    levels = (logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG)
    # Clamp instead of failing with KeyError on e.g. verbosity=4 or -1.
    level = levels[max(0, min(int(verbosity), len(levels) - 1))]
    logging.basicConfig(level=level)

    # learn SPN
    if cp is not None:
        knobs.cluster_penalty = cp
    # NOTE(review): applied regardless of cp -- confirm this was not meant to
    # sit under the `if` above.
    knobs.min_instances = 2

    trn, vld, tst, schema = learn.load_data(name)
    start = time.time()
    net = gens.learn_spn(trn, schema, t)
    # Smoothing on the validation set is currently disabled:
    #   learn.smooth_network(net, vld, verbosity>0)
    tst_llh = net.llh(tst)
    vld_llh = net.llh(vld)

    # The reported time covers structure learning plus both llh evaluations.
    elapsed = time.time() - start
    fields = (
        name,
        '\tt: %.5f' % t,
        '\tcp: ',
        cp,
        '\ttime:%.1f' % elapsed,
        '\ttree',
        len(net.pot),
        'va:%8.4f' % vld_llh,
        'te:%8.4f' % tst_llh,
    )
    # A single pre-joined argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print(' '.join(str(field) for field in fields))
    return net, vld_llh, tst_llh
""" from matplotlib import pyplot as plt import learn import numpy as np import math import cost_function def plot(x,y,xlabel,ylabel): plt.plot(x,y,'x') plt.xlabel(xlabel) plt.ylabel(ylabel) plt.show() d = learn.load_data("data/train_four.csv") chars_in_description = map(len,d.description) chars_in_summary = map(len,d.summary) num_votes = d.num_votes num_views = d.num_views num_comments = d.num_comments latitude = d.latitude longitude = d.longitude log_num_views = map(math.log,num_views + 1) log_mean_views = np.mean(log_num_views) mean_views = np.exp(log_mean_views) - 1 print(mean_views) num_points = len(d.num_views) def plot_multiple():