Пример #1
0
def run_experiment(index, dataset_name, name, constraint_getter, master_tree, X, y, out_dir, n_iters=1000, add_constraint=200, add_score=200,
                   add_likelihood=200, should_continue=False):


    N, D = X.shape
    df = Inverse(c=1)

    if dataset_name == 'iris':
        lm = GaussianLikelihoodModel(sigma=np.eye(D) / 9.0, sigma0=np.eye(D) / 2.0, mu0=X.mean(axis=0)).compile()
    elif dataset_name == 'zoo':
        lm = GaussianLikelihoodModel(sigma=np.diag(np.diag(np.cov(X.T))) / 4.0, sigma0=np.eye(D) / 2.0, mu0=X.mean(axis=0)).compile()
    else:
        lm = GaussianLikelihoodModel(sigma=np.diag(np.diag(np.cov(X.T))) / 2.0, sigma0=np.eye(D) / 2.0, mu0=X.mean(axis=0)).compile()
    if should_continue:
        with open(out_dir / name / 'scores-%u.pkl' % index, 'r') as fp:
            scores = pickle.load(fp)
        with open(out_dir / name / 'costs-%u.pkl' % index, 'r') as fp:
            costs = pickle.load(fp)
        with open(out_dir / name / 'final-tree-%u.pkl' % index, 'r') as fp:
            tree = DirichletDiffusionTree(df=df, likelihood_model=lm)
            tree.set_state(pickle.load(fp))
        sampler = MetropolisHastingsSampler(tree, X)
    else:
        scores = []
        costs = []
        tree = DirichletDiffusionTree(df=df, likelihood_model=lm)
        sampler = MetropolisHastingsSampler(tree, X)
        sampler.initialize_assignments()
        if dataset_name == 'zoo':
            sampler.tree = sampler.tree.induced_subtree(master_tree.points())

    current_run = []
    for i in tqdm(xrange(n_iters + 1)):
        sampler.sample()
        current_run.append(sampler.tree)
        if i % add_score == 0:
            scores.append(dist(master_tree, sampler.tree))
        if i % add_likelihood == 0:
            costs.append(sampler.tree.marg_log_likelihood())
        if i != 0 and i % add_constraint == 0:
            if constraint_getter is not None:
                constraint = constraint_getter.get_constraint(current_run)
                if constraint is not None:
                    sampler.add_constraint(constraint)
            current_run = []
    # plot_tree(sampler.tree, y)

    (out_dir / name).mkdir_p()
    with open(out_dir / name / 'scores-%u.pkl' % index, 'w') as fp:
        pickle.dump(scores, fp)
    print len(costs)
    with open(out_dir / name / 'costs-%u.pkl' % index, 'w') as fp:
        pickle.dump(costs, fp)
    # with open(out_dir / name / 'trees-%u.pkl' % index, 'r') as fp:
        # previous_trees = pickle.load(fp)
    # with open(out_dir / name / 'trees-%u.pkl' % index, 'w') as fp:
        # pickle.dump(previous_trees + [t.get_state() for t in trees], fp)
    with open(out_dir / name / 'final-tree-%u.pkl' % index, 'w') as fp:
        pickle.dump(sampler.tree.get_state(), fp)
    return costs, scores, sampler
Пример #2
0
    sampler_costs = [None, None]
    scores = [[], []]
    for val, sampler in enumerate([sampler1, sampler2]):
        sampler_cost = []
        for i in xrange(100):
            trees, costs = iterate(sampler, 1000)
            sampler_cost.extend(costs)

            if val == 0:
                constraint = random.choice(all_constraints)
                while constraint in satisfied[val]:
                    constraint = random.choice(all_constraints)
            elif val == 1:
                idx = np.random.permutation(list(points))[:20]
                combos = get_variance(trees, idx)
                constraint = get_constraint(master_tree, combos[0])
                id = 1
                while constraint in satisfied[val]:
                    constraint = get_constraint(combos[id])
                    id += 1
            print "Adding", val, constraint
            sampler.add_constraint(constraint)
            satisfied[val].add(constraint)
            scores[val].append(float(sampler.tree.score_constraints(test_constraints))
                                / len(test_constraints))
        sampler_costs[val] = sampler_cost

    plt.plot(sampler_costs[0], label='random')
    plt.plot(sampler_costs[1], label='entropy')
    plt.show()
Пример #3
0
                             mu0=X.mean(axis=0)).compile()

# Build an unconstrained Dirichlet diffusion tree and a Metropolis-Hastings
# sampler over the data matrix X (`df`, `lm`, `X`, `train_constraints` and
# `test_constraints` are defined earlier in the script).
model = DirichletDiffusionTree(df=df, likelihood_model=lm, constraints=[])
sampler = MetropolisHastingsSampler(model, X)
sampler.initialize_assignments()

constraint_add = 500      # add one training constraint every 500 iterations
constraint_index = 0      # next entry of train_constraints to add
n_iters = 100000
score_every = 1000        # record the test-constraint score at this period

likelihoods = []
scores = []
for i in tqdm(xrange(n_iters + constraint_add)):
    # Skip i == 0 so the first constraint lands after `constraint_add` steps.
    if i != 0 and i % constraint_add == 0:
        sampler.add_constraint(train_constraints[constraint_index])
        constraint_index += 1
    sampler.sample()
    likelihoods.append(sampler.tree.marg_log_likelihood())
    if i % score_every == 0:
        # Fraction of held-out constraints the current tree satisfies.
        scores.append(
            float(sampler.tree.score_constraints(test_constraints)) /
            len(test_constraints))

# One final score so the trace covers the last iteration as well.
scores.append(
    float(sampler.tree.score_constraints(test_constraints)) /
    len(test_constraints))

fontsize = 18
plt.figure()
plt.xlim([0, n_iters + constraint_add])
Пример #4
0
# Sample a DDT while feeding it training constraints on a fixed schedule,
# tracking the marginal log-likelihood and the held-out constraint score,
# then plot the score trace.
model = DirichletDiffusionTree(df=df, likelihood_model=lm, constraints=[])
sampler = MetropolisHastingsSampler(model, X)
sampler.initialize_assignments()


constraint_add = 500
constraint_index = 0
n_iters = 100000
score_every = 1000

likelihoods = []
scores = []


def _test_score():
    # Fraction of held-out constraints the current tree satisfies.
    return float(sampler.tree.score_constraints(test_constraints)) / len(test_constraints)


for step in tqdm(xrange(n_iters + constraint_add)):
    # Add the next training constraint every `constraint_add` steps
    # (never on step 0).
    if step and step % constraint_add == 0:
        sampler.add_constraint(train_constraints[constraint_index])
        constraint_index += 1
    sampler.sample()
    likelihoods.append(sampler.tree.marg_log_likelihood())
    if step % score_every == 0:
        scores.append(_test_score())

# Final score so the trace extends through the last iteration.
scores.append(_test_score())

fontsize = 18
plt.figure()
plt.xlim([0, n_iters + constraint_add])
plt.ylim([0, 1])
plt.xlabel("Iterations", fontsize=fontsize)
plt.ylabel("Constraint Score", fontsize=fontsize)
plt.plot(np.arange(0, n_iters + constraint_add + score_every, score_every), scores)
Пример #5
0
def run_experiment(index,
                   dataset_name,
                   name,
                   constraint_getter,
                   master_tree,
                   X,
                   y,
                   out_dir,
                   n_iters=1000,
                   add_constraint=200,
                   add_score=200,
                   add_likelihood=200,
                   should_continue=False):

    N, D = X.shape
    df = Inverse(c=1)

    if dataset_name == 'iris':
        lm = GaussianLikelihoodModel(sigma=np.eye(D) / 9.0,
                                     sigma0=np.eye(D) / 2.0,
                                     mu0=X.mean(axis=0)).compile()
    elif dataset_name == 'zoo':
        lm = GaussianLikelihoodModel(sigma=np.diag(np.diag(np.cov(X.T))) / 4.0,
                                     sigma0=np.eye(D) / 2.0,
                                     mu0=X.mean(axis=0)).compile()
    else:
        lm = GaussianLikelihoodModel(sigma=np.diag(np.diag(np.cov(X.T))) / 2.0,
                                     sigma0=np.eye(D) / 2.0,
                                     mu0=X.mean(axis=0)).compile()
    if should_continue:
        with open(out_dir / name / 'scores-%u.pkl' % index, 'r') as fp:
            scores = pickle.load(fp)
        with open(out_dir / name / 'costs-%u.pkl' % index, 'r') as fp:
            costs = pickle.load(fp)
        with open(out_dir / name / 'final-tree-%u.pkl' % index, 'r') as fp:
            tree = DirichletDiffusionTree(df=df, likelihood_model=lm)
            tree.set_state(pickle.load(fp))
        sampler = MetropolisHastingsSampler(tree, X)
    else:
        scores = []
        costs = []
        tree = DirichletDiffusionTree(df=df, likelihood_model=lm)
        sampler = MetropolisHastingsSampler(tree, X)
        sampler.initialize_assignments()
        if dataset_name == 'zoo':
            sampler.tree = sampler.tree.induced_subtree(master_tree.points())

    current_run = []
    for i in tqdm(xrange(n_iters + 1)):
        sampler.sample()
        current_run.append(sampler.tree)
        if i % add_score == 0:
            scores.append(dist(master_tree, sampler.tree))
        if i % add_likelihood == 0:
            costs.append(sampler.tree.marg_log_likelihood())
        if i != 0 and i % add_constraint == 0:
            if constraint_getter is not None:
                constraint = constraint_getter.get_constraint(current_run)
                if constraint is not None:
                    sampler.add_constraint(constraint)
            current_run = []
    # plot_tree(sampler.tree, y)

    (out_dir / name).mkdir_p()
    with open(out_dir / name / 'scores-%u.pkl' % index, 'w') as fp:
        pickle.dump(scores, fp)
    print len(costs)
    with open(out_dir / name / 'costs-%u.pkl' % index, 'w') as fp:
        pickle.dump(costs, fp)
    # with open(out_dir / name / 'trees-%u.pkl' % index, 'r') as fp:
    # previous_trees = pickle.load(fp)
    # with open(out_dir / name / 'trees-%u.pkl' % index, 'w') as fp:
    # pickle.dump(previous_trees + [t.get_state() for t in trees], fp)
    with open(out_dir / name / 'final-tree-%u.pkl' % index, 'w') as fp:
        pickle.dump(sampler.tree.get_state(), fp)
    return costs, scores, sampler
Пример #6
0
    scores = [[], []]
    for val, sampler in enumerate([sampler1, sampler2]):
        sampler_cost = []
        for i in range(100):
            trees, costs = iterate(sampler, 1000)
            sampler_cost.extend(costs)

            if val == 0:
                constraint = random.choice(all_constraints)
                while constraint in satisfied[val]:
                    constraint = random.choice(all_constraints)
            elif val == 1:
                idx = np.random.permutation(list(points))[:20]
                combos = get_variance(trees, idx)
                constraint = get_constraint(master_tree, combos[0])
                id = 1
                while constraint in satisfied[val]:
                    constraint = get_constraint(combos[id])
                    id += 1
            print "Adding", val, constraint
            sampler.add_constraint(constraint)
            satisfied[val].add(constraint)
            scores[val].append(
                float(sampler.tree.score_constraints(test_constraints)) /
                len(test_constraints))
        sampler_costs[val] = sampler_cost

    plt.plot(sampler_costs[0], label='random')
    plt.plot(sampler_costs[1], label='entropy')
    plt.show()