Exemplo n.º 1
0
def run_experiment(index, dataset_name, name, constraint_getter, master_tree, X, y, out_dir, n_iters=1000, add_constraint=200, add_score=200,
                   add_likelihood=200, should_continue=False):
    """Run (or resume) one constrained Dirichlet-diffusion-tree MCMC experiment.

    Parameters
    ----------
    index : int
        Run identifier used in the persisted file names.
    dataset_name : str
        Selects the Gaussian likelihood noise level ('iris', 'zoo', or other).
    name : str
        Sub-directory of ``out_dir`` where results are written.
    constraint_getter : object or None
        If not None, its ``get_constraint(recent_trees)`` is polled every
        ``add_constraint`` iterations to interactively add a constraint.
    master_tree :
        Reference tree used for scoring (and, for 'zoo', to restrict points).
    X : ndarray, shape (N, D)
        Data matrix.
    y :
        Labels; accepted for interface compatibility, not used here.
    out_dir : path-like
        Output root; must support the ``/`` join operator and ``mkdir_p()``.
    n_iters : int
        Number of sampling iterations (one extra is run so the final state
        is also scored).
    add_constraint, add_score, add_likelihood : int
        Periods, in iterations, for constraint polling / tree scoring /
        likelihood recording.
    should_continue : bool
        Resume from previously pickled state instead of starting fresh.

    Returns
    -------
    tuple
        ``(costs, scores, sampler)``.
    """
    N, D = X.shape
    df = Inverse(c=1)

    # Per-dataset observation noise for the Gaussian likelihood model.
    if dataset_name == 'iris':
        lm = GaussianLikelihoodModel(sigma=np.eye(D) / 9.0, sigma0=np.eye(D) / 2.0, mu0=X.mean(axis=0)).compile()
    elif dataset_name == 'zoo':
        lm = GaussianLikelihoodModel(sigma=np.diag(np.diag(np.cov(X.T))) / 4.0, sigma0=np.eye(D) / 2.0, mu0=X.mean(axis=0)).compile()
    else:
        lm = GaussianLikelihoodModel(sigma=np.diag(np.diag(np.cov(X.T))) / 2.0, sigma0=np.eye(D) / 2.0, mu0=X.mean(axis=0)).compile()
    if should_continue:
        # Resume: reload traces and the final tree state. Pickle is a binary
        # format, so the files must be opened in 'rb' mode (text mode fails
        # under Python 3 and corrupts data on Windows).
        with open(out_dir / name / 'scores-%u.pkl' % index, 'rb') as fp:
            scores = pickle.load(fp)
        with open(out_dir / name / 'costs-%u.pkl' % index, 'rb') as fp:
            costs = pickle.load(fp)
        with open(out_dir / name / 'final-tree-%u.pkl' % index, 'rb') as fp:
            tree = DirichletDiffusionTree(df=df, likelihood_model=lm)
            tree.set_state(pickle.load(fp))
        sampler = MetropolisHastingsSampler(tree, X)
    else:
        scores = []
        costs = []
        tree = DirichletDiffusionTree(df=df, likelihood_model=lm)
        sampler = MetropolisHastingsSampler(tree, X)
        sampler.initialize_assignments()
        if dataset_name == 'zoo':
            # Restrict the sampled tree to the points present in the master tree.
            sampler.tree = sampler.tree.induced_subtree(master_tree.points())

    current_run = []
    for i in tqdm(range(n_iters + 1)):
        sampler.sample()
        current_run.append(sampler.tree)
        if i % add_score == 0:
            scores.append(dist(master_tree, sampler.tree))
        if i % add_likelihood == 0:
            costs.append(sampler.tree.marg_log_likelihood())
        if i != 0 and i % add_constraint == 0:
            # Offer the trees sampled since the last poll to the constraint
            # oracle; reset the window either way.
            if constraint_getter is not None:
                constraint = constraint_getter.get_constraint(current_run)
                if constraint is not None:
                    sampler.add_constraint(constraint)
            current_run = []

    (out_dir / name).mkdir_p()
    # Write all pickles in binary mode ('wb') to match the binary format.
    with open(out_dir / name / 'scores-%u.pkl' % index, 'wb') as fp:
        pickle.dump(scores, fp)
    print(len(costs))
    with open(out_dir / name / 'costs-%u.pkl' % index, 'wb') as fp:
        pickle.dump(costs, fp)
    with open(out_dir / name / 'final-tree-%u.pkl' % index, 'wb') as fp:
        pickle.dump(sampler.tree.get_state(), fp)
    return costs, scores, sampler
Exemplo n.º 2
0
# Keep only the subsampled rows and their labels.
X = X[idx]
y = y[idx]

# Remap each interactively gathered triplet constraint into the subsampled
# index space; any triplet mentioning a point outside the subset is dropped.
constraints = set()
for triplet in interactor.current_interactions:
    a, b, c = triplet
    if a in idx_map and b in idx_map and c in idx_map:
        constraints.add((idx_map[a], idx_map[b], idx_map[c]))

logging.info("Interactions: %s", constraints)
logging.info("Interactions: %u", len(constraints))

N, D = X.shape

# Diffusion-tree prior plus a compiled Gaussian likelihood; the small ridge
# added to the covariance keeps it well-conditioned.
df = Inverse(c=2)
cov = np.cov(X.T) / 4.0 + np.eye(D) * 0.001
lm = GaussianLikelihoodModel(sigma=cov, sigma0=np.eye(D) / 2.0, mu0=X.mean(axis=0)).compile()

model = DirichletDiffusionTree(df=df, likelihood_model=lm, constraints=constraints)
sampler = MetropolisHastingsSampler(model, X)
sampler.initialize_assignments()


def iterate(iters):
    """Advance the Metropolis-Hastings sampler by ``iters`` steps."""
    for _ in tqdm(xrange(iters)):
        sampler.sample()
Exemplo n.º 3
0
import numpy as np
import matplotlib.pyplot as plt  # `plt` is used below but was never imported
from trees.util import plot_tree, plot_tree_2d
from trees.ddt import DirichletDiffusionTree, Inverse, GaussianLikelihoodModel
from trees.mcmc import MetropolisHastingsSampler
from tqdm import tqdm


if __name__ == "__main__":
    # Synthetic data: N standard-normal points in D dimensions.
    D = 2
    N = 100
    X = np.random.multivariate_normal(mean=np.zeros(D), cov=np.eye(D), size=N).astype(np.float32)
    df = Inverse(c=1)
    lm = GaussianLikelihoodModel(sigma=np.eye(D) / 4.0, mu0=np.zeros(D), sigma0=np.eye(D))
    ddt = DirichletDiffusionTree(df=df,
                                 likelihood_model=lm)
    mh = MetropolisHastingsSampler(ddt, X)
    mh.initialize_assignments()

    # Run the sampler for 1000 iterations.
    for _ in tqdm(range(1000)):
        mh.sample()

    # Trace of marginal log-likelihoods over the run.
    plt.figure()
    plt.plot(mh.likelihoods)

    plt.figure()
    plot_tree(mh.tree)

    plt.figure()
    plot_tree_2d(mh.tree, X)

    plt.show()
Exemplo n.º 4
0
N = X.shape[0]
# Fixed seed for reproducibility of any NumPy-based sampling below.
np.random.seed(0)
# idx = np.random.permutation(np.arange(N))[:20]

# X = X[idx]
# y = np.array(y)
# y = y[idx]

N, D = X.shape
df = Inverse(c=1)

# Gaussian likelihood centered on the data mean, compiled for sampling.
lm = GaussianLikelihoodModel(sigma=np.eye(D) / 4.0, sigma0=np.eye(D) / 2.0, mu0=X.mean(axis=0)).compile()

tree = DirichletDiffusionTree(df=df, likelihood_model=lm)
sampler = MetropolisHastingsSampler(tree, X)
sampler.initialize_assignments()

# NOTE(review): this rebinds D from the data dimensionality to an inverse
# pairwise-distance matrix — confusing shadowing; later code presumably
# expects the matrix. Verify before renaming.
D = 1.0 / squareform(pdist(X))

def plot_tree(tree):
    """Draw ``tree`` with graphviz, relabeling each leaf by its class label."""
    labeled = tree.copy()
    # Swap every leaf's stored point index for the corresponding entry of ``y``.
    for node in labeled.dfs():
        if node.is_leaf():
            node.point = y[node.point]

    phylo_tree = Phylo.read(StringIO(labeled.to_newick()), 'newick')
    Phylo.draw_graphviz(phylo_tree, prog='neato')
    plt.show()
Exemplo n.º 5
0
# Shuffle the master tree's triplet constraints into a train/test split:
# the first 200 are revealed during sampling, the rest (capped at 10000)
# are held out for evaluation.
master_constraints = list(master_tree.generate_constraints())
random.seed(0)
random.shuffle(master_constraints)
train_constraints = master_constraints[:200]
test_constraints = master_constraints[200:][:10000]

df = Inverse(c=0.9)

lm = GaussianLikelihoodModel(sigma=np.eye(D) / 4.0, sigma0=np.eye(D) / 2.0,
                             mu0=X.mean(axis=0)).compile()

# Start with no constraints; they are introduced gradually below.
model = DirichletDiffusionTree(df=df, likelihood_model=lm, constraints=[])
sampler = MetropolisHastingsSampler(model, X)
sampler.initialize_assignments()

constraint_add = 500      # reveal one training constraint every 500 steps
constraint_index = 0
n_iters = 100000
score_every = 1000

likelihoods = []
scores = []
for i in tqdm(xrange(n_iters + constraint_add)):
    # Skip step 0 so the sampler mixes before the first constraint arrives.
    if i != 0 and i % constraint_add == 0:
        sampler.add_constraint(train_constraints[constraint_index])
        constraint_index += 1
    sampler.sample()
    likelihoods.append(sampler.tree.marg_log_likelihood())
Exemplo n.º 6
0
# Load the reference ("master") zoo tree from disk.
with open("../../scripts/zoo.tree", "rb") as fp:
    master_tree = pickle.load(fp)

# Split the master tree's shuffled constraints: 200 for training,
# up to 10000 held out for testing.
master_constraints = list(master_tree.generate_constraints())
random.seed(0)
random.shuffle(master_constraints)
train_constraints = master_constraints[:200]
test_constraints = master_constraints[200:][:10000]

df = Inverse(c=0.9)

lm = GaussianLikelihoodModel(sigma=np.eye(D) / 4.0,
                             sigma0=np.eye(D) / 2.0,
                             mu0=X.mean(axis=0)).compile()

# Begin unconstrained; one training constraint is added per period below.
model = DirichletDiffusionTree(df=df, likelihood_model=lm, constraints=[])
sampler = MetropolisHastingsSampler(model, X)
sampler.initialize_assignments()


constraint_add = 500      # iterations between constraint additions
constraint_index = 0
n_iters = 100000
score_every = 1000

likelihoods = []
scores = []
for i in tqdm(xrange(n_iters + constraint_add)):
    if i and i % constraint_add == 0:
        sampler.add_constraint(train_constraints[constraint_index])
        constraint_index += 1
    sampler.sample()
Exemplo n.º 7
0
import numpy as np  # `np` is used below but was never imported
import matplotlib.pyplot as plt  # `plt` is used below but was never imported
from trees.util import plot_tree, plot_tree_2d
from trees.ddt import DirichletDiffusionTree, Inverse, GaussianLikelihoodModel
from trees.mcmc import MetropolisHastingsSampler
from tqdm import tqdm

if __name__ == "__main__":
    # Synthetic data: 100 standard-normal points in 2-D.
    D = 2
    N = 100
    X = np.random.multivariate_normal(mean=np.zeros(D), cov=np.eye(D),
                                      size=N).astype(np.float32)
    df = Inverse(c=1)
    lm = GaussianLikelihoodModel(sigma=np.eye(D) / 4.0,
                                 mu0=np.zeros(D),
                                 sigma0=np.eye(D))
    ddt = DirichletDiffusionTree(df=df, likelihood_model=lm)
    mh = MetropolisHastingsSampler(ddt, X)
    mh.initialize_assignments()

    # Run the sampler for 1000 iterations.
    for _ in tqdm(range(1000)):
        mh.sample()

    # Trace of marginal log-likelihoods over the run.
    plt.figure()
    plt.plot(mh.likelihoods)

    #plt.figure()
    #plot_tree(mh.tree)

    #plt.figure()
    #plot_tree_2d(mh.tree, X)

    plt.show()
Exemplo n.º 8
0
def run_experiment(index,
                   dataset_name,
                   name,
                   constraint_getter,
                   master_tree,
                   X,
                   y,
                   out_dir,
                   n_iters=1000,
                   add_constraint=200,
                   add_score=200,
                   add_likelihood=200,
                   should_continue=False):
    """Run (or resume) a constrained DDT MCMC experiment and persist results.

    ``index`` names the run's output files under ``out_dir / name``;
    ``dataset_name`` ('iris' / 'zoo' / other) picks the likelihood noise;
    ``constraint_getter`` (optional) is polled every ``add_constraint``
    iterations with the trees sampled since the last poll; ``master_tree``
    is the scoring reference; ``X`` is the (N, D) data matrix; ``y`` is
    accepted for interface compatibility but unused. Set
    ``should_continue=True`` to resume from previously pickled state.

    Returns ``(costs, scores, sampler)``.
    """
    N, D = X.shape
    df = Inverse(c=1)

    # Per-dataset observation noise for the Gaussian likelihood.
    if dataset_name == 'iris':
        lm = GaussianLikelihoodModel(sigma=np.eye(D) / 9.0,
                                     sigma0=np.eye(D) / 2.0,
                                     mu0=X.mean(axis=0)).compile()
    elif dataset_name == 'zoo':
        lm = GaussianLikelihoodModel(sigma=np.diag(np.diag(np.cov(X.T))) / 4.0,
                                     sigma0=np.eye(D) / 2.0,
                                     mu0=X.mean(axis=0)).compile()
    else:
        lm = GaussianLikelihoodModel(sigma=np.diag(np.diag(np.cov(X.T))) / 2.0,
                                     sigma0=np.eye(D) / 2.0,
                                     mu0=X.mean(axis=0)).compile()
    if should_continue:
        # Resume from disk. Pickle files are binary and must be opened with
        # 'rb' — text mode breaks under Python 3 and on Windows.
        with open(out_dir / name / 'scores-%u.pkl' % index, 'rb') as fp:
            scores = pickle.load(fp)
        with open(out_dir / name / 'costs-%u.pkl' % index, 'rb') as fp:
            costs = pickle.load(fp)
        with open(out_dir / name / 'final-tree-%u.pkl' % index, 'rb') as fp:
            tree = DirichletDiffusionTree(df=df, likelihood_model=lm)
            tree.set_state(pickle.load(fp))
        sampler = MetropolisHastingsSampler(tree, X)
    else:
        scores = []
        costs = []
        tree = DirichletDiffusionTree(df=df, likelihood_model=lm)
        sampler = MetropolisHastingsSampler(tree, X)
        sampler.initialize_assignments()
        if dataset_name == 'zoo':
            # Restrict to the points that exist in the master tree.
            sampler.tree = sampler.tree.induced_subtree(master_tree.points())

    current_run = []
    for i in tqdm(range(n_iters + 1)):
        sampler.sample()
        current_run.append(sampler.tree)
        if i % add_score == 0:
            scores.append(dist(master_tree, sampler.tree))
        if i % add_likelihood == 0:
            costs.append(sampler.tree.marg_log_likelihood())
        if i != 0 and i % add_constraint == 0:
            # Hand the recent window of trees to the constraint oracle.
            if constraint_getter is not None:
                constraint = constraint_getter.get_constraint(current_run)
                if constraint is not None:
                    sampler.add_constraint(constraint)
            current_run = []

    (out_dir / name).mkdir_p()
    # Persist results in binary mode to match pickle's format.
    with open(out_dir / name / 'scores-%u.pkl' % index, 'wb') as fp:
        pickle.dump(scores, fp)
    print(len(costs))
    with open(out_dir / name / 'costs-%u.pkl' % index, 'wb') as fp:
        pickle.dump(costs, fp)
    with open(out_dir / name / 'final-tree-%u.pkl' % index, 'wb') as fp:
        pickle.dump(sampler.tree.get_state(), fp)
    return costs, scores, sampler
Exemplo n.º 9
0
    '50 constraints':
    DirichletDiffusionTree(df=df,
                           likelihood_model=lm,
                           constraints=train_constraints[:50]),
    '100 constraints':
    DirichletDiffusionTree(df=df,
                           likelihood_model=lm,
                           constraints=train_constraints[:100]),
    # '150 constraints': DirichletDiffusionTree(df=df, likelihood_model=lm, constraints=train_constraints[:150]),
    '200 constraints':
    DirichletDiffusionTree(df=df,
                           likelihood_model=lm,
                           constraints=train_constraints),
}

# One Metropolis-Hastings sampler per constrained model, keyed by model name.
samplers = {label: MetropolisHastingsSampler(ddt, X)
            for label, ddt in models.iteritems()}

for sampler in samplers.values():
    sampler.initialize_assignments()

score_every = 1000  # iterations between score evaluations

def iterate(n_iters):
    scores = {a: [] for a in samplers}
    likelihoods = {a: [] for a in samplers}
    for i in tqdm(xrange(n_iters)):
        for name, sampler in samplers.items():
            sampler.sample()
            likelihoods[name].append(sampler.tree.marg_log_likelihood())
            if i % score_every == 0:
Exemplo n.º 10
0
# Fixed seed for reproducibility of any NumPy-based sampling below.
np.random.seed(0)
# idx = np.random.permutation(np.arange(N))[:20]

# X = X[idx]
# y = np.array(y)
# y = y[idx]

N, D = X.shape
df = Inverse(c=1)

# Gaussian likelihood centered on the data mean, compiled for sampling.
lm = GaussianLikelihoodModel(sigma=np.eye(D) / 4.0,
                             sigma0=np.eye(D) / 2.0,
                             mu0=X.mean(axis=0)).compile()

tree = DirichletDiffusionTree(df=df, likelihood_model=lm)
sampler = MetropolisHastingsSampler(tree, X)
sampler.initialize_assignments()

# NOTE(review): D is rebound here from the data dimensionality to an inverse
# pairwise-distance matrix — confusing shadowing; later code presumably
# expects the matrix. Verify before renaming.
D = 1.0 / squareform(pdist(X))


def plot_tree(tree):
    """Render ``tree`` via Bio.Phylo/graphviz with leaves relabeled by ``y``."""
    display_tree = tree.copy()
    # Replace each leaf's point index with its class label before drawing.
    for node in display_tree.dfs():
        if node.is_leaf():
            node.point = y[node.point]

    newick_str = display_tree.to_newick()
    parsed = Phylo.read(StringIO(newick_str), 'newick')
    Phylo.draw_graphviz(parsed, prog='neato')
    plt.show()