def test_cobweb():
    """Incrementally fit 40 random two-attribute instances, then check counts.

    Every instance draws ``a1`` and then ``a2`` from the same four nominal
    values. Once all instances are in the tree, ``verify_counts`` is applied
    to its root.
    """
    values = ['v1', 'v2', 'v3', 'v4']
    tree = CobwebTree()
    for _ in range(40):
        # The dict literal evaluates a1 before a2, so the random stream is
        # consumed in the same order on every iteration.
        instance = {
            'a1': random.choice(values),
            'a2': random.choice(values),
        }
        tree.ifit(instance)
    verify_counts(tree.root)
def test_cobweb_ifit():
    """Check the root's state after a single instance has been ``ifit``.

    The root must have a count of 1, no children, and an ``av_counts`` entry
    for attribute ``'a'`` that contains value ``'b'``.
    """
    tree = CobwebTree()
    tree.ifit({'a': 'b'})

    root = tree.root
    assert root.count == 1
    assert len(root.children) == 0
    assert 'a' in root.av_counts
    assert 'b' in root.av_counts['a']
def test_cobweb_sanity_check():
    """Check that ``_sanity_check_instance`` raises ``ValueError``.

    Four inputs are tried in turn; each one must trigger the exception.
    """
    tree = CobwebTree()

    rejected_instances = (
        [set()],        # a list wrapping a set instead of a dict
        {1: 'a'},       # integer attribute name
        {'a': set()},   # empty set as attribute value
        {'a': None},    # None as attribute value
    )
    for instance in rejected_instances:
        with pytest.raises(ValueError):
            tree._sanity_check_instance(instance)
def test_cobweb_clear():
    """Check the tree's root after fitting one instance and calling ``clear``.

    The root must be a ``CobwebNode`` with a count of 0 whose ``tree``
    attribute compares equal to the tree itself.
    """
    tree = CobwebTree()
    tree.ifit({'a': 'b'})
    tree.clear()

    root = tree.root
    assert root.count == 0
    assert isinstance(root, CobwebNode)
    assert tree == root.tree
# Example #5
0
from concept_formation.examples.examples_utils import avg_lines
from concept_formation.evaluation import incremental_evaluation
from concept_formation.cobweb import CobwebTree
from concept_formation.dummy import DummyTree
from concept_formation.datasets import load_mushroom

# Experiment settings: forwarded below as `runs` and `run_length`.
num_runs = 30
num_examples = 30
mushrooms = load_mushroom()

# Evaluate a DummyTree and a CobwebTree on the same mushroom data with
# identical settings, targeting the "classification" attribute.
# NOTE(review): the loops below index each result as [run][opp] -- confirm
# this layout against incremental_evaluation's documentation.
naive_data = incremental_evaluation(DummyTree(),
                                    mushrooms,
                                    run_length=num_examples,
                                    runs=num_runs,
                                    attr="classification")
cobweb_data = incremental_evaluation(CobwebTree(),
                                     mushrooms,
                                     run_length=num_examples,
                                     runs=num_runs,
                                     attr="classification")

# Flattened (x, y) point lists, one pair of lists per model.
cobweb_x, cobweb_y = [], []
naive_x, naive_y = [], []

# For every opportunity index, emit one point per run: x is the opportunity
# index and y is the value recorded for that run at that opportunity.
for opp in range(len(cobweb_data[0])):
    for run in range(len(cobweb_data)):
        cobweb_x.append(opp)
        cobweb_y.append(cobweb_data[run][opp])

# NOTE(review): the body of the inner loop below is not present in the
# visible source; presumably it mirrors the cobweb loop using naive_x/naive_y.
for opp in range(len(naive_data[0])):
    for run in range(len(naive_data)):
# Example #6
0
# Add noise to targets
# Every fifth target is shifted by 0.5 minus a draw from np.random.rand.
# NOTE(review): the hard-coded 8 assumes y[::5] and y2[::5] each select
# exactly 8 elements -- confirm against where y and y2 are built.
y[::5] += 1 * (0.5 - np.random.rand(8))
y2[::5] += 1 * (0.5 - np.random.rand(8))

# Create dictionaries
# Note that the y value is stored as a hidden variable because
# in this case we only want to use the X value to make predictions.
# Each instance takes the first entry of a row of X and the target at the
# same index in y.
training_data = [{'X': v[0], '_y': y[i]} for i, v in enumerate(X)]
shuffle(training_data)

# Build test data
# Only the first entry of each row of T is used; no target is attached.
test_data = [{'X': v[0]} for i, v in enumerate(T)]
#test_data = [{'X': float(v)} for i,v in enumerate(X)]

# Fit cobweb models
cbt = CobwebTree()
cb3t = Cobweb3Tree()

# Both trees are fit on the same shuffled training data.
cbt.fit(training_data, iterations=1)
cb3t.fit(training_data, iterations=1)
print(cb3t.root)

# Categorize a single probe instance and print predictions from the
# resulting node.
child = cb3t.categorize({'X': 4.16})
print(child.predict('X'))
# NOTE(review): the training data stores the target under '_y', but 'y' is
# queried here -- confirm which attribute name is intended.
print(child.predict('y'))

# Print the returned node, then each ancestor in turn until a node with no
# parent has been printed.
curr = child
print(curr)
while curr.parent is not None:
    curr = curr.parent
    print(curr)
# Example #7
0
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.feature_extraction import DictVectorizer
from sklearn.metrics import adjusted_rand_score

from concept_formation.cobweb import CobwebTree
from concept_formation.cluster import cluster
from concept_formation.datasets import load_mushroom

# Seed before shuffling, then keep the first 150 shuffled instances.
# NOTE(review): the imports of seed/shuffle are not visible here; presumably
# they share one RNG so the sample is repeatable -- confirm.
seed(0)
mushrooms = load_mushroom()
shuffle(mushrooms)
mushrooms = mushrooms[:150]

tree = CobwebTree()

# Copy every instance without its 'classification' entry so that the label
# is not part of the data handed to cluster().
mushrooms_no_class = [
    {
        attr: value
        for attr, value in mushroom.items()
        if attr != 'classification'
    }
    for mushroom in mushrooms
]

# Only the first element of cluster()'s result is used.
clusters = cluster(tree, mushrooms_no_class)[0]

# Class labels in instance order; instances lacking the key contribute
# nothing.
mushroom_class = [
    mushroom['classification']
    for mushroom in mushrooms
    if 'classification' in mushroom
]
ari = adjusted_rand_score(clusters, mushroom_class)

# Turn the label-free instances into a dense matrix.
dv = DictVectorizer(sparse=False)
mushroom_X = dv.fit_transform(mushrooms_no_class)

pca = PCA(n_components=2)
def test_cobweb_str():
    """Check that a tree's string form equals that of its root node."""
    tree = CobwebTree()
    tree_text = str(tree)
    root_text = str(tree.root)
    assert tree_text == root_text
def test_cobweb_init():
    """Check that a new tree has a ``CobwebNode`` root linked back to it."""
    tree = CobwebTree()
    root = tree.root
    assert isinstance(root, CobwebNode)
    assert tree == root.tree
def test_empty_instance():
    """Check that categorizing an empty instance completes without raising.

    The tree is first given two instances with differing ``'x'`` values.
    """
    tree = CobwebTree()
    for value in (1, 2):
        tree.ifit({'x': value})
    tree.categorize({})
def test_cobweb_fit():
    """Fit two separate trees on the same three single-attribute examples.

    No assertions follow the fits, so as written this only checks that
    ``fit`` runs on both trees without raising.
    """
    examples = [{name: name} for name in ('a', 'b', 'c')]
    tree = CobwebTree()
    tree2 = CobwebTree()
    tree.fit(examples)
    tree2.fit(examples)
# Example #12
0
# Add noise to targets
# Every fifth target is shifted by 0.5 minus a draw from np.random.rand.
# NOTE(review): the hard-coded 8 assumes y[::5] and y2[::5] each select
# exactly 8 elements -- confirm against where y and y2 are built.
y[::5] += 1 * (0.5 - np.random.rand(8))
y2[::5] += 1 * (0.5 - np.random.rand(8))

# Create dictionaries
# Note that the y value is stored as a hidden variable because
# in this case we only want to use the X value to make predictions.
# Each instance takes the first entry of a row of X and the target at the
# same index in y.
training_data = [{'X': v[0], '_y': y[i]} for i, v in enumerate(X)]
shuffle(training_data)

# Build test data
# Only the first entry of each row of T is used; no target is attached.
test_data = [{'X': v[0]} for i, v in enumerate(T)]
# test_data = [{'X': float(v)} for i,v in enumerate(X)]

# Fit cobweb models
cbt = CobwebTree()
cb3t = Cobweb3Tree()

# Both trees are fit on the same shuffled training data.
cbt.fit(training_data, iterations=1)
cb3t.fit(training_data, iterations=1)
print(cb3t.root)

# Categorize a single probe instance and print predictions from the
# resulting node.
child = cb3t.categorize({'X': 4.16})
print(child.predict('X'))
# NOTE(review): the training data stores the target under '_y', but 'y' is
# queried here -- confirm which attribute name is intended.
print(child.predict('y'))

# Print the returned node, then each ancestor in turn until a node with no
# parent has been printed.
curr = child
print(curr)
while curr.parent is not None:
    curr = curr.parent
    print(curr)