def test_cobweb():
    """Incrementally fit 40 random instances and verify the tree's counts.

    Each instance has two nominal attributes, ``a1`` and ``a2``, whose
    values are drawn at random from ``v1``..``v4``. After all instances
    have been added with ``ifit``, the root is handed to ``verify_counts``.
    """
    values = ['v1', 'v2', 'v3', 'v4']
    tree = CobwebTree()
    for _ in range(40):
        # 'a1' is drawn before 'a2'; dict literals evaluate left to right.
        tree.ifit({
            'a1': random.choice(values),
            'a2': random.choice(values),
        })
    verify_counts(tree.root)
def test_cobweb_ifit():
    """Check the root's state after a single ``ifit`` call.

    After fitting ``{'a': 'b'}`` into an empty tree the root must have a
    count of 1, no children, and an ``av_counts`` entry for attribute
    ``'a'`` containing value ``'b'``.
    """
    cobweb = CobwebTree()
    cobweb.ifit({'a': 'b'})

    assert cobweb.root.count == 1
    assert len(cobweb.root.children) == 0

    # The attribute and its value must both have been recorded on the root.
    assert 'a' in cobweb.root.av_counts
    assert 'b' in cobweb.root.av_counts['a']
def test_cobweb_sanity_check():
    """Check that ``_sanity_check_instance`` rejects malformed instances.

    Every input below must raise ``ValueError``: a list instead of a
    dict, a dict with an integer key, a dict with a set value, and a dict
    with a ``None`` value.
    """
    tree = CobwebTree()
    invalid_instances = (
        [set()],
        {1: 'a'},
        {'a': set([])},
        {'a': None},
    )
    for bad_instance in invalid_instances:
        with pytest.raises(ValueError):
            tree._sanity_check_instance(bad_instance)
def test_cobweb_clear():
    """Check that ``clear`` resets a fitted tree.

    After fitting one instance and clearing, the root must have a count
    of 0, be a ``CobwebNode``, and point back at the tree that owns it.
    """
    cobweb = CobwebTree()
    cobweb.ifit({'a': 'b'})
    cobweb.clear()

    assert cobweb.root.count == 0
    assert isinstance(cobweb.root, CobwebNode)
    # The root's back-reference must compare equal to the owning tree.
    assert cobweb == cobweb.root.tree
from concept_formation.examples.examples_utils import avg_lines from concept_formation.evaluation import incremental_evaluation from concept_formation.cobweb import CobwebTree from concept_formation.dummy import DummyTree from concept_formation.datasets import load_mushroom num_runs = 30 num_examples = 30 mushrooms = load_mushroom() naive_data = incremental_evaluation(DummyTree(), mushrooms, run_length=num_examples, runs=num_runs, attr="classification") cobweb_data = incremental_evaluation(CobwebTree(), mushrooms, run_length=num_examples, runs=num_runs, attr="classification") cobweb_x, cobweb_y = [], [] naive_x, naive_y = [], [] for opp in range(len(cobweb_data[0])): for run in range(len(cobweb_data)): cobweb_x.append(opp) cobweb_y.append(cobweb_data[run][opp]) for opp in range(len(naive_data[0])): for run in range(len(naive_data)):
# Add noise to targets
# NOTE(review): the 8 noise samples assume y[::5] and y2[::5] each select
# exactly 8 elements -- confirm against where y and y2 are defined.
y[::5] += 1 * (0.5 - np.random.rand(8))
y2[::5] += 1 * (0.5 - np.random.rand(8))

# Create dictionaries
# Note that the y value is stored as a hidden variable ('_y') because
# in this case we only want to use the X value to make predictions.
training_data = [{'X': v[0], '_y': y[i]} for i, v in enumerate(X)]
shuffle(training_data)

# Build test data
test_data = [{'X': v[0]} for v in T]
# test_data = [{'X': float(v)} for i,v in enumerate(X)]

# Fit cobweb models
cbt = CobwebTree()
cb3t = Cobweb3Tree()

cbt.fit(training_data, iterations=1)
cb3t.fit(training_data, iterations=1)
print(cb3t.root)

# Categorize a single query point and print the concept's predictions.
child = cb3t.categorize({'X': 4.16})
print(child.predict('X'))
# The target was stored under the key '_y' above, so it must be queried
# under that same key; 'y' is an attribute the tree was never given.
print(child.predict('_y'))

# Print every concept on the path from the matched concept up to the root.
curr = child
print(curr)
while curr.parent is not None:
    curr = curr.parent
    print(curr)
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.feature_extraction import DictVectorizer
from sklearn.metrics import adjusted_rand_score

from concept_formation.cobweb import CobwebTree
from concept_formation.cluster import cluster
from concept_formation.datasets import load_mushroom

# Seed, shuffle, and keep the first 150 mushrooms.
# NOTE(review): seed/shuffle are presumably imported from `random` above
# this fragment -- confirm.
seed(0)
mushrooms = load_mushroom()
shuffle(mushrooms)
mushrooms = mushrooms[:150]

tree = CobwebTree()

# Copy each instance without its 'classification' attribute so the label
# is not part of the data handed to the clusterer.
mushrooms_no_class = [
    {
        attr: value
        for attr, value in instance.items()
        if attr != 'classification'
    }
    for instance in mushrooms
]
clusters = cluster(tree, mushrooms_no_class)[0]

# Collect the held-out 'classification' values, in instance order.
mushroom_class = [
    value
    for instance in mushrooms
    for attr, value in instance.items()
    if attr == 'classification'
]
ari = adjusted_rand_score(clusters, mushroom_class)

# Vectorize the label-free instances and set up a 2-component PCA.
dv = DictVectorizer(sparse=False)
mushroom_X = dv.fit_transform(mushrooms_no_class)
pca = PCA(n_components=2)
def test_cobweb_str():
    """Check that ``str`` of a tree equals ``str`` of its root node."""
    cobweb = CobwebTree()
    tree_text = str(cobweb)
    root_text = str(cobweb.root)
    assert tree_text == root_text
def test_cobweb_init():
    """Check the state of a freshly constructed ``CobwebTree``.

    The root must be a ``CobwebNode`` whose ``tree`` attribute compares
    equal to the tree that owns it.
    """
    cobweb = CobwebTree()

    assert isinstance(cobweb.root, CobwebNode)
    assert cobweb == cobweb.root.tree
def test_empty_instance():
    """Check that categorizing an empty instance does not raise.

    The tree is first fitted with two instances (``x`` = 1 and ``x`` = 2).
    The test passes if ``categorize({})`` completes without an exception.
    """
    tree = CobwebTree()
    for value in (1, 2):
        tree.ifit({'x': value})
    tree.categorize({})
def test_cobweb_fit():
    """Fit two independent trees on the same three examples.

    There are no assertions; the test passes if both ``fit`` calls
    complete without raising.
    """
    examples = [{'a': 'a'}, {'b': 'b'}, {'c': 'c'}]
    first_tree, second_tree = CobwebTree(), CobwebTree()
    first_tree.fit(examples)
    second_tree.fit(examples)
# Add noise to targets
# NOTE(review): the 8 noise samples assume y[::5] and y2[::5] each select
# exactly 8 elements -- confirm against where y and y2 are defined.
y[::5] += 1 * (0.5 - np.random.rand(8))
y2[::5] += 1 * (0.5 - np.random.rand(8))

# Create dictionaries
# Note that the y value is stored as a hidden variable ('_y') because
# in this case we only want to use the X value to make predictions.
training_data = [{'X': v[0], '_y': y[i]} for i, v in enumerate(X)]
shuffle(training_data)

# Build test data
test_data = [{'X': v[0]} for v in T]
# test_data = [{'X': float(v)} for i,v in enumerate(X)]

# Fit cobweb models
cbt = CobwebTree()
cb3t = Cobweb3Tree()

cbt.fit(training_data, iterations=1)
cb3t.fit(training_data, iterations=1)
print(cb3t.root)

# Categorize a single query point and print the concept's predictions.
child = cb3t.categorize({'X': 4.16})
print(child.predict('X'))
# The target was stored under the key '_y' above, so it must be queried
# under that same key; 'y' is an attribute the tree was never given.
print(child.predict('_y'))

# Print every concept on the path from the matched concept up to the root.
curr = child
print(curr)
while curr.parent is not None:
    curr = curr.parent
    print(curr)