示例#1
0
def output_json(file="forest", size=100, prune=True, seed=50, burn=1):
    """Fit a TrestleTree on a sample dataset and dump it to ``output.js``.

    The selected dataset is loaded, shuffled with a seeded RNG, truncated to
    ``size`` instances and fitted into a fresh ``TrestleTree``. The JSON
    structure returned by ``tree.root.output_json()`` is pretty-printed to
    stdout and written to ``output.js`` in the current working directory as
    the JavaScript statement ``var trestle_output = <json>;``.

    :param file: dataset key; one of ``"forest"``, ``"voting"``, ``"iris"``,
        ``"mushroom"``, ``"rb_com_11"``, ``"rb_s_07"``, ``"rb_s_13"`` or
        ``"rb_wb_03"``. Any other value falls back to the forest fires data.
    :param size: maximum number of (shuffled) instances to fit.
    :param prune: accepted for backward compatibility; currently unused.
    :param seed: value passed to ``random.seed`` before shuffling.
    :param burn: forwarded to ``TrestleTree.fit`` as ``iterations``.
    """
    random.seed(seed)

    # Dataset key -> (name of the loader on ``ds``, needs variablizing?).
    # Loader names are resolved lazily with getattr so that only the selected
    # loader has to exist on ``ds``.
    loaders = {
        "forest": ("load_forest_fires", False),
        "voting": ("load_congressional_voting", False),
        "iris": ("load_iris", False),
        "mushroom": ("load_mushroom", False),
        "rb_com_11": ("load_rb_com_11", True),
        "rb_s_07": ("load_rb_s_07", True),
        "rb_s_13": ("load_rb_s_13", True),
        "rb_wb_03": ("load_rb_wb_03", True),
    }
    loader_name, variables = loaders.get(file, loaders["forest"])
    instances = getattr(ds, loader_name)()

    random.shuffle(instances)
    pprint.pprint(instances[0])
    instances = instances[:size]
    print(len(instances))

    if variables:
        variablizer = ObjectVariablizer()
        instances = [variablizer.transform(t) for t in instances]

    tree = TrestleTree()
    tree.fit(instances, iterations=burn)

    # Serialize the tree once and reuse the result for both the console dump
    # and the file, instead of walking the whole tree twice.
    tree_json = tree.root.output_json()
    pprint.pprint(tree_json)

    with open('output.js', 'w') as out:
        out.write("var trestle_output = ")
        out.write(json.dumps(tree_json))
        out.write(";")
示例#2
0
def output_json(file="forest", size=100, prune=True, seed=50, burn=1):
    """Build a TrestleTree from a bundled dataset and write it to output.js.

    Loads the dataset named by ``file`` (unknown names use the forest fires
    data), shuffles it after seeding ``random`` with ``seed``, keeps the
    first ``size`` instances and fits them with ``iterations=burn``. The
    root's ``output_json()`` result is written to ``output.js`` as
    ``var trestle_output = <json>;``. ``prune`` is not used by this function.
    """
    random.seed(seed)

    # Loader attribute names on ``ds``, split by whether the instances are
    # run through ObjectVariablizer before fitting.
    flat_loaders = {
        "forest": "load_forest_fires",
        "voting": "load_congressional_voting",
        "iris": "load_iris",
        "mushroom": "load_mushroom",
    }
    structured_loaders = {
        "rb_com_11": "load_rb_com_11",
        "rb_s_07": "load_rb_s_07",
        "rb_s_13": "load_rb_s_13",
        "rb_wb_03": "load_rb_wb_03",
    }

    variables = file in structured_loaders
    if variables:
        loader_name = structured_loaders[file]
    else:
        loader_name = flat_loaders.get(file, "load_forest_fires")
    instances = getattr(ds, loader_name)()

    random.shuffle(instances)
    pprint.pprint(instances[0])
    instances = instances[:size]
    print(len(instances))

    if variables:
        variablizer = ObjectVariablizer()
        instances = [variablizer.transform(item) for item in instances]

    tree = TrestleTree()
    tree.fit(instances, iterations=burn)

    with open('output.js', 'w') as js_file:
        js_file.write("var trestle_output = ")
        js_file.write(json.dumps(tree.root.output_json()))
        js_file.write(";")
def calculate_aris(dataset):
    """Return one adjusted Rand score per clustering heuristic.

    ``dataset`` is shuffled in place (the caller's list is reordered), the
    first 60 instances are variablized and fitted into a new ``TrestleTree``,
    and ``cluster_split_search`` is run once for every entry of the
    module-level ``hueristics`` list. Each clustering is compared against the
    instances' ``'_human_cluster_label'`` values; scores below 0.01 are
    raised to 0.01.
    """
    shuffle(dataset)

    variablizer = ObjectVariablizer()
    sample = [variablizer.transform(inst) for inst in dataset[:60]]

    tree = TrestleTree()
    tree.fit(sample)

    clusterings = []
    for heuristic in hueristics:
        clusterings.append(
            cluster_split_search(tree, sample, heuristic, minsplit=1,
                                 maxsplit=40, mod=False))

    human_labels = [inst['_human_cluster_label'] for inst in sample]

    scores = []
    for assignment in clusterings:
        score = adjusted_rand_score(human_labels, assignment)
        scores.append(max(score, 0.01))
    return scores
class ScikitTrestle(object):
    """Wrap a ``TrestleTree`` behind scikit-learn style fit/predict methods.

    Targets are stored on each instance under the ``'_y_label'`` attribute,
    formatted as integer strings, and converted back to ``int`` on
    prediction.
    """

    def __init__(self, params=None):
        """Create the wrapped tree, passing ``params`` as keyword arguments."""
        self.tree = TrestleTree() if params is None else TrestleTree(**params)

    def ifit(self, x, y):
        """Incrementally fit one instance ``x`` with target ``y``.

        ``x`` is deep-copied first, so the caller's instance is not modified.
        """
        labeled = deepcopy(x)
        labeled['_y_label'] = "%i" % y
        self.tree.ifit(labeled)

    def fit(self, X, y):
        """Fit all instances in ``X``; ``y[i]`` is the target of ``X[i]``.

        ``X`` is deep-copied first, and the tree is fitted with
        ``randomize_first=False``.
        """
        labeled = deepcopy(X)
        for index, instance in enumerate(labeled):
            instance['_y_label'] = "%i" % y[index]
        self.tree.fit(labeled, randomize_first=False)

    def predict(self, X):
        """Return the predicted integer label for every instance in ``X``."""
        predictions = []
        for instance in X:
            concept = self.tree.categorize(instance)
            predictions.append(int(concept.predict('_y_label')))
        return predictions
示例#5
0
class ScikitTrestle(object):
    """Wrap a ``TrestleTree`` behind scikit-learn style fit/predict methods.

    Targets are stored on each instance as floats under the ``'_y_label'``
    attribute, and predictions are read back from that same attribute.
    """

    def __init__(self, **kwargs):
        """Create the wrapped tree, forwarding ``kwargs`` to ``TrestleTree``."""
        self.tree = TrestleTree(**kwargs)
        self.state_format = "variablized_state"

    def ifit(self, x, y):
        """Incrementally fit one instance ``x`` with target ``y``.

        ``x`` is deep-copied first, so the caller's instance is not modified.
        """
        x = deepcopy(x)
        x['_y_label'] = float(y)
        self.tree.ifit(x)

    def fit(self, X, y):
        """Fit all instances in ``X``; ``y[i]`` is the target of ``X[i]``.

        ``X`` is deep-copied first, and the tree is fitted with
        ``randomize_first=False``.

        :raises IndexError: if ``y`` has fewer entries than ``X``.
        """
        X = deepcopy(X)
        for i, x in enumerate(X):
            # Label each instance with its own target; converting the whole
            # ``y`` sequence with float() raised TypeError for list targets.
            x['_y_label'] = float(y[i])
        self.tree.fit(X, randomize_first=False)

    def skill_info(self, X):
        """Not implemented; always raises ``NotImplementedError``."""
        raise NotImplementedError("Not implemented Erik H. says there is a way \
             to serialize this -> TODO")

    def predict(self, X):
        """Return the predicted ``'_y_label'`` for every instance in ``X``."""
        return [self.tree.categorize(x).predict('_y_label') for x in X]
def calculate_aris(dataset):
    """Compute the adjusted Rand score of each heuristic's clustering.

    The caller's ``dataset`` list is shuffled in place. Its first 60
    instances are variablized, fitted into a fresh ``TrestleTree`` and
    clustered with ``cluster_split_search`` once per entry of the
    module-level ``hueristics`` list. Every clustering is scored against the
    instances' ``'_human_cluster_label'`` values, and each score is floored
    at 0.01.
    """
    shuffle(dataset)
    subset = dataset[:60]

    variablizer = ObjectVariablizer()
    subset = [variablizer.transform(instance) for instance in subset]

    tree = TrestleTree()
    tree.fit(subset)

    search_options = dict(minsplit=1, maxsplit=40, mod=False)
    clusters = [
        cluster_split_search(tree, subset, heuristic, **search_options)
        for heuristic in hueristics
    ]
    human_labels = [instance['_human_cluster_label'] for instance in subset]

    return [
        max(adjusted_rand_score(human_labels, assignment), 0.01)
        for assignment in clusters
    ]
示例#7
0
# Example script: fit a TrestleTree on one of the bundled datasets and hand
# the fitted tree to concept_formation's visualize() helper.
# NOTE(review): TrestleTree is used below but is not imported in the visible
# part of this script -- confirm it is imported elsewhere.
from concept_formation.visualize import visualize

# These lines load one of the example datasets included in the library, for
# use when you don't have a readily available dataset to test with. The
# rb_s_07 dataset is similar to, but not exactly the same as, the one used to
# generate the figures in the paper.
from concept_formation.datasets import load_rb_s_07
from concept_formation.preprocessor import ObjectVariablizer

data = load_rb_s_07()

# As long as your data conforms to the instance representation:
# https://concept-formation.readthedocs.io/en/latest/instance_representation.html
# it can be basically anything. Replace the dataset above with your own list
# of instances, e.g.:

# data = []

# This optional step makes sure the component attributes of the instances are
# properly tagged as variables. See the instance representation link above
# for details. It is disabled here; uncomment it to apply it (this is the
# only use of the ObjectVariablizer import above).

# ov = ObjectVariablizer()
# data = ov.batch_transform(data)

# These three lines are the core of the process. They will fit the data and
# generate a visualization that will automatically open a browser to the view.
# If you want to embed the output in some other process, like a LearnSphere
# workflow, it would take a little more work but is easy in principle.
tree = TrestleTree()
tree.fit(data)
visualize(tree, "vizfiles")
# Example script: cluster a sample of the rb_wb_03 towers with several
# split-search heuristics and score each clustering against the human labels.
# NOTE(review): seed, shuffle, np, TrestleTree and adjusted_rand_score are
# used below but are not imported in the visible part of this script.
from concept_formation.cluster import cluster_split_search
from concept_formation.cluster import AIC, BIC, AICc, CU
from concept_formation.datasets import load_rb_wb_03
from concept_formation.preprocessor import ObjectVariablizer

# Presumably random.seed; fixed so the shuffle below is repeatable -- confirm.
seed(5)

# Work on a shuffled sample of 60 towers.
towers = load_rb_wb_03()
shuffle(towers)
towers = towers[:60]

# Run every tower through a single ObjectVariablizer before fitting.
variablizer = ObjectVariablizer()
towers = [variablizer.transform(t) for t in towers]

tree = TrestleTree()
tree.fit(towers)

# Heuristics handed to cluster_split_search, one clustering per heuristic.
# The order here must match hueristic_names below.
hueristics = [AIC, BIC, CU, AICc]

clusters = [
    cluster_split_search(tree, towers, h, minsplit=1, maxsplit=40, mod=False)
    for h in hueristics
]
# Reference labels stored on each instance under '_human_cluster_label'.
human_labels = [tower['_human_cluster_label'] for tower in towers]

# x: one position per heuristic; y: the adjusted Rand score of each
# clustering against the human labels, floored at 0.01.
x = np.arange(len(hueristics))
y = [max(adjusted_rand_score(human_labels, huer), 0.01) for huer in clusters]
# Presumably the bar width for a plot drawn further down -- confirm.
width = 0.45

# Display names, in the same order as hueristics.
hueristic_names = ['AIC', 'BIC', 'CU', 'AICc']
for i in range(len(clusters)):
# Build a noisy sine-wave regression dataset: 80 sorted x values in a single
# column, targets sin(x), with every fifth target perturbed.
# NOTE(review): ``rng`` is not defined in the visible part of this script;
# confirm it is created earlier (and whether seed(0) is meant to affect it).
seed(0)
X = np.sort(5 * rng.rand(80, 1), axis=0)
y = np.sin(X).ravel()
y[::5] += 3 * (0.5 - rng.rand(16))

# Fit regression models (Decision Tree and TRESTLE)
# For TRESTLE the y attribute is hidden, so only the X is used to make
# predictions.
dtree = DecisionTreeRegressor(max_depth=3)
dtree.fit(X, y)
ttree = TrestleTree()
# One instance per sample; each row of X holds a single feature value.
training_data = [
    {'x': float(x_row[0]), '_y': float(target)}
    for x_row, target in zip(X, y)
]
ttree.fit(training_data, iterations=1)

# Predict
X_test = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]
y_dtree = dtree.predict(X_test)
# Each row of X_test is a length-1 array, so index the scalar explicitly
# (as done for the training data) rather than calling float() on the array,
# which is deprecated for arrays with ndim > 0 in recent NumPy releases.
y_trestle = [
    ttree.categorize({'x': float(v[0])}).predict('_y') for v in X_test
]

# Plot the results
plt.figure()
plt.scatter(X, y, c="k", label="Data")
plt.plot(X_test, y_trestle, c="g", label="TRESTLE", linewidth=2)
plt.plot(X_test, y_dtree, c="r", label="Decison Tree (Depth=3)", linewidth=2)
plt.xlabel("Data")
plt.ylabel("Target")
plt.title("TRESTLE/Decision Tree Regression")
plt.legend(loc=3)