Example No. 1
0
def min_empirical_error(xpldata):
    """
    Compute the minimal empirical error for data loaded from a XPL file.

    The returned value is a threshold used as an overfitting reference.

    Parameters
    ----------
    xpldata : ExampleData(data, freq0, freq1, winshape, windata, filename)
            Same as xplutil returns.

    Returns
    -------
    err : double
        The error value.
    """
    # Normalize the class-frequency tables, then measure the empirical error.
    freq_w0, freq_w1 = clf.normalize_table(xpldata.freq0, xpldata.freq1)
    return clf.error(freq_w0, freq_w1)
Example No. 2
0
def test_error():
    """Check cl.error against a hand-computed expected value."""
    weights0 = np.array([0.15, 0.2, 0.1, 0.03, 0.08])
    weights1 = np.array([0.05, 0.0, 0.1, 0.17, 0.12])
    # Expected error: the smaller class mass at each position —
    # weights0 at indices 2..4 plus weights1 at indices 0..1.
    expected = weights0[[2, 3, 4]].sum() + weights1[[0, 1]].sum()
    observed = cl.error(weights0, weights1)
    nt.assert_almost_equal(observed, expected)
Example No. 3
0
import cPickle as pickle
import numpy as np
import classifier
import featurizer
import gen_training_data
from scipy import sparse

# Script flow: cluster businesses and users with k-means, persist the cluster
# assignments to pickle files, then compute and store the classifier error.
# NOTE(review): Python 2 code (`cPickle` import and `print` statement).

# Number of k-means clusters for businesses and users, respectively.
numBusinessClusters = 10
numUserClusters = 10
# Load preprocessed business feature data and cluster it; save the assignments.
business_data = pickle.load(open("business_data.p", "rb"))
business_clusters = featurizer.kmeans(business_data, numBusinessClusters)
pickle.dump(business_clusters["data_clusters"], open("clustered_business.p", "wb"))
# Build user data (argument meanings not visible here — presumably cluster
# count, a parameter, and a sample size; confirm against
# gen_training_data.cluster_users) and cluster its "training" portion.
user_data = gen_training_data.cluster_users(numUserClusters, 5, 800000)
user_clusters = featurizer.kmeans(user_data["training"], numUserClusters)
pickle.dump(user_clusters["data_clusters"], open("clustered_user.p", "wb"))
# Compute the classifier error for the chosen cluster counts and persist it.
error = classifier.error(numUserClusters, numBusinessClusters)
print error
pickle.dump(error, open("error.p", "wb"))
# print results
# user_weights = pickle.load(open('user_weights.p', 'rb'));
# user_clusters = featurizer.kmeans(user_weights, 32);
# pickle.dump(user_clusters, open('user_clusters.p', 'wb'))
# users = user_clusters.keys();
# businesses = business_clusters.keys();
# user_ratings = {};
# predictions = [];
# w1 = 1;
# w2 = 1;
# b = 1;
# for user in users:
# 	predictions = [];