Пример #1
0
def test_recommend():
    """
    Verify that the classifier reproduces the stored reference
    recommendations for the test dataset.
    """
    # Fit the classifier on the full dataset, then ask it for recommendations
    # (include_conflict_theta=True also returns conflict/uncertainty values).
    dataset = load_dataset(data_file)
    classifier = TemporalEvidencesClassifier(dataset.features, dataset.target_names)
    classifier = classifier.fit(dataset.data, dataset.target)
    computed = classifier.predict(dataset.data, include_conflict_theta=True)

    # Read the reference recommendations that were serialized earlier.
    with open(recommendations_file, 'r') as json_source:
        reference = json.load(json_source)

    # Every computed recommendation must match its stored counterpart.
    for computed_item, reference_item in zip(computed, reference):
        assert_recommendations_equal(computed_item, reference_item)
Пример #2
0
def test_recommend():
    """
    Check the classifier's recommendations for the test dataset against
    the expected results stored on disk.
    """
    # Train on the test dataset and collect the model's recommendations.
    loaded = load_dataset(data_file)
    model = TemporalEvidencesClassifier(loaded.features,
                                        loaded.target_names)
    model = model.fit(loaded.data, loaded.target)
    produced = model.predict(loaded.data, include_conflict_theta=True)

    # The expected recommendations were saved as JSON beforehand.
    with open(recommendations_file, 'r') as handle:
        expected = json.load(handle)

    # Compare each produced recommendation with the matching expected one.
    for produced_item, expected_item in zip(produced, expected):
        assert_recommendations_equal(produced_item, expected_item)
import pandas

from recsys.classifiers.temporal import TemporalEvidencesClassifier
from recsys.dataset import load_dataset
from evaluation.metrics import results_as_dataframe
from evaluation import plot
import config


#configuration: dataset CSV plus its sensor configuration file
data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")

#run the classifier on the whole dataset (fit and predict on the same data)
cls = TemporalEvidencesClassifier(data.features, data.target_names)
cls = cls.fit(data.data, data.target)
#with include_conflict_theta=True each prediction is a
#(recommendations, conflict, uncertainty) tuple -- unpacked via zip(*results) below
results = cls.predict(data.data, include_conflict_theta=True)

#extract conflict and uncertainty and convert recommendations to pandas representation
recommendations, conflict, uncertainty = zip(*results)
results = results_as_dataframe(data.target, list(recommendations))

#for each row, mark correct recommendations with "1", false recommendations with "0"
#(row.name holds the actually performed action for that row)
find_matches_in_row = lambda row: [1 if col == row.name else 0 for col in row]
results = results.apply(find_matches_in_row, axis=1)

#set uncertainty and conflict as multi-index so results can later be grouped by them
results.index = pandas.MultiIndex.from_tuples(zip(conflict, uncertainty),
                                              names=["Conflict", "Uncertainty"])

#found_within: the correct service was found within X recommendations
#-> apply cumulative sum on each row so that the "1" marker is set for all columns after it first appears
Пример #4
0
sys.path.append("..")

import pandas

from recsys.classifiers.temporal import TemporalEvidencesClassifier
from recsys.dataset import load_dataset
from evaluation.metrics import QualityMetricsCalculator


# configuration
data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")
# data = load_dataset("../datasets/houseB.csv", "../datasets/houseB.config")

# run the classifier on the whole dataset and calculate confusion matrix
cls = TemporalEvidencesClassifier(data.features, data.target_names)
cls = cls.fit(data.data, data.target)
results = cls.predict(data.data)
matrix = QualityMetricsCalculator(data.target, results).confusion_matrix()

# format confusion matrix for pretty printing
letters = list(map(chr, list(range(97, 123)))) + list(map(chr, list(range(65, 91))))
action_to_letter = {action: letter for action, letter in zip(matrix.index, letters)}
matrix.columns = [action_to_letter[action] for action in matrix.columns]
matrix.index = ["(%s) %s" % (action_to_letter[action], action) for action in matrix.index]
matrix.index.name = "Actual action"

pandas.set_option("expand_frame_repr", False)
pandas.set_option("max_columns", 40)
print matrix
Пример #5
0
import pandas

from recsys.classifiers.temporal import TemporalEvidencesClassifier
from recsys.classifiers.bayes import NaiveBayesClassifier
from recsys.dataset import load_dataset
from evaluation import plot
from evaluation.metrics import QualityMetricsCalculator
import config

#configuration
data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")
to_compare = [1, 2, 3, 4]

#run classifier and count true positives
cls = TemporalEvidencesClassifier(data.features, data.target_names)
cls = cls.fit(data.data, data.target)
results = cls.predict(data.data)
results = QualityMetricsCalculator(data.target,
                                   results).true_positives_for_all()

#only use the interesting cutoffs
results = results.transpose()[to_compare]
results.columns = ["cutoff=%s" % c for c in results.columns]

conf = plot.plot_config(config.plot_directory,
                        sub_dirs=[data.name],
                        prefix="histogram_cutoffs",
                        img_type=config.img_type)
plot.comparison_histogram(results, conf)
print "Results can be found in the \"%s\" directory" % config.plot_directory
Пример #6
0
import pandas

from recsys.classifiers.temporal import TemporalEvidencesClassifier
from recsys.dataset import load_dataset
from evaluation.metrics import results_as_dataframe
from evaluation import plot
import config

#configuration: dataset CSV plus its sensor configuration file
data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")

#run the classifier on the whole dataset (fit and predict on the same data)
cls = TemporalEvidencesClassifier(data.features, data.target_names)
cls = cls.fit(data.data, data.target)
#with include_conflict_theta=True each prediction is a
#(recommendations, conflict, uncertainty) tuple -- unpacked via zip(*results) below
results = cls.predict(data.data, include_conflict_theta=True)

#extract conflict and uncertainty and convert recommendations to pandas representation
recommendations, conflict, uncertainty = zip(*results)
results = results_as_dataframe(data.target, list(recommendations))

#for each row, mark correct recommendations with "1", false recommendations with "0"
#(row.name holds the actually performed action for that row)
find_matches_in_row = lambda row: [1 if col == row.name else 0 for col in row]
results = results.apply(find_matches_in_row, axis=1)

#set uncertainty and conflict as multi-index so results can later be grouped by them
results.index = pandas.MultiIndex.from_tuples(
    zip(conflict, uncertainty), names=["Conflict", "Uncertainty"])

#found_within: the correct service was found within X recommendations
#-> apply cumulative sum on each row so that the "1" marker is set for all columns after it first appears