results = cls.predict(data.data, include_conflict_theta=True)

#extract conflict and uncertainty and convert recommendations to pandas representation
recommendations, conflict, uncertainty = zip(*results)
results = results_as_dataframe(data.target, list(recommendations))

#for each row, mark correct recommendations with "1", false recommendations with "0"
find_matches_in_row = lambda row: [1 if col == row.name else 0 for col in row]
results = results.apply(find_matches_in_row, axis=1)

#set uncertainty and conflict as multi-index
results.index = pandas.MultiIndex.from_tuples(zip(conflict, uncertainty),
                                              names=["Conflict", "Uncertainty"])

#found_within: the correct service was found within X recommendations
#-> apply cumulative sum on each row so that the "1" marker is set for all columns after it first appears
found_within = results.cumsum(axis=1)

#create one plot for each cutoff
conf = plot.plot_config(config.plot_directory, sub_dirs=[data.name, "conflict-uncertainty"],
                        prefix="found_within_", img_type=config.img_type)
plot.conflict_uncertainty_scatter(found_within, conf)

#not_found_within: the correct service was not found within X recommendations;
#this is the reverse of found_within
not_found_within = found_within.apply(lambda col: 1 - col)

#create one plot for each cutoff
conf = plot.plot_config(config.plot_directory, sub_dirs=[data.name, "conflict-uncertainty"],
                        prefix="not_found_within_", img_type=config.img_type)
plot.conflict_uncertainty_scatter(not_found_within, conf)

print("Results can be found in the \"%s\" directory" % config.plot_directory)
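#A minimal, self-contained sketch (toy data; column labels are hypothetical cutoffs) of
#the cumsum trick used above: once a row contains the "1" marker at cutoff k, every
#cutoff >= k becomes 1, so column X answers "was the correct service among the first X
#recommendations?".
import pandas

toy = pandas.DataFrame([[0, 1, 0],
                        [1, 0, 0]], columns=[1, 2, 3])
print(toy.cumsum(axis=1))
#   1  2  3
#0  0  1  1
#1  1  1  1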
import pandas

from recsys.classifiers.temporal import TemporalEvidencesClassifier
from recsys.classifiers.bayes import NaiveBayesClassifier
from recsys.dataset import load_dataset
from evaluation import plot
from evaluation.metrics import QualityMetricsCalculator
import config

#configuration
data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")
classifiers = [NaiveBayesClassifier(data.features, data.target_names),
               TemporalEvidencesClassifier(data.features, data.target_names)]

#run the experiment using full dataset as training and as test data
results = []
for cls in classifiers:
    cls = cls.fit(data.data, data.target)
    r = cls.predict(data.data)
    r = QualityMetricsCalculator(data.target, r)
    results.append(r.true_positives_for_all())

#want for each classifier result only the measurements for cutoff=1
results = [r.loc[1] for r in results]
results = pandas.concat(results, axis=1)
results.columns = [cls.name for cls in classifiers]

plot_conf = plot.plot_config(config.plot_directory, sub_dirs=[data.name],
                             prefix="histogram_classifiers", img_type=config.img_type)
plot.comparison_histogram(results, plot_conf)

print("Results can be found in the \"%s\" directory" % config.plot_directory)
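#Toy illustration (hypothetical action name and counts) of the cutoff selection above:
#each per-classifier result frame is indexed by cutoff, so .loc[1] keeps only the row of
#measurements for "correct service within the first recommendation".
import pandas

per_cutoff = pandas.DataFrame({"frontdoor=Open": [5, 8, 9]}, index=[1, 2, 3])
per_cutoff.index.name = "cutoff"
print(per_cutoff.loc[1])  #-> Series with one entry per action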
def houseB():
    data = load_dataset("../datasets/houseB.csv", "../datasets/houseB.config")
    cutoff_results_at = 15
    return data, cutoff_results_at

#configuration
data, cutoff_results_at = houseA()

#run several classifiers on the same dataset, use 10-fold cross-validation
experiment = Experiment(data)
experiment.add_classifier(TemporalEvidencesClassifier(data.features, data.target_names),
                          name="Our method")
experiment.add_classifier(NaiveBayesClassifier(data.features, data.target_names),
                          name="Naive Bayes")
experiment.add_classifier(RandomClassifier(data.features, data.target_names),
                          name="Random")
results = experiment.run(folds=10)

#print and plot results
results.print_quality_comparison_at_cutoff(cutoff=1, metrics=["Recall", "Precision", "F1"])
results.print_runtime_comparison()
plot_conf = plot.plot_config(config.plot_directory, sub_dirs=[data.name],
                             img_type=config.img_type)
results.plot_quality_comparison(metrics=["Recall", "Precision", "F1"],
                                plot_config=plot_conf, cutoff_results_at=cutoff_results_at)
stats = [experiment.run_with_classifier(cls, [(train_data, test_data)])
         for cls in experiment.classifiers]

#combine results of all classifiers for this training dataset, keep only results for cutoff=1
quality_stats = pandas.concat([quality for quality, runtime in stats], axis=1).loc[1]
results.append(quality_stats)

#make one big matrix with all results and add multi-index of training sizes and training times
results = pandas.concat(results, axis=1).transpose()
results.index = pandas.MultiIndex.from_tuples(zip(train_sizes, train_times),
                                              names=["Size of dataset", "Elapsed time (days)"])

#print confidence intervals for interesting metrics
interesting_columns = lambda metric: [(cls.name, metric, "Confidence interval")
                                      for cls in experiment.classifiers]
for metric in ["Precision", "Recall", "F1"]:
    r = results[interesting_columns(metric)]
    r.columns = [cls.name for cls in experiment.classifiers]
    r.name = metric
    print(metric)
    print(r)

#plot means for interesting metrics
plot_conf = plot.plot_config(config.plot_directory, sub_dirs=[data.name],
                             prefix="trainsize_", img_type=config.img_type)
interesting_columns = lambda metric: [(cls.name, metric, "Mean")
                                      for cls in experiment.classifiers]
for metric in ["Precision", "Recall", "F1"]:
    r = results[interesting_columns(metric)]
    r.columns = [cls.name for cls in experiment.classifiers]
    plot.plot_train_size(r, metric, plot_conf)
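#A minimal sketch (toy numbers; classifier, metric and statistic names taken from this
#script) of the column selection above: the combined result frame carries a 3-level
#column MultiIndex of (classifier, metric, statistic), so a list of
#(classifier, metric, "Mean") tuples extracts one statistic per classifier.
import pandas

columns = pandas.MultiIndex.from_tuples([("Our method", "Precision", "Mean"),
                                         ("Our method", "Precision", "Confidence interval"),
                                         ("Naive Bayes", "Precision", "Mean"),
                                         ("Naive Bayes", "Precision", "Confidence interval")])
toy = pandas.DataFrame([[0.9, 0.02, 0.8, 0.03]], columns=columns)
print(toy[[("Our method", "Precision", "Mean"), ("Naive Bayes", "Precision", "Mean")]])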
of the figure still stands: the user has some observable habits after closing
the frontdoor.
"""

import sys
sys.path.append("..")

import pandas

from recsys.classifiers.temporal import TemporalEvidencesClassifier
from recsys.classifiers.binning import initialize_bins
from recsys.dataset import load_dataset
from evaluation import plot
import config

#configuration
data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")

#fit classifier to dataset
cls = TemporalEvidencesClassifier(data.features, data.target_names,
                                  bins=initialize_bins(0, 300, 10))
cls = cls.fit(data.data, data.target)

#create visualizations of habits around each user action
plot_conf = plot.plot_config(config.plot_directory, sub_dirs=[data.name, "habits"],
                             img_type=config.img_type)
for source in cls.sources.values():
    observations = pandas.DataFrame(source.temporal_counts)
    observations.columns = data.target_names
    observations.index = cls.bins
    plot.plot_observations(source.name(), observations, plot_conf)

print("Results can be found in the \"%s\" directory" % config.plot_directory)
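#Toy sketch (hypothetical counts and action names; bin unit assumed to be seconds) of
#one "observations" frame plotted above: after the assignments in the loop, rows are
#the classifier's time bins and columns are the possible user actions, assuming
#temporal_counts holds per-bin action counts for the source event.
import pandas

toy = pandas.DataFrame([[3, 0], [1, 2]],
                       index=[0, 10],                                   #time bins (assumed: seconds)
                       columns=["frontdoor=Open", "frontdoor=Closed"])  #hypothetical actions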