Example #1
def initialize_experiment():
    experiment = Experiment(data)
    experiment.add_classifier(TemporalEvidencesClassifier(data.features, data.target_names),
                              name="Our method")
    experiment.add_classifier(NaiveBayesClassifier(data.features, data.target_names),
                              name="Naive Bayes")
    return experiment
Example #2
def initialize_experiment():
    experiment = Experiment(data)
    experiment.add_classifier(TemporalEvidencesClassifier(
        data.features, data.target_names),
                              name="Our method")
    experiment.add_classifier(NaiveBayesClassifier(data.features,
                                                   data.target_names),
                              name="Naive Bayes")
    return experiment
    """
    This dataset is partially dominated by one of the sensors, which makes the evaluation results less statistically
    sound, e.g. it leads to large confidence intervals when running 10-fold cross-validation.  
    """
    data = load_dataset("../datasets/houseB.csv", "../datasets/houseB.config")
    cutoff_results_at = 15
    return data, cutoff_results_at


#configuration
data, cutoff_results_at = houseA()

#run several classifiers on the same dataset, use 10-fold cross-validation
experiment = Experiment(data)
experiment.add_classifier(TemporalEvidencesClassifier(data.features,
                                                      data.target_names),
                          name="Our method")
experiment.add_classifier(NaiveBayesClassifier(data.features,
                                               data.target_names),
                          name="Naive Bayes")
experiment.add_classifier(RandomClassifier(data.features, data.target_names),
                          name="Random")
results = experiment.run(folds=10)

#print and plot results
results.print_quality_comparison_at_cutoff(
    cutoff=1, metrics=["Recall", "Precision", "F1"])
results.print_runtime_comparison()
plot_conf = plot.plot_config(config.plot_directory,
                             sub_dirs=[data.name],
                             img_type=config.img_type)
results.plot_quality_comparison(metrics=["Recall", "Precision", "F1"],
                                plot_config=plot_conf,
                                cutoff_results_at=cutoff_results_at)
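
The docstring in houseB() points out that a single dominant sensor makes the 10-fold cross-validation results statistically weaker: the per-fold scores spread out, so the confidence interval around their mean widens. As a minimal illustration of that effect, independent of the project code, the sketch below computes an approximate 95% confidence interval from made-up per-fold F1 scores:

import math
import statistics

def confidence_interval_95(fold_scores):
    """Approximate 95% confidence interval for the mean of per-fold scores
    (normal approximation; with only 10 folds a t-based interval is slightly wider)."""
    mean = statistics.mean(fold_scores)
    sem = statistics.stdev(fold_scores) / math.sqrt(len(fold_scores))
    return mean - 1.96 * sem, mean + 1.96 * sem

#made-up per-fold F1 scores: similar means, very different fold-to-fold variance
balanced_folds = [0.61, 0.63, 0.60, 0.62, 0.64, 0.61, 0.62, 0.63, 0.60, 0.62]
dominated_folds = [0.45, 0.72, 0.50, 0.68, 0.41, 0.74, 0.48, 0.70, 0.44, 0.69]

print(confidence_interval_95(balanced_folds))   #narrow interval
print(confidence_interval_95(dominated_folds))  #much wider interval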
Example #4
import sys
sys.path.append("..")

import pandas

from evaluation.experiment import Experiment
from evaluation.metrics import quality_metrics
from recsys.classifiers.temporal import TemporalEvidencesClassifier, configure_dynamic_cutoff
from recsys.dataset import load_dataset


#configuration
data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")
methods_to_test = [("Fixed cutoff", None),
                   ("dynamic cutoff=4", configure_dynamic_cutoff(1.0, 0.4, 4)),
                   ("dynamic cutoff=2", configure_dynamic_cutoff(1.0, 0.4, 2))]

#run all configured cutoffs with 10-fold cross-validation
experiment = Experiment(data)
for name, method in methods_to_test:
    experiment.add_classifier(TemporalEvidencesClassifier(data.features, data.target_names,
                              postprocess=method), name=name)
results = experiment.run(folds=10)

#print results
pandas.set_option('display.expand_frame_repr', False)
pandas.set_option('display.max_columns', 4)
print "Maximum 5 recommendations"
results.print_quality_comparison_at_cutoff(cutoff=5, metrics=quality_metrics)
print "Maximum 10 recommendations"
results.print_quality_comparison_at_cutoff(cutoff=10, metrics=quality_metrics)
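
configure_dynamic_cutoff(1.0, 0.4, 4) comes from recsys.classifiers.temporal and is passed as the postprocess step; its exact parameter semantics are not shown in these snippets. Purely to illustrate the general idea of a dynamic cutoff (as opposed to a fixed maximum number of recommendations), here is a hypothetical postprocessor, not the project's implementation, that keeps at most max_length items and drops recommendations whose score falls below a fraction of the best score:

def make_dynamic_cutoff(max_length, min_relative_score):
    """Hypothetical dynamic cutoff: keep at most max_length recommendations and
    drop any whose score is below min_relative_score times the best score."""
    def cutoff(recommendations):
        #recommendations: list of (action, score) pairs, sorted by descending score
        if not recommendations:
            return recommendations
        best_score = recommendations[0][1]
        kept = [(action, score) for action, score in recommendations
                if score >= min_relative_score * best_score]
        return kept[:max_length]
    return cutoff

#usage with made-up scores
ranked = [("toggle light", 0.9), ("open door", 0.5), ("close blinds", 0.2)]
print(make_dynamic_cutoff(max_length=2, min_relative_score=0.4)(ranked))
#[('toggle light', 0.9), ('open door', 0.5)]

The point of such a rule is that the number of recommendations shown adapts to how confident the classifier is, instead of always padding the list to a fixed length.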
Example #5
from evaluation.experiment import Experiment
from recsys.classifiers.temporal import TemporalEvidencesClassifier
from recsys.classifiers.binning import initialize_bins
from recsys.dataset import load_dataset

#configuration
data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")
intervals_to_test = [  #test various settings for delta t_max
    ("Delta t_max=1200s", initialize_bins(start=0, end=60, width=10) +
     initialize_bins(start=60, end=1200, width=30)),
    ("Delta t_max=120s", initialize_bins(start=0, end=60, width=10) +
     initialize_bins(start=60, end=120, width=30)),
    ("Delta t_max=60s", initialize_bins(start=0, end=60, width=10)),
    ("Delta t_max=30s", initialize_bins(start=0, end=30, width=10)),
    ("Delta t_max=10s", initialize_bins(start=0, end=10, width=10)),
    #test various interval widths
    ("all intervals 2s wide", initialize_bins(start=0, end=300, width=2)),
    ("all intervals 4s wide", initialize_bins(start=0, end=300, width=4)),
    ("all intervals 6s wide", initialize_bins(start=0, end=300, width=6)),
    ("all intervals 8s wide", initialize_bins(start=0, end=300, width=8)),
    ("all intervals 30s wide", initialize_bins(start=0, end=300, width=30)),
    ("all intervals 50s wide", initialize_bins(start=0, end=300, width=50)),
    ("all intervals 100s wide", initialize_bins(start=0, end=300, width=100))
]

#run 10-fold cross-validation for each of the configured intervals
experiment = Experiment(data)
for (name, bins) in intervals_to_test:
    experiment.add_classifier(TemporalEvidencesClassifier(data.features,
                                                          data.target_names,
                                                          bins=bins),
                              name=name)
results = experiment.run(folds=10)

results.print_quality_comparison_at_cutoff(
    cutoff=1, metrics=["Recall", "Precision", "F1"])
Example #6
from evaluation.experiment import Experiment
from recsys.classifiers.temporal import TemporalEvidencesClassifier
from recsys.classifiers.binning import initialize_bins
from recsys.dataset import load_dataset

#configuration
data = load_dataset("../datasets/houseA.csv", "../datasets/houseA.config")
intervals_to_test = [#test various settings for delta t_max
                     ("Delta t_max=1200s", initialize_bins(start=0, end=60, width=10) +
                                           initialize_bins(start=60, end=1200, width=30)),
                     ("Delta t_max=120s",  initialize_bins(start=0, end=60, width=10) +
                                           initialize_bins(start=60, end=120, width=30)),
                     ("Delta t_max=60s",   initialize_bins(start=0, end=60, width=10)),
                     ("Delta t_max=30s",   initialize_bins(start=0, end=30, width=10)),
                     ("Delta t_max=10s",   initialize_bins(start=0, end=10, width=10)),
                     #test various interval widths
                     ("all intervals 2s wide",   initialize_bins(start=0, end=300, width=2)),
                     ("all intervals 4s wide",   initialize_bins(start=0, end=300, width=4)),
                     ("all intervals 6s wide",   initialize_bins(start=0, end=300, width=6)),
                     ("all intervals 8s wide",   initialize_bins(start=0, end=300, width=8)),
                     ("all intervals 30s wide",  initialize_bins(start=0, end=300, width=30)),
                     ("all intervals 50s wide",  initialize_bins(start=0, end=300, width=50)),
                     ("all intervals 100s wide", initialize_bins(start=0, end=300, width=100))]

#run 10-fold cross-validation for each of the configured intervals
experiment = Experiment(data)
for (name, bins) in intervals_to_test:
    experiment.add_classifier(TemporalEvidencesClassifier(data.features, data.target_names,
                              bins=bins), name=name)
results = experiment.run(folds=10)

results.print_quality_comparison_at_cutoff(cutoff=1, metrics=["Recall", "Precision", "F1"])
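
initialize_bins is imported from recsys.classifiers.binning and, judging by the way two calls are concatenated with + in the Delta t_max settings above, it appears to return a plain list of interval boundaries in seconds. Assuming that (the real implementation may differ), a hypothetical stand-in would be:

def initialize_bins_sketch(start, end, width):
    """Hypothetical stand-in for initialize_bins: boundaries of consecutive
    intervals of the given width, covering [start, end) in seconds."""
    return list(range(start, end, width))

#e.g. the "Delta t_max=60s" setting above
print(initialize_bins_sketch(start=0, end=60, width=10))
#[0, 10, 20, 30, 40, 50]

#and the "Delta t_max=1200s" setting: two boundary lists concatenated with +
print(initialize_bins_sketch(start=0, end=60, width=10) +
      initialize_bins_sketch(start=60, end=1200, width=30))

Under that assumption, the "Delta t_max=1200s" entry simply covers 0-60s in 10s steps and 60-1200s in 30s steps as a single boundary list.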