Example No. 1
def run_for_all_datasets(self, algorithm_classes, algorithm_kwargs):
    datasets = self.get_datasets()
    accuracies = np.zeros([len(datasets), len(algorithm_classes)])
    datasets_names = []
    for index, item in enumerate(datasets):
        # Prefer the human-readable label; fall back to the dataset name.
        name = item["label"] if item.get("label") is not None else item["name"]
        datasets_names.append(name)
        print("Dataset:", name)
        if item.get("type") == "sklearn":
            # Import the configured module and call the named loader,
            # which exposes the usual .data / .target attributes.
            mod = __import__(self.convert_from_path_to_module(item['path']),
                             fromlist=[item["name"]])
            klass = getattr(mod, item["name"])
            dataset = klass()
            accuracies[index] = self.accuracies_of_different_methods(
                dataset.data, dataset.target, algorithm_classes, algorithm_kwargs)
        else:
            with open(item['path'], 'r') as f:
                # Fill in defaults for the optional keys before parsing the file.
                if item.get('dtype') is None:
                    item['dtype'] = int
                item['label_index'] = (int(item['label_index'])
                                       if item.get('label_index') is not None
                                       else None)
                if item.get('labels_numeric') is None:
                    item['labels_numeric'] = True
                dataset, labels = tools.load_text_file(
                    f, label_index=item['label_index'], dtype=item['dtype'],
                    labels_numeric=bool(item['labels_numeric']))
                accuracies[index] = self.accuracies_of_different_methods(
                    dataset, labels, algorithm_classes, algorithm_kwargs)
    return accuracies, datasets_names
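For context, a minimal sketch of what get_datasets() might return for this method. The key names mirror the ones read above (name, label, path, type, dtype, label_index, labels_numeric); the concrete values are illustrative assumptions, not taken from the project.

# Hypothetical configuration entries, assuming get_datasets() returns a list
# of dicts with the keys consumed by run_for_all_datasets above.
example_datasets = [
    # sklearn-style entry: the path is mapped to a module (e.g. sklearn.datasets)
    # and the named loader is called to obtain .data / .target.
    {"name": "load_iris", "label": "Iris", "type": "sklearn",
     "path": "sklearn/datasets"},
    # Plain text entry: parsed by tools.load_text_file with the options below.
    {"name": "letters", "path": "../../data_sources/letter-recognition.data",
     "dtype": int, "label_index": 0, "labels_numeric": False},
]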
Example No. 2
def accuracy_for_letters():
    with open('../../data_sources/letter-recognition.data', 'r') as f:
        # Column 0 holds the letter class, so labels are non-numeric strings.
        dataset, labels = tools.load_text_file(f,
                                               label_index=0,
                                               labels_numeric=False)
        folds = 3
        data_split, labels_split = tools.cross_validation_split(
            dataset=dataset, labels=labels, folds=folds)
        print("Multinomial:")
        accuracy_of_multinomial(data_split, labels_split, num_of_bins=11)
        print("Gaussian:")
        accuracy_of_gaussian(data_split, labels_split)
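accuracy_of_multinomial and accuracy_of_gaussian are project helpers that are not shown in these examples. As a rough illustration only, a per-fold Gaussian accuracy check could look like the sketch below, assuming data_split and labels_split are lists of `folds` NumPy arrays as produced by tools.cross_validation_split; scikit-learn's GaussianNB stands in for the project's own classifier.

import numpy as np
from sklearn.naive_bayes import GaussianNB

def sketch_accuracy_of_gaussian(data_split, labels_split):
    scores = []
    for k in range(len(data_split)):
        # Hold out fold k for testing; train on the concatenation of the rest.
        X_test, y_test = data_split[k], labels_split[k]
        X_train = np.concatenate([d for i, d in enumerate(data_split) if i != k])
        y_train = np.concatenate([l for i, l in enumerate(labels_split) if i != k])
        model = GaussianNB().fit(X_train, y_train)
        scores.append(model.score(X_test, y_test))
    return np.mean(scores)   # mean accuracy over the folds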
Example No. 3
def accuracy_for_cancer():
    with open('../../data_sources/kag_risk_factors_cervical_cancer.csv',
              'r') as f:
        # Column 28 holds the class label; load_text_file splits it out
        # and parses the remaining feature columns as floats.
        dataset, labels = tools.load_text_file(f, label_index=28, dtype=float)
        folds = 3
        data_split, labels_split = tools.cross_validation_split(
            dataset=dataset, labels=labels, folds=folds)
        print("Multinomial:")
        accuracy_of_multinomial(data_split, labels_split)
        print("Gaussian:")
        accuracy_of_gaussian(data_split, labels_split)
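A multinomial model needs discrete feature values, which is presumably why Example No. 2 passes num_of_bins=11 while the Gaussian variant takes the raw floats. A hedged sketch of that kind of per-column binning with NumPy follows; it is an illustration, not the project's accuracy_of_multinomial.

import numpy as np

def discretize_columns(X, num_of_bins=11):
    # Map each continuous column onto integer bin indices 0..num_of_bins-1.
    X = np.asarray(X, dtype=float)
    binned = np.empty(X.shape, dtype=int)
    for j in range(X.shape[1]):
        edges = np.histogram_bin_edges(X[:, j], bins=num_of_bins)
        # Use interior edges only so digitize yields indices 0..num_of_bins-1.
        binned[:, j] = np.digitize(X[:, j], edges[1:-1])
    return binned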
Example No. 4
def accuracy_for_trees():
    with open('../../data_sources/covtype.csv', 'r') as f:
        # The last column is the Cover_Type label (integer class codes 1..7);
        # shift with labels -= 1 if the classifiers expect 0-based classes.
        dataset, labels = tools.load_text_file(f,
                                               label_index=-1,
                                               dtype=float,
                                               labels_numeric=True)
        folds = 3
        data_split, labels_split = tools.cross_validation_split(
            dataset=dataset, labels=labels, folds=folds)
        print("Multinomial:")
        accuracy_of_multinomial(data_split, labels_split)
        print("Gaussian:")
        accuracy_of_gaussian(data_split, labels_split)
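The Cover_Type column in covtype.csv uses the class codes 1 through 7. Whether the project's classifiers need 0-based class indices is not shown in these examples; if they do, a minimal, hypothetical helper for the shift could look like this.

import numpy as np

def shift_labels_to_zero_based(labels):
    # Map class codes such as 1..7 onto 0..6 by subtracting the smallest code.
    labels = np.asarray(labels, dtype=int)
    return labels - labels.min()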