def run_for_all_datasets(self, algorithm_classes, algorithm_kwargs):
    """Score every algorithm on every dataset returned by ``get_datasets``.

    Each dataset description is a dict. Its display name is ``"label"``
    when that entry is present and not ``None``, otherwise ``"name"``.
    Entries whose ``"type"`` is ``"sklearn"`` are loaded by importing the
    module derived from ``"path"`` and calling the attribute called
    ``"name"`` with no arguments; the result's ``data`` and ``target``
    attributes are evaluated. Any other entry is read from the text file
    at ``"path"`` through ``tools.load_text_file``.

    Note that text-file descriptions are updated in place: missing or
    ``None`` values of ``"dtype"``, ``"label_index"`` and
    ``"labels_numeric"`` are replaced by ``int``, ``None`` and ``True``,
    and a given ``"label_index"`` is converted with ``int``.

    :param algorithm_classes: passed through to
        ``accuracies_of_different_methods``; its length fixes the number
        of columns of the result.
    :param algorithm_kwargs: passed through unchanged.
    :return: ``(accuracies, datasets_names)`` where ``accuracies`` is a
        numpy array with one row per dataset and one column per
        algorithm, and ``datasets_names`` lists the display names in the
        same row order.
    """
    dataset_specs = self.get_datasets()
    accuracies = np.zeros([len(dataset_specs), len(algorithm_classes)])
    datasets_names = []

    for row, spec in enumerate(dataset_specs):
        # Prefer the human-readable label; fall back to the raw name.
        display_key = "label" if spec.get("label") is not None else "name"
        datasets_names.append(spec[display_key])
        print("Dataset: ", spec.get(display_key))

        if spec.get("type") == "sklearn":
            module_name = self.convert_from_path_to_module(spec['path'])
            loader_module = __import__(module_name, fromlist=[spec["name"]])
            loader = getattr(loader_module, spec["name"])
            loaded = loader()
            accuracies[row] = self.accuracies_of_different_methods(
                loaded.data, loaded.target,
                algorithm_classes, algorithm_kwargs)
            continue

        with open(spec['path'], 'r') as handle:
            # Fill in loader defaults directly on the description dict.
            if spec.get('dtype') is None:
                spec['dtype'] = int
            raw_label_index = spec.get('label_index')
            spec['label_index'] = (
                None if raw_label_index is None else int(raw_label_index))
            if spec.get('labels_numeric') is None:
                spec['labels_numeric'] = True

            features, targets = tools.load_text_file(
                handle,
                label_index=spec['label_index'],
                dtype=spec['dtype'],
                labels_numeric=bool(spec['labels_numeric']))
            accuracies[row] = self.accuracies_of_different_methods(
                features, targets, algorithm_classes, algorithm_kwargs)

    return accuracies, datasets_names
def accuracy_for_letters(path='../../data_sources/letter-recognition.data',
                         folds=3, num_of_bins=11):
    """Run the multinomial and Gaussian accuracy checks on the letter data.

    The file is loaded with ``tools.load_text_file`` using column 0 as the
    label and ``labels_numeric=False``, split with
    ``tools.cross_validation_split`` and handed to
    ``accuracy_of_multinomial`` and ``accuracy_of_gaussian``. A heading is
    printed before each call; whatever those helpers return is discarded.

    :param path: file to read. The default is relative to the current
        working directory, so pass an explicit path when running from
        elsewhere.
    :param folds: number of folds passed to ``cross_validation_split``.
    :param num_of_bins: forwarded to ``accuracy_of_multinomial``.
    :return: ``None``.
    """
    with open(path, 'r') as f:
        dataset, labels = tools.load_text_file(
            f, label_index=0, labels_numeric=False)

    data_split, labels_split = tools.cross_validation_split(
        dataset=dataset, labels=labels, folds=folds)

    print("Multinomial:")
    accuracy_of_multinomial(data_split, labels_split, num_of_bins=num_of_bins)
    print("Gaussian:")
    accuracy_of_gaussian(data_split, labels_split)
def accuracy_for_cancer(
        path='../../data_sources/kag_risk_factors_cervical_cancer.csv',
        folds=3):
    """Run the multinomial and Gaussian accuracy checks on the cancer data.

    The file is loaded with ``tools.load_text_file`` using column 28 as
    the label and ``dtype=float``, split with
    ``tools.cross_validation_split`` and handed to
    ``accuracy_of_multinomial`` and ``accuracy_of_gaussian``. A heading is
    printed before each call; whatever those helpers return is discarded.

    :param path: file to read. The default is relative to the current
        working directory, so pass an explicit path when running from
        elsewhere.
    :param folds: number of folds passed to ``cross_validation_split``.
    :return: ``None``.
    """
    with open(path, 'r') as f:
        dataset, labels = tools.load_text_file(
            f, label_index=28, dtype=float)

    data_split, labels_split = tools.cross_validation_split(
        dataset=dataset, labels=labels, folds=folds)

    print("Multinomial:")
    accuracy_of_multinomial(data_split, labels_split)
    print("Gaussian:")
    accuracy_of_gaussian(data_split, labels_split)
def accuracy_for_trees(path='../../data_sources/covtype.csv', folds=3):
    """Run the multinomial and Gaussian accuracy checks on the covtype data.

    The file is loaded with ``tools.load_text_file`` using the last column
    (``label_index=-1``) as the label, ``dtype=float`` and
    ``labels_numeric=True``, split with ``tools.cross_validation_split``
    and handed to ``accuracy_of_multinomial`` and
    ``accuracy_of_gaussian``. A heading is printed before each call;
    whatever those helpers return is discarded.

    :param path: file to read. The default is relative to the current
        working directory, so pass an explicit path when running from
        elsewhere.
    :param folds: number of folds passed to ``cross_validation_split``.
    :return: ``None``.
    """
    with open(path, 'r') as f:
        dataset, labels = tools.load_text_file(
            f, label_index=-1, dtype=float, labels_numeric=True)

    data_split, labels_split = tools.cross_validation_split(
        dataset=dataset, labels=labels, folds=folds)

    print("Multinomial:")
    accuracy_of_multinomial(data_split, labels_split)
    print("Gaussian:")
    accuracy_of_gaussian(data_split, labels_split)