def load(file):
    """Read a CSV file into a list of rows with numeric attribute values.

    Every field is converted to ``float`` except the one at the column
    index returned by ``utils.get_class_index``; that field is kept as the
    string read from the file.

    Args:
        file: Path of the CSV file to read.

    Returns:
        A list of rows (lists), one per CSV record.

    Raises:
        ValueError: If a non-class field cannot be parsed as a float.
    """
    # The context manager guarantees the handle is closed even when parsing
    # fails; newline="" is the mode the csv module asks for so that newlines
    # embedded in quoted fields are handled by the reader itself.
    with open(file, "r", newline="") as handle:
        data = list(csv.reader(handle))

    class_index = utils.get_class_index(data)
    for row in data:
        for j, val in enumerate(row):
            if j != class_index:
                row[j] = float(val)
    return data
def get_attr_by_class_probs(self, X, y):
    """Compute attribute probabilities separately for each class.

    ``X`` and ``y`` are merged with ``utils.merge_attrs`` and the merged
    records are grouped by the value found at the class column. Each group
    is then split with ``utils.horizontal_split`` and its attribute part is
    passed to ``self.get_attr_probs`` together with a fresh
    ``self.empty_bins()``.

    Returns:
        The mapping obtained from ``self.get_empty_classes_dict`` with each
        class key pointing at the result of ``self.get_attr_probs`` for the
        records of that class.
    """
    records = utils.merge_attrs(X, y)
    label_pos = utils.get_class_index(records)

    # NOTE(review): this relies on get_empty_classes_dict giving every class
    # its own list rather than one shared list object - confirm in the helper.
    per_class = self.get_empty_classes_dict([])
    for row in records:
        per_class[row[label_pos]].append(row)

    # Replace each group of records with the probabilities of its attributes.
    for label in per_class:
        attrs_only, _ = utils.horizontal_split(per_class[label])
        per_class[label] = self.get_attr_probs(attrs_only, self.empty_bins())
    return per_class
def get_class_probs(self, X, y):
    """Return the relative frequency of every class in the data.

    ``X`` and ``y`` are merged with ``utils.merge_attrs``; the records are
    counted per class value and every count is divided by the total number
    of merged records.

    Returns:
        The mapping obtained from ``self.get_empty_classes_dict(0)`` with
        each class key pointing at its share of the records.
    """
    records = utils.merge_attrs(X, y)
    label_pos = utils.get_class_index(records)

    # First pass: count how many records carry each class value.
    freq = self.get_empty_classes_dict(0)
    for row in records:
        freq[row[label_pos]] += 1

    # Second pass: turn the counts into fractions of the whole data set.
    for label in freq:
        freq[label] = freq[label] / len(records)
    return freq
def get_attr_measures(self, X, y):
    """Compute the mean and standard deviation of every attribute column.

    ``X`` and ``y`` are merged with ``utils.merge_attrs``; the values of
    each column located before the class column are collected and
    summarised with ``np.mean`` and ``np.std``.

    Returns:
        A list with ``self.attr_count`` entries, each a two-element list
        ``[mean, std]`` for the attribute at that position.
    """
    records = utils.merge_attrs(X, y)
    # Attribute columns are the ones at positions 0 .. class_index - 1.
    attr_stop = utils.get_class_index(records)

    columns = [[] for _ in range(self.attr_count)]
    for row in records:
        for pos in range(attr_stop):
            columns[pos].append(row[pos])

    return [[np.mean(values), np.std(values)] for values in columns]