def evaluate_confusion(bs: int, file: str, fixed: int, model_path: str,
                       test_bin) -> None:
    """
    Evaluates the confusion matrix for a given number of features

    :param bs: batch size
    :param file: file where the confusion matrix will be written
    :param fixed: number of features to be considered
    :param model_path: string pointing to the .h5 keras model of the network.
    If empty will default to data_dir/model.h5
    :param test_bin: path to the test dataset that will be used
    """
    test = BinaryDs(test_bin, read_only=True).open()
    try:
        # two (or fewer) categories means a single-output binary classifier
        binary = test.get_categories() <= 2
        model = load_model(model_path)
        generator = DataGenerator(test, bs, fake_pad=True, pad_len=fixed,
                                  predict=True)
        expected = get_expected(bs, test)
        predicted = model.predict(generator, verbose=1)
        if binary:
            # single sigmoid output: threshold at 0.5, flatten to 1-D labels
            predicted = np.round(predicted).flatten().astype(np.int8)
        else:
            # softmax output: take the highest-scoring class per sample
            predicted = np.argmax(predicted, axis=1)
        matrix = np.array(tf.math.confusion_matrix(expected, predicted))
        with open(file, "w") as f:
            np.savetxt(f, X=matrix, fmt="%d")
    finally:
        # always release the dataset: previously test.close() was skipped
        # whenever loading the model or predicting raised, leaking the handle
        test.close()
def count_categories(dataset: BinaryDs) -> List[int]:
    """
    Counts how many examples of each category the dataset contains.

    :param dataset: opened BinaryDs whose examples start with the category id
    :return: list where index i holds the number of examples of category i
    """
    examples = dataset.get_examples_no()
    chunk = 1000  # read in fixed-size chunks to bound memory usage
    categories: List[int] = []
    offset = 0
    while offset < examples:
        amount = min(chunk, examples - offset)
        for val in dataset.read(offset, amount):
            category = val[0]
            # grow the tally list on demand so high category ids still fit
            while len(categories) <= category:
                categories.append(0)
            categories[category] += 1
        offset += amount
    # sanity check: every declared category was observed at least once
    assert len(categories) == dataset.get_categories()
    return categories