def run_summary(model_dir: str) -> None:
    """
    Prints a summary of the dataset contained in a directory.
    :param model_dir: Path to the folder where train.bin, test.bin and
        validate.bin can be found
    """
    assert os.path.exists(model_dir)
    train_bin = os.path.join(model_dir, "train.bin")
    test_bin = os.path.join(model_dir, "test.bin")
    validate_bin = os.path.join(model_dir, "validate.bin")
    assert os.path.exists(train_bin), "Train dataset does not exist!"
    assert os.path.exists(test_bin), "Test dataset does not exist!"
    assert os.path.exists(validate_bin), "Validation dataset does not exist!"
    train = BinaryDs(train_bin, read_only=True).open()
    train_categories = count_categories(train)
    openc = train.is_encoded()
    features = train.get_features()
    train.close()
    val = BinaryDs(validate_bin, read_only=True).open()
    val_categories = count_categories(val)
    val.close()
    test = BinaryDs(test_bin, read_only=True).open()
    test_categories = count_categories(test)
    test.close()
    print(f"Features: {features}")
    print(f"Number of classes: {len(train_categories)}")
    if openc:
        print("Type: opcode encoded")
    else:
        print("Type: raw values")
    print("--------------------")
    for i in range(len(train_categories)):
        print(f"Training examples for class {i}: {train_categories[i]}")
    for i in range(len(val_categories)):
        print(f"Validation examples for class {i}: {val_categories[i]}")
    for i in range(len(test_categories)):
        print(f"Testing examples for class {i}: {test_categories[i]}")
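
# Minimal sketch of how run_summary could be exposed on the command line.
# The argparse wiring below is an illustration only, not the project's
# actual CLI; the function name _summary_cli is hypothetical.
def _summary_cli() -> None:
    import argparse
    parser = argparse.ArgumentParser(
        description="Print a summary of a train/validate/test dataset")
    parser.add_argument("model_dir",
                        help="directory containing train.bin, test.bin "
                             "and validate.bin")
    args = parser.parse_args()
    run_summary(args.model_dir)
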
def evaluate_incremental(bs: int, file: str, model_path: str,
                         test_bin: str) -> None:
    """
    Evaluates the accuracy incrementally (first with only 1 feature, then 3,
    then 5, and so on with increasingly larger steps).
    :param bs: batch size
    :param file: file where the accuracy will be written (.csv)
    :param model_path: string pointing to the .h5 keras model of the network.
        If empty will default to data_dir/model.h5
    :param test_bin: path to the test dataset that will be used
    """
    cut = 1
    test = BinaryDs(test_bin, read_only=True).open()
    model = load_model(model_path)
    features = test.get_features()
    with open(file, "w") as f:
        f.write("features,accuracy\n")
    while cut <= features:
        print(f"Evaluating {cut}")
        generator = DataGenerator(test, bs, fake_pad=True, pad_len=cut)
        score = model.evaluate(generator)
        with open(file, "a") as f:
            f.write(f"{cut},{score[1]}\n")
        if cut < 24:
            cut = cut + 2
        elif cut < 80:
            cut = cut + 22
        elif cut < 256:
            cut = cut + 33
        elif cut < 500:
            cut = cut + 61
        elif cut < features:
            cut = cut + 129
            cut = min(cut, features)
        else:
            break
    test.close()
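
# The step schedule above is easier to read in isolation. The helper below is
# a sketch that only reproduces the progression of prefix lengths that
# evaluate_incremental would test (no model or dataset involved); the name
# _cut_schedule is hypothetical.
def _cut_schedule(features: int):
    """Yield the prefix lengths evaluated by evaluate_incremental."""
    cut = 1
    while cut <= features:
        yield cut
        if cut < 24:
            cut += 2          # dense sampling for very short prefixes
        elif cut < 80:
            cut += 22
        elif cut < 256:
            cut += 33
        elif cut < 500:
            cut += 61
        elif cut < features:
            cut = min(cut + 129, features)  # clamp to the full feature count
        else:
            break

# e.g. list(_cut_schedule(2048)) starts with 1, 3, 5, ..., 23, 25, 47, 69, ...
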
def test_open_wrong_features_readonly(self):
    # The feature count stored in the file wins over the value passed to the
    # constructor when the dataset is reopened read-only.
    file = os.path.join(self.tmpdir, "open_wrong_features_readonly.bin")
    dataset = BinaryDs(file, features=1024).open()
    dataset.close()
    with BinaryDs(file, features=2048, read_only=True) as dataset:
        self.assertEqual(dataset.get_features(), 1024)
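
# Companion sketch building on the behaviour checked above: when an existing
# file is reopened read-only, the feature count stored on disk appears to take
# precedence over the constructor argument, so it can be read back without
# knowing it in advance. The helper name stored_features is hypothetical.
def stored_features(path: str) -> int:
    """Return the feature count recorded in an existing dataset file."""
    with BinaryDs(path, read_only=True) as ds:
        return ds.get_features()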