示例#1
0
def run_summary(model_dir: str) -> None:
    """
    Prints a summary of the dataset contained in a directory.

    :param model_dir: Path to the folder where the train.bin, test.bin and
        validate.bin can be found
    :raises FileNotFoundError: if model_dir or any of the three dataset
        files is missing. (Replaces the original asserts, which are
        silently stripped when Python runs with -O.)
    """
    if not os.path.exists(model_dir):
        raise FileNotFoundError(f"{model_dir} does not exist!")
    train_bin = os.path.join(model_dir, "train.bin")
    test_bin = os.path.join(model_dir, "test.bin")
    validate_bin = os.path.join(model_dir, "validate.bin")
    if not os.path.exists(train_bin):
        raise FileNotFoundError("Train dataset does not exist!")
    if not os.path.exists(test_bin):
        raise FileNotFoundError("Test dataset does not exist!")
    if not os.path.exists(validate_bin):
        raise FileNotFoundError("Validation dataset does not exist!")
    # Context managers guarantee each dataset is closed even if
    # count_categories raises (the original leaked the handle in that case).
    with BinaryDs(train_bin, read_only=True) as train:
        train_categories = count_categories(train)
        openc = train.is_encoded()
        features = train.get_features()
    with BinaryDs(validate_bin, read_only=True) as val:
        val_categories = count_categories(val)
    with BinaryDs(test_bin, read_only=True) as test:
        test_categories = count_categories(test)
    print(f"Features: {features}")
    print(f"Number of classes: {len(train_categories)}")
    if openc:
        print("Type: opcode encoded")
    else:
        print("Type: raw values")
    print("--------------------")
    for i, count in enumerate(train_categories):
        print(f"Training examples for class {i}: {count}")
    for i, count in enumerate(val_categories):
        print(f"Validation examples for class {i}: {count}")
    for i, count in enumerate(test_categories):
        print(f"Testing examples for class {i}: {count}")
示例#2
0
def evaluate_incremental(bs: int, file: str, model_path: str,
                         test_bin) -> None:
    """
    Evaluates the accuracy incrementally (first only 1 feature, then 3, then 5)
    :param bs: batch size
    :param file: file where to write the accuracy (.csv)
    :param model_path: string pointing to the .h5 keras model of the network.
    If empty will default to data_dir/model.h5
    :param test_bin: path to the test dataset that will be used
    """
    cut = 1
    test = BinaryDs(test_bin, read_only=True).open()
    model = load_model(model_path)
    features = test.get_features()
    with open(file, "w") as f:
        f.write("features,accuracy\n")
    while cut <= features:
        print(f"Evaluating {cut}")
        generator = DataGenerator(test, bs, fake_pad=True, pad_len=cut)
        score = model.evaluate(generator)
        with open(file, "a") as f:
            f.write(f"{cut},{score[1]}\n")
        if cut < 24:
            cut = cut + 2
        elif cut < 80:
            cut = cut + 22
        elif cut < 256:
            cut = cut + 33
        elif cut < 500:
            cut = cut + 61
        elif cut < features:
            cut = cut + 129
            cut = min(cut, features)
        else:
            break
    test.close()
 def test_open_wrong_features_readonly(self):
     """Reopening read-only with a mismatched feature count must keep the
     value already stored on disk."""
     path = os.path.join(self.tmpdir, "open_wrong_features_readonly.bin")
     # Create the file with 1024 features, then immediately close it.
     BinaryDs(path, features=1024).open().close()
     # Reopen asking for 2048: the on-disk 1024 wins in read-only mode.
     with BinaryDs(path, features=2048, read_only=True) as reopened:
         self.assertEqual(reopened.get_features(), 1024)