def test_write_to_ro(self):
    file = os.path.join(self.tmpdir, "write_ro.bin")
    dataset = BinaryDs(file, features=14).open()
    dataset.close()
    with BinaryDs(file, features=14, read_only=True) as dataset:
        with self.assertRaises(IOError):
            dataset.write(self.data_raw)
def evaluate_confusion(bs: int, file: str, fixed: int, model_path: str,
                       test_bin: str) -> None:
    """
    Evaluates the confusion matrix for a given number of features
    :param bs: batch size
    :param file: file where the confusion matrix will be written
    :param fixed: number of features to be considered
    :param model_path: path to the .h5 keras model of the network.
    If empty, defaults to data_dir/model.h5
    :param test_bin: path to the test dataset that will be used
    """
    test = BinaryDs(test_bin, read_only=True).open()
    # two categories or fewer means a single-output (binary) classifier
    binary = test.get_categories() <= 2
    model = load_model(model_path)
    generator = DataGenerator(test, bs, fake_pad=True, pad_len=fixed,
                              predict=True)
    expected = get_expected(bs, test)
    predicted = model.predict(generator, verbose=1)
    if binary:
        # round sigmoid probabilities to 0/1 labels
        predicted = np.round(predicted).flatten().astype(np.int8)
    else:
        # pick the most probable class from the softmax output
        predicted = np.argmax(predicted, axis=1)
    matrix = np.array(tf.math.confusion_matrix(expected, predicted))
    with open(file, "w") as f:
        np.savetxt(f, X=matrix, fmt="%d")
    test.close()
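
# Hedged usage sketch for evaluate_confusion: the batch size, feature count,
# and paths below are illustrative assumptions, not values from the project.
if __name__ == "__main__":
    evaluate_confusion(bs=256, file="confusion.txt", fixed=2048,
                       model_path="data/model.h5",
                       test_bin="data/test.bin")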
def test_read_write(self):
    file = os.path.join(self.tmpdir, "rw.bin")
    binary = BinaryDs(file, features=14).open()
    binary.write(self.data_raw)
    binary.close()
    with BinaryDs(file, features=14, read_only=True) as dataset:
        read = dataset.read(0, len(self.data_raw))
    self.assertEqual(read, self.data_raw)

def test_truncate_all(self):
    file = os.path.join(self.tmpdir, "truncate.bin")
    dataset = BinaryDs(file, features=14).open()
    dataset.close()
    empty_size = os.path.getsize(file)
    with BinaryDs(file, features=14) as dataset:
        dataset.write(self.data_raw2)
    self.assertGreater(os.path.getsize(file), empty_size)
    with BinaryDs(file, features=14) as dataset:
        dataset.truncate()
    self.assertEqual(os.path.getsize(file), empty_size)

def test_split(self):
    file1 = os.path.join(self.tmpdir, "splitA.bin")
    file2 = os.path.join(self.tmpdir, "splitB.bin")
    dataset1 = BinaryDs(file1, features=14).open()
    dataset1.write(self.data_raw2)
    dataset2 = BinaryDs(file2, features=14).open()
    self.assertEqual(dataset1.get_examples_no(), 8)
    self.assertEqual(dataset2.get_examples_no(), 0)
    dataset1.split(dataset2, 0.5)
    self.assertEqual(dataset1.get_examples_no(), 4)
    self.assertEqual(dataset2.get_examples_no(), 4)
    self.assertEqual(dataset1.read(0, 4), self.data_raw2[:4])
    self.assertEqual(dataset2.read(0, 4), self.data_raw2[4:])
    dataset1.close()
    dataset2.close()

def test_merge(self):
    file1 = os.path.join(self.tmpdir, "mergeA.bin")
    file2 = os.path.join(self.tmpdir, "mergeB.bin")
    dataset1 = BinaryDs(file1, features=14).open()
    dataset1.write(self.data_raw)
    dataset2 = BinaryDs(file2, features=14).open()
    dataset2.write(self.data_raw2)
    self.assertEqual(dataset1.get_examples_no(), 3)
    self.assertEqual(dataset2.get_examples_no(), 8)
    dataset1.merge(dataset2)
    self.assertEqual(dataset1.get_examples_no(), 11)
    self.assertEqual(dataset2.get_examples_no(), 0)
    self.assertEqual(dataset1.read(0, 11), self.data_raw + self.data_raw2)
    dataset1.close()
    dataset2.close()
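
# A condensed sketch of the BinaryDs calls exercised by the tests above; the
# path and the examples argument are hypothetical, but the call sequence
# mirrors the tests.
def binaryds_roundtrip_sketch(path, examples):
    ds = BinaryDs(path, features=14).open()  # create or open a writable dataset
    ds.write(examples)                       # append examples
    total = ds.get_examples_no()             # number of stored examples
    ds.close()
    # read-only handles also work as context managers
    with BinaryDs(path, features=14, read_only=True) as ro:
        return ro.read(0, total)             # read back examples [0, total)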
def evaluate_incremental(bs: int, file: str, model_path: str,
                         test_bin: str) -> None:
    """
    Evaluates the accuracy incrementally (first with only 1 feature, then 3,
    then 5, and so on, with increasingly coarse steps)
    :param bs: batch size
    :param file: file where the accuracy will be written (.csv)
    :param model_path: path to the .h5 keras model of the network.
    If empty, defaults to data_dir/model.h5
    :param test_bin: path to the test dataset that will be used
    """
    cut = 1
    test = BinaryDs(test_bin, read_only=True).open()
    model = load_model(model_path)
    features = test.get_features()
    with open(file, "w") as f:
        f.write("features,accuracy\n")
    # the step size grows with the cut, so large feature counts stay tractable
    while cut <= features:
        print(f"Evaluating {cut}")
        generator = DataGenerator(test, bs, fake_pad=True, pad_len=cut)
        score = model.evaluate(generator)
        with open(file, "a") as f:
            f.write(f"{cut},{score[1]}\n")
        if cut < 24:
            cut += 2
        elif cut < 80:
            cut += 22
        elif cut < 256:
            cut += 33
        elif cut < 500:
            cut += 61
        elif cut < features:
            # last stretch: jump by 129 but always finish exactly at features
            cut = min(cut + 129, features)
        else:
            break
    test.close()
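
# Hedged usage sketch for evaluate_incremental; the paths and batch size are
# illustrative assumptions. Each evaluated cut appends one
# "features,accuracy" row to the CSV.
if __name__ == "__main__":
    evaluate_incremental(bs=256, file="incremental.csv",
                         model_path="data/model.h5",
                         test_bin="data/test.bin")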
def run_summary(model_dir: str) -> None:
    """
    Prints a summary of the datasets contained in a directory
    :param model_dir: path to the folder where train.bin, test.bin and
    validate.bin can be found
    """
    assert os.path.exists(model_dir)
    train_bin = os.path.join(model_dir, "train.bin")
    test_bin = os.path.join(model_dir, "test.bin")
    validate_bin = os.path.join(model_dir, "validate.bin")
    assert os.path.exists(train_bin), "Train dataset does not exist!"
    assert os.path.exists(test_bin), "Test dataset does not exist!"
    assert os.path.exists(validate_bin), "Validation dataset does not exist!"
    train = BinaryDs(train_bin, read_only=True).open()
    train_categories = count_categories(train)
    openc = train.is_encoded()
    features = train.get_features()
    train.close()
    val = BinaryDs(validate_bin, read_only=True).open()
    val_categories = count_categories(val)
    val.close()
    test = BinaryDs(test_bin, read_only=True).open()
    test_categories = count_categories(test)
    test.close()
    print(f"Features: {features}")
    print(f"Number of classes: {len(train_categories)}")
    if openc:
        print("Type: opcode encoded")
    else:
        print("Type: raw values")
    print("--------------------")
    for i, amount in enumerate(train_categories):
        print(f"Training examples for class {i}: {amount}")
    for i, amount in enumerate(val_categories):
        print(f"Validation examples for class {i}: {amount}")
    for i, amount in enumerate(test_categories):
        print(f"Testing examples for class {i}: {amount}")
def test_open_wrong_features_readonly(self):
    file = os.path.join(self.tmpdir, "open_wrong_features_readonly.bin")
    dataset = BinaryDs(file, features=1024).open()
    dataset.close()
    with BinaryDs(file, features=2048, read_only=True) as dataset:
        self.assertEqual(dataset.get_features(), 1024)

def test_open_wrong_features(self):
    file = os.path.join(self.tmpdir, "open_wrong_features.bin")
    dataset = BinaryDs(file, features=1024).open()
    dataset.close()
    with self.assertRaises(IOError):
        BinaryDs(file, features=2048).open()

def test_wrong_encoding_readonly(self):
    file = os.path.join(self.tmpdir, "wrongenc_readonly.bin")
    dataset = BinaryDs(file, encoded=False).open()
    dataset.close()
    with BinaryDs(file, encoded=True, read_only=True) as dataset:
        self.assertFalse(dataset.is_encoded())

def test_wrong_encoding(self):
    file = os.path.join(self.tmpdir, "wrongenc.bin")
    dataset = BinaryDs(file, encoded=False).open()
    dataset.close()
    with self.assertRaises(IOError):
        BinaryDs(file, encoded=True).open()