# --- Example 1 ---
    def _dataset_creation_blobs(self):
        """Build the synthetic blob train/test sets and their PyTorch loaders.

        Stores the splits in ``self.tr`` / ``self.ts`` and the wrapped
        loaders in ``self._tr_loader`` / ``self._ts_loader``.
        """
        self.logger.info("\tTest dataset creation")

        # Draw well-separated synthetic clusters with a fixed seed.
        ds = CDLRandom(n_samples=self.n_samples_tr + self.n_samples_ts,
                       n_classes=self.n_classes,
                       n_features=self.n_features,
                       n_redundant=0,
                       n_clusters_per_class=1,
                       class_sep=2,
                       random_state=0).load()

        # Deterministic train/test partition.
        self.tr, self.ts = CTrainTestSplit(
            train_size=self.n_samples_tr,
            test_size=self.n_samples_ts,
            random_state=0).split(ds)

        # Min-max scaling, fitted on the training portion only.
        scaler = CNormalizerMinMax()
        self.tr.X = scaler.fit_transform(self.tr.X)
        self.ts.X = scaler.transform(self.ts.X)

        # Wrap both splits as PyTorch loaders; only training shuffles.
        self._tr_loader = CDataLoaderPyTorch(
            self.tr.X, self.tr.Y, self.batch_size,
            shuffle=True, transform=None).get_loader()
        self._ts_loader = CDataLoaderPyTorch(
            self.ts.X, self.ts.Y, self.batch_size,
            shuffle=False, transform=None).get_loader()
# --- Example 2 ---
    def _create_tr_ts():
        """Create BLOBS training and test sets.

        Returns the (training, test) pair, min-max normalized using
        statistics fitted on the training portion only.
        """
        # 30 samples of image-sized (3 * 224 * 224) random feature vectors.
        dataset = CDLRandom(n_samples=30, n_features=3 * 224 * 224).load()

        # Fixed-seed split: 10 training / 20 test samples.
        training, test = CTrainTestSplit(
            train_size=10, test_size=20, random_state=0).split(dataset)

        # Scale features into [0, 1] based on the training data.
        scaler = CNormalizerMinMax()
        training.X = scaler.fit_transform(training.X)
        test.X = scaler.transform(test.X)

        return training, test
# --- Example 3 ---
    def _dataset_creation_mnist(self):
        """Load the MNIST 1-vs-7 data and build the PyTorch loaders.

        Loads digits (1, 7) from the MNIST training archive, splits them
        deterministically into training/test subsets, scales pixel values
        into [0, 1], and stores PyTorch-compatible loaders on the instance
        as ``self._tr_loader`` / ``self._ts_loader``.
        """
        self.logger.info("\tTest dataset creation")
        digits = (1, 7)
        dataset = CDataLoaderMNIST().load('training', digits=digits)

        # Split in training and test (fixed seed for reproducibility).
        splitter = CTrainTestSplit(train_size=self.n_samples_tr,
                                   test_size=self.n_samples_ts,
                                   random_state=0)
        self.tr, self.ts = splitter.split(dataset)

        # Normalize pixel values from [0, 255] into [0, 1].
        # Fix: the original code also created an unused CNormalizerMinMax
        # instance here; the plain division below is what actually
        # performs the scaling, so the dead local was removed.
        self.tr.X /= 255
        self.ts.X /= 255

        # Reshape flat sample vectors into (N, 1, 28, 28) image batches.
        transform = transforms.Lambda(lambda x: x.reshape(-1, 1, 28, 28))

        self._tr_loader = CDataLoaderPyTorch(self.tr.X,
                                             self.tr.Y,
                                             self.batch_size,
                                             shuffle=True,
                                             transform=transform).get_loader()

        self._ts_loader = CDataLoaderPyTorch(self.ts.X,
                                             self.ts.Y,
                                             self.batch_size,
                                             shuffle=False,
                                             transform=transform).get_loader()
    def _create_tr_ts(n_tr, n_ts, n_classes, n_features):
        """Create BLOBS training and test sets.

        Generates ``n_tr + n_ts`` synthetic samples with the requested
        number of classes and features, splits them deterministically,
        and returns the (training, test) pair min-max normalized with
        statistics fitted on the training portion only.
        """
        # Synthetic blob data with a fixed seed for reproducibility.
        blobs = CDLRandom(n_samples=n_tr + n_ts,
                          n_classes=n_classes,
                          n_features=n_features,
                          n_redundant=0, n_clusters_per_class=1,
                          class_sep=1, random_state=0).load()

        # Deterministic split into the requested sizes.
        training, test = CTrainTestSplit(
            train_size=n_tr, test_size=n_ts, random_state=0).split(blobs)

        # Scale features into [0, 1] based on training data.
        scaler = CNormalizerMinMax()
        training.X = scaler.fit_transform(training.X)
        test.X = scaler.transform(test.X)

        return training, test
    def setUpClass(cls):
        """Prepare the shared fixture: dataset, classifier and attack point."""
        CAttackEvasionCleverhansTestCases.setUpClass()

        cls.seed = 0
        cls.y_target = None  # no target class set

        # One-vs-all multiclass SVM with an RBF kernel; inputs are
        # min-max normalized by the preprocessing stage.
        cls.clf = CClassifierMulticlassOVA(
            CClassifierSVM,
            kernel=CKernelRBF(gamma=10),
            C=0.1,
            preprocess=CNormalizerMinMax())

        # Three tight blobs around the given 2-D centers.
        # NOTE(review): n_features=0 looks odd — presumably the explicit
        # `centers` list determines the dimensionality; confirm against
        # the CDLRandomBlobs documentation.
        cls.ds = CDLRandomBlobs(
            n_features=0,
            centers=[[0.1, 0.1], [0.5, 0], [0.8, 0.8]],
            cluster_std=0.01,
            n_samples=100,
            random_state=cls.seed).load()

        cls.clf.fit(cls.ds.X, cls.ds.Y)

        # Starting point of the attack and its predicted label.
        cls.x0 = CArray([0.6, 0.2])
        cls.y0 = CArray(cls.clf.predict(cls.x0))
# --- Example 6 ---
from fns import load_sentiment_dataset

# Load the sentiment data as train / cross-validation / test splits
# with binary labels.
tr_X, tr_Y, cv_X, cv_Y, te_X, te_Y = load_sentiment_dataset(
    classification_type='binary')
all_classes = list(np.unique(te_Y))
print(all_classes)

# Wrap the raw arrays into secml CArray containers.
# NOTE(review): `np`, `CArray`, `CDataset` and `CNormalizerMinMax` are
# presumably imported earlier in the file — confirm.
tr_X, tr_Y = CArray(tr_X), CArray(tr_Y)
cv_X, cv_Y = CArray(cv_X), CArray(cv_Y)
te_X, te_Y = CArray(te_X), CArray(te_Y)

# Build a secml dataset per split.
ds_tr_secml = CDataset(tr_X, tr_Y)
#print(ds_tr_secml.classes, ds_tr_secml.num_classes, ds_tr_secml.num_features, ds_tr_secml.num_samples)
ds_te_secml = CDataset(te_X, te_Y)
ds_cv_secml = CDataset(cv_X, cv_Y)

# Min-max normalization: fit on the training split only, then apply the
# same transform to the test and validation splits.
normalizer = CNormalizerMinMax()
ds_tr_secml.X = normalizer.fit_transform(ds_tr_secml.X)
ds_te_secml.X = normalizer.transform(ds_te_secml.X)
ds_cv_secml.X = normalizer.transform(ds_cv_secml.X)

# =============================================================================
# #TEST WITH SKLEARN SVM
# sklearn_clf = svm.SVC(C = 1, kernel = 'rbf', gamma = 1.0)
# secml_sklearn_clf = c_classifier_sklearn.CClassifierSkLearn(sklearn_clf)
# secml_sklearn_clf.fit(ds_tr_secml)
# preds = secml_sklearn_clf.predict(ds_te_secml.X)
# metric = CMetricAccuracy()
# acc = metric.performance_score(y_true = ds_te_secml.Y, y_pred = preds)
# print("Accuracy on test set: {:.2%}".format(acc))
# probs = secml_sklearn_clf.predict_proba(ds_te_secml.X)       #Doesn't work
#
                         cluster_std=cluster_std,
                         n_samples=n_samples,
                         random_state=random_state).load()

n_tr = 1000  # Number of training set samples
n_ts = 100  # Number of test set samples

# Split in training and test.
# NOTE(review): `dataset` and `random_state` are defined earlier in the
# file (outside this excerpt) — verify before reuse.
from secml.data.splitter import CTrainTestSplit
splitter = CTrainTestSplit(
    train_size=n_tr, test_size=n_ts, random_state=random_state)
tr, ts = splitter.split(dataset)

# Normalize the data into [0, 1], fitting on the training split only.
from secml.ml.features import CNormalizerMinMax
nmz = CNormalizerMinMax()
tr.X = nmz.fit_transform(tr.X)
ts.X = nmz.transform(ts.X)

# Metric to use for training and performance evaluation
from secml.ml.peval.metrics import CMetricAccuracy
metric = CMetricAccuracy()

# Creation of the multiclass (one-vs-all) SVM classifier.
from secml.ml.classifiers import CClassifierSVM
from secml.ml.classifiers.multiclass import CClassifierMulticlassOVA
from secml.ml.kernel import CKernelRBF
clf = CClassifierMulticlassOVA(CClassifierSVM, kernel=CKernelRBF())

# Parameters for the Cross-Validation procedure
xval_params = {'C': [1e-2, 0.1, 1], 'kernel.gamma': [10, 100, 1e3]}

# First split: (training + validation) vs test.
# NOTE(review): `setSamplesTrainingNumber`, `setSamplesValidationNumber`
# and `setSampleTestNumber` are defined outside this excerpt — confirm.
splitter = CTrainTestSplit(train_size=setSamplesTrainingNumber +
                           setSamplesValidationNumber,
                           test_size=setSampleTestNumber,
                           random_state=random_state)

trainingValidation, test = splitter.split(dataset)

# Second split: training vs validation.
# NOTE(review): this splits `dataset` again, not `trainingValidation`, so
# the splits may overlap — verify this is intended.
splitter = CTrainTestSplit(train_size=setSamplesTrainingNumber,
                           test_size=setSamplesValidationNumber,
                           random_state=random_state)

training, validation = splitter.split(dataset)

# Normalize the data: fit on training, apply to validation and test.
normalizer = CNormalizerMinMax()
training.X = normalizer.fit_transform(training.X)
validation.X = normalizer.transform(validation.X)
test.X = normalizer.transform(test.X)

# Metric to use for training and performance evaluation
metric = CMetricAccuracy()

# Creation of the (binary) SVM classifier with a fixed RBF kernel.
classifier = CClassifierSVM(kernel=CKernelRBF(gamma=10), C=1)

# We can now fit the classifier
classifier.fit(training.X, training.Y)
print("Training of classifier complete!")

# Compute predictions on a test set