def est_MR52_RemovalOfSamples(self): (X_train, y_train), (X_test, y_test) = self.__getCopiesOfData() cl = np.random.randint(y_test.shape[1]) samplesToKeep = np.zeros(X_train.shape[0]) for x in range(X_train.shape[0]): if (np.random.random() < .5) or y_train[x, cl] > .5: samplesToKeep[x] = 1 newXTrain = np.zeros((int(np.sum(samplesToKeep)), X_train.shape[1])) newYTrain = np.zeros((int(np.sum(samplesToKeep)), y_train.shape[1])) count = 0 for x in range(X_train.shape[0]): if samplesToKeep[x]: newXTrain[count, :] = X_train[x, :] newYTrain[count, :] = y_train[x, :] count = count + 1 newModel = p1b2_baseline_keras2.main(newXTrain, newYTrain, X_test, y_test, True) newPreds = newModel.predict_classes(X_test) for x in range(X_test.shape[0]): if self._origPredictions[x] == cl: assert self._origPredictions[x] == newPreds[int( np.sum(samplesToKeep[:x]) )], "Removing samples from classes other than n causes classifications of n to change to other classes"
def est_MR41_AddClassByDuplicatingSamples(self): (X_train, y_train), (X_test, y_test) = self.__getCopiesOfData() cl = np.random.randint(y_train.shape[1]) count = int(np.sum(y_train[:, cl])) #One class will remain empty but this should have no impact on the result newNumberOfClasses = (y_train.shape[1] * 2) newXTrain = np.zeros((X_train.shape[0] * 2 - count, X_train.shape[1])) newYTrain = np.zeros( (y_train.shape[0] * 2 - count, newNumberOfClasses)) newYTest = np.zeros((y_test.shape[0], newNumberOfClasses)) newXTrain[:X_train.shape[0], :] = X_train newYTrain[:y_train.shape[0], :y_train.shape[1]] = y_train newYTest[:y_test.shape[0], :y_test.shape[1]] = y_test count = y_train.shape[0] for x in range(y_train.shape[0]): if np.argmax(y_train[x, :]) != cl: newYTrain[count, np.argmax(y_train[x, :]) + y_train.shape[1]] = 1 newXTrain[count, :] = X_train[x, :] count = count + 1 newModel = p1b2_baseline_keras2.main(newXTrain, newYTrain, X_test, newYTest, True) newPreds = newModel.predict_classes(X_test) for x in range(X_test.shape[0]): if (self._origPredictions[x] == cl): assert newPreds[ x] == cl, "adding new classes by doubling the training samples for classes other than n made our classifier worse for class n"
def test_MR51_RemovalOfClasses(self): (X_train, y_train), (X_test, y_test) = self.__getCopiesOfData() #Class to remove cl = np.random.randint(y_test.shape[1]) trainCount = int(np.sum(y_train[:, cl])) newXTrain = np.zeros((X_train.shape[0] - trainCount, X_train.shape[1])) newYTrain = np.zeros((y_train.shape[0] - trainCount, y_train.shape[1])) count = 0 for x in range(X_train.shape[0]): if not np.argmax(y_train[x, :]) == cl: newXTrain[count, :] = X_train[x, :] newYTrain[count, :] = y_train[x, :] count = count + 1 newModel = p1b2_baseline_keras2.main(newXTrain, newYTrain, X_test, y_test, True) newPreds = newModel.predict_classes(X_test) for x in range(X_test.shape[0]): if self._origPredictions[x] != cl: assert self._origPredictions[x] == newPreds[ x], "removing a class causes the model to change predictions for the remaining classes"
def est_MR42_AddClassesByReLabelingSamples(self): (X_train, y_train), (X_test, y_test) = self.__getCopiesOfData() cl = np.random.randint(y_train.shape[1]) newNumberOfClasses = y_train.shape[1] * 2 newYTrain = np.zeros((y_train.shape[0], newNumberOfClasses)) #y_test will need the same shape newYTest = np.zeros((y_test.shape[0], newNumberOfClasses)) newYTest[:, :y_test.shape[1]] = y_test for x in range(y_train.shape[0]): if y_train[x, cl] == 1: newYTrain[x, cl] = 1 else: if np.random.random() < .5: newYTrain[x, np.argmax(y_train[x, :]) + y_train.shape[1]] = 1 else: newYTrain[x, np.argmax(y_train[x, :])] = 1 newModel = p1b2_baseline_keras2.main(X_train, newYTrain, X_test, newYTest, True) newPreds = newModel.predict_classes(X_test) for x in range(X_test.shape[0]): if (self._origPredictions[x] == cl): assert newPreds[ x] == cl, "relabeling the class of training samples for classes besides n changed some classifications of n"
def est_MR32_AdditionalTrainingSample(self): (X_train, y_train), (X_test, y_test) = self.__getCopiesOfData() n = np.random.randint(X_test.shape[1]) count = int(np.sum(X_train[:, n])) newXTrain = np.zeros((X_train.shape[0] + count, X_train.shape[1])) newYTrain = np.zeros((y_train.shape[0] + count, y_train.shape[1])) newXTrain[:X_train.shape[0], :] = X_train newYTrain[:y_train.shape[0], :] = y_train XCount = count for x in range(count): if np.argmax(y_train[x, :]) == n: newXTrain[XCount, :] = X_train[x, :] newYTrain[XCount, :] = y_train[x, :] XCount = XCount + 1 newModel = p1b2_baseline_keras2.main(newXTrain, newYTrain, X_test, y_test, True) newPreds = newModel.predict_classes(X_test) for x in range(X_test.shape[0]): if (self._origPredictions[x] == n): assert newPreds[ x] == n, "doubling the training samples for class n changed some classifications from n to another class"
def est_MR22_AddInformativeAttribute(self): (X_train, y_train), (X_test, y_test) = self.__getCopiesOfData() tempTrain = np.zeros((X_train.shape[0], X_train.shape[1] + 1)) tempTest = np.zeros((X_test.shape[0], X_test.shape[1] + 1)) tempTrain[:, :-1] = X_train tempTest[:, :-1] = X_test #pick a random class n = np.random.randint(10) #if a test point is associated with class n, make the new attribute 1 for x in range(X_train.shape[0]): if X_train[x, n] > .5: tempTrain[x, -1] = 1 for x in range(X_test.shape[0]): if X_test[x, n] > .5: tempTest[x, -1] = 1 newModel = p1b2_baseline_keras2.main(tempTrain, y_train, tempTest, y_test, True) newPreds = newModel.predict_classes(tempTest) for x in range(X_test.shape[0]): if (self._origPredictions[x] == n): assert newPreds[ x] == n, "adding an informative feature for class n changed previous classifications of n to another class"
def test_MR21_AddUninformativeAttribute(self): (X_train, y_train), (X_test, y_test) = self.__getCopiesOfData() tempTrain = np.zeros((X_train.shape[0],X_train.shape[1]+1)) tempTest = np.zeros((X_test.shape[0],X_test.shape[1]+1)) tempTrain[:,:-1] = X_train tempTest[:,:-1] = X_test newModel = p1b2_baseline_keras2.main(tempTrain,y_train,tempTest,y_test,True) newPreds = newModel.predict_classes(tempTest) assert np.array_equal(self._origPredictions,newPreds), "adding an uninformative attribute changes outcome"
def differencesInAccuracy(self): #In order for this test to work, you must change the setUp to predict() in place of predict_classes() (X_train, y_train), (X_test, y_test) = self.__getCopiesOfData() newPreds = p1b2_baseline_keras2.main(X_train, y_train, X_test, y_test, True).predict(X_test) scores = p1b2.evaluate(self._origPredictions, y_test) print('old model on test data:', scores) newScores = p1b2.evaluate(newPreds, y_test) print('new model on test data:', newScores) diff = scores['accuracy'] - newScores['accuracy'] print('difference in scores:', diff)
def test_MR12_PermutationOfAttributes(self): (X_train, y_train), (X_test, y_test) = self.__getCopiesOfData() X_train, X_test = self.__shuffleColumnsInUnison(X_train,X_test) shuffledModelPredictions = p1b2_baseline_keras2.main(X_train,y_train,X_test,y_test,True).predict_classes(X_test) #for x in range(X_test.shape[0]): # if not (self._origPredictions[x]==shuffledModelPredictions[x]): # print('mismatch:') # print(self._origPredictions[x]) # print(shuffledModelPredictions[x]) assert np.array_equal(self._origPredictions,shuffledModelPredictions), "permuting the order of the features changes the outcome of the model"
def test_MR31_ConsistenceWithRePrediction(self): (X_train, y_train), (X_test, y_test) = self.__getCopiesOfData() n = np.random.randint(X_test.shape[0]) nPred = self._origPredictions[n] newXTrain = np.zeros((X_train.shape[0]+1,X_train.shape[1])) newXTrain[:-1,:] = X_train newXTrain[-1,:] = X_test[n,:] newYTrain = np.zeros((y_train.shape[0]+1,y_train.shape[1])) newYTrain[:-1,:] = y_train newYTrain[-1,:] = y_test[n,:] newModel = p1b2_baseline_keras2.main(newXTrain,newYTrain,X_test,y_test,True) assert (newModel.predict_classes(X_test)[n]) == nPred
def test_MR0_ConsistenceWithAffineTransform(self): (X_train, y_train), (X_test, y_test) = self.__getCopiesOfData() numFeatures = X_train.shape[1] #Makes an array of random size filled with random numbers between 0 and numFeatures without repitition randomSubset = np.random.choice(range(numFeatures),np.random.randint(numFeatures),False) k = np.random.randint(1,100) b = np.random.randint(100) for i in range(randomSubset.shape[0]): c = randomSubset[i] X_train[:,c] = X_train[:,c]*k + b X_test[:,c] = X_test[:,c]*k + b transformedPredictions = p1b2_baseline_keras2.main(X_train,y_train,X_test,y_test,True).predict_classes(X_test) assert np.array_equal(self._origPredictions,transformedPredictions), "affine transformations change the outcome of the model"
def test_MR11_PermutationOfClassLabels(self): (X_train, y_train), (X_test, y_test) = self.__getCopiesOfData() #print('pre permute train:') #print(y_train[50:55,:]) #print('pre permute test:') #print(y_test[50:55,:]) p = self.__permuteLabels(y_train,y_test) #print('permutation: ', p) #print('new train:') #print(y_train[50:55,:]) #print('new test:') #print(y_test[50:55,:]) newModel = p1b2_baseline_keras2.main(X_train,y_train,X_test,y_test,True) newPredictions = newModel.predict_classes(X_test) for x in range(X_test.shape[0]): assert newPredictions[x] == p[self._origPredictions[x]], "permuting class labels changes outcome of model"
def test_MR32_AdditionalTrainingSample(self): (X_train, y_train), (X_test, y_test) = self.__getCopiesOfData() n = np.random.randint(y_train.shape[1]) count = int(np.sum(y_train[:,n])) newX_train = copy.copy(X_train) newY_train = copy.copy(y_train) for x in range(X_train.shape[0]): if np.argmax(y_train[x,:]) == n: newX_train = np.vstack([newX_train,X_train[x]]) newY_train = np.vstack([newY_train,y_train[x]]) newModel = p1b2_baseline_keras2.main(newX_train,newY_train,X_test,y_test,True) newPreds = newModel.predict_classes(X_test) assert X_train.shape[0] + count == newX_train.shape[0], "Wrong size" print(n) print(newPreds) print(self._origPredictions) for x in range(X_test.shape[0]): if self._origPredictions[x] == n: assert newPreds[x] == n, "doubling the training samples for class n changed some classifications from n to another class"
def est_modelIsDeterministic(self): newPreds = p1b2_baseline_keras2.main( DeterministicResults=True).predict_classes(self.X_test) assert np.array_equal(self._origPredictions, newPreds), "Model is not deterministic"
def setUpClass(self): self._srcModel = p1b2_baseline_keras2.main(DeterministicResults=True) (self.X_train, self.y_train), (self.X_test, self.y_test) = p1b2.load_data() self._origPredictions = self._srcModel.predict_classes(self.X_test)