def p_val(N, data_name, g_size, dim):
    """Estimate bootstrap p-values for the class-0 ("normal") test sample.

    For each of N repetitions: draw a fresh bootstrap of the scaled class-0
    training data, take the 99/95/90th percentiles of the self-distances as
    thresholds, then measure what fraction of distances from the bootstrap to
    the (class-0) test sample exceed each threshold.

    Parameters
    ----------
    N : int
        Number of bootstrap repetitions.
    data_name : str
        Dataset key passed to import_pickled_data.
    g_size, dim : passed through to distance_calculation_power
        (semantics defined by that helper — TODO confirm).

    Returns
    -------
    (Sample_normal, Boot_strap_normal, c)
        The raw class-0 training sample, the LAST bootstrap drawn, and the
        LAST iteration's [99th, 95th, 90th]-percentile thresholds.
    """
    N_BOOT = 10000  # bootstrap size; also the denominator of the p-value
    p_values = np.zeros((N, 3))
    X, y, T, yT = import_pickled_data(data_name)

    # Loop-invariant and deterministic: extract class 0, fit the scaler on it,
    # and scale both train and test once instead of once per repetition.
    Sample_normal = extract_samples(X, y, 0)
    scalar = preprocessing.StandardScaler(with_mean=True, with_std=True).fit(Sample_normal)
    N_scaled = scalar.transform(Sample_normal)
    Sample_T = extract_samples(T, yT, 0)
    T_scaled = scalar.transform(Sample_T)

    for i in tqdm(xrange(N)):
        # Bootstrap is random, so it must be redrawn inside the loop.
        Boot_strap_normal = Boot_sample(N_scaled, N_BOOT)

        # Thresholds from the bootstrap's self-distances.
        Distances = distance_calculation_power(Boot_strap_normal, Boot_strap_normal, dim, g_size)
        c = [np.percentile(Distances, q) for q in (99, 95, 90)]

        # Fraction of bootstrap-to-test distances above each threshold.
        Distances = np.asarray(
            distance_calculation_power(Boot_strap_normal, T_scaled, dim, g_size))
        for k, thresh in enumerate(c):
            p_values[i, k] = np.sum(Distances > thresh) / float(N_BOOT)

    print("P value -- mean, std", np.mean(p_values, axis=0), np.std(p_values, axis=0))
    return Sample_normal, Boot_strap_normal, c
def Generate_samples_each_class(N, data_name, classes):
    """Bootstrap each class of the training data and dump it to CSV.

    For every class j in [0, classes) a 10000-row bootstrap of that class's
    training samples is written to 'Data_Boot<j><data_name>.csv'.

    NOTE(review): the outer N-loop rewrites the SAME set of files on every
    iteration, so only the last iteration's (random) bootstrap survives on
    disk. Left as-is to preserve behavior (RNG draw count) — confirm whether
    N > 1 is ever intended.

    Parameters
    ----------
    N : int
        Number of (redundant, see NOTE) outer repetitions.
    data_name : str
        Dataset key passed to import_pickled_data; also part of the filenames.
    classes : int
        Number of class labels to process.
    """
    # (Removed an unused `Distance = np.zeros((N, 3))` scratch array.)
    X, y, T, yT = import_pickled_data(data_name)
    for i in tqdm(xrange(N)):
        for j in tqdm(xrange(classes)):
            # Extract this class's training rows and bootstrap them.
            Sample_normal = extract_samples(X, y, j)
            Boot_strap_normal = Boot_sample(Sample_normal, 10000)
            np.savetxt('Data_Boot' + str(j) + data_name + '.csv',
                       Boot_strap_normal, delimiter=',')
def classify_samples(XTrain, yTrain):
    """Fit one bandwidth-tuned kernel density estimator per class.

    Labels are assumed to be integers 0..max(yTrain); for each label the
    matching training rows are extracted and a KernelDensity model is fitted,
    with its bandwidth chosen by grid-search cross-validation over
    logspace(-1, 1, 20).

    Returns
    -------
    list of fitted KernelDensity estimators, indexed by class label.
    """
    total_classes = int(max(yTrain) + 1)
    estimators = []
    for label in range(total_classes):
        class_rows = extract_samples(XTrain, yTrain, label)
        # Cross-validated bandwidth selection for this class's KDE.
        search = GridSearchCV(KernelDensity(), {'bandwidth': np.logspace(-1, 1, 20)})
        search.fit(class_rows)
        best = search.best_estimator_
        print("best bandwidth: {0}".format(best.bandwidth))
        estimators.append(best)
    return estimators
def power(N, Boot_strap_normal, Samp_Norm, data_name, c, g_size, dim, C):
    """Estimate test power against randomly chosen non-normal classes.

    For each of N repetitions: pick a random class in [1, C] (inclusive),
    scale its test samples with a scaler fitted on Samp_Norm, and record the
    fraction of bootstrap-to-test distances that fall BELOW each threshold
    in c (i.e. look "normal" — lower is better power).

    Parameters
    ----------
    N : int
        Number of repetitions.
    Boot_strap_normal : array
        Bootstrap of the scaled normal class (e.g. from p_val).
    Samp_Norm : array
        Raw normal training sample used to fit the StandardScaler.
    data_name : str
        Dataset key passed to import_pickled_data.
    c : sequence of 3 floats
        Distance thresholds (99/95/90th percentiles from p_val).
    g_size, dim : passed through to distance_calculation_power.
    C : int
        Highest non-normal class label to sample from.

    Returns
    -------
    (N, 3) ndarray of below-threshold fractions (previously this function
    only printed the summary and returned None; returning the array is
    backward-compatible).
    """
    N_BOOT = 10000  # denominator for the fraction; matches the bootstrap size
    # Renamed from `power` to avoid shadowing this function's own name.
    results = np.zeros((N, 3))
    X, y, T, yT = import_pickled_data(data_name)
    scalar = preprocessing.StandardScaler(with_mean=True, with_std=True).fit(Samp_Norm)

    for i in tqdm(xrange(N)):
        Rand_class = random.randint(1, C)  # inclusive of both endpoints
        T_scaled = scalar.transform(extract_samples(T, yT, Rand_class))
        Distances = np.asarray(
            distance_calculation_power(Boot_strap_normal, T_scaled, dim, g_size))
        for k, thresh in enumerate(c):
            results[i, k] = np.sum(Distances < thresh) / float(N_BOOT)

    print("Power -- mean std", np.mean(results, axis=0), np.std(results, axis=0))
    return results
def separate_class_samples(Train, labels):
    """Split the training rows into one sample set per class label.

    Labels are assumed to be integers 0..max(labels); the returned list is
    indexed by label, each entry holding that class's rows from Train.
    """
    n_classes = int(np.max(labels) + 1)
    return [extract_samples(Train, labels, label) for label in xrange(n_classes)]