def nn_classify(self, N, test_lc, train_files):
    """Classify test_lc by a majority vote over its N nearest training light curves."""
    # Requires: from operator import itemgetter
    best_matches = []
    best_distances = []
    best_files = []

    for filename in train_files:
        # The class label is encoded as the filename prefix (before the first '_')
        lc_class = filename.strip().split('_')[0]

        # Read the light curve into a pair of (time, magnitude) lists
        lc = [[], []]
        with open(self._testdir + '/' + filename) as lc_data:
            for line in lc_data:
                line = line.strip().split(',')
                lc[0].append(float(line[0]))
                lc[1].append(float(line[1]))

        normalise(lc)
        lc = sample(lc, 400)
        lc = distribute(lc)

        # Distance between the test light curve and this training light curve
        distance = self._distance_fn(test_lc, lc)

        # Find the insertion point that keeps best_distances sorted ascending;
        # if the new distance is the largest so far, append it at the end
        insert_point = 0
        found = False
        for insert_point, bd in enumerate(best_distances):
            if bd >= distance:
                found = True
                break
        if not found:
            insert_point = len(best_distances)

        best_distances.insert(insert_point, distance)
        best_matches.insert(insert_point, lc_class)
        best_files.insert(insert_point, filename)

        # Trim back to the N nearest neighbours
        if len(best_distances) > N:
            best_distances.pop()
            best_matches.pop()
            best_files.pop()

    # Majority vote over the classes of the N nearest neighbours
    near_count = {}
    for c in best_matches:
        near_count[c] = near_count.get(c, 0) + 1

    return [sorted(near_count.items(), key=itemgetter(1))[-1][0], best_files]
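# The sketch below (new, not from the original source) isolates the majority-vote step at
# the end of nn_classify so it can be run on its own. The helper name `majority_vote` and
# the example class labels are hypothetical; only the itemgetter-based sort mirrors the
# method above.
from operator import itemgetter

def majority_vote(best_matches):
    # Count how often each class appears among the nearest neighbours
    near_count = {}
    for c in best_matches:
        near_count[c] = near_count.get(c, 0) + 1
    # Sort by count and return the most frequent class (ties resolved by sort order)
    return sorted(near_count.items(), key=itemgetter(1))[-1][0]

# Example: three of the five nearest neighbours belong to class 'RRLyrae', so it wins.
assert majority_vote(['RRLyrae', 'Cepheid', 'RRLyrae', 'EB', 'RRLyrae']) == 'RRLyrae'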
def _corrupt(self, data, corruption):
    if type(corruption) == float:
        # Masking noise: each input is independently zeroed with probability `corruption`
        cdata = np.random.binomial(size=data.shape, n=1, p=1. - corruption) * data
    elif np.shape(np.asarray(corruption).T) == np.shape(data):
        # An explicit corruption map of the same shape as the data was supplied
        cdata = corruption.T
    else:
        if self.layers[0].data_std is not None and self.layers[0].data_norm is not None:
            # Additive Gaussian noise with a per-feature sigma drawn uniformly
            # from [corruption[0], corruption[1]]
            scales = np.random.uniform(low=corruption[0], high=corruption[1], size=data.shape[1])

            # Undo normalisation and standardisation so the noise is added in the
            # original data units, then re-apply both afterwards
            data = u.unnormalise(data, self.layers[0].data_norm[0], self.layers[0].data_norm[1])
            data = u.unstandardize(data, self.layers[0].data_std[0], self.layers[0].data_std[1])

            noise_maps = np.asarray([np.random.normal(scale=sig, size=data.shape[0]) for sig in scales])
            cdata = data + noise_maps.T

            cdata, _, _ = u.standardize(cdata, self.layers[0].data_std[0], self.layers[0].data_std[1])
            cdata, _, _ = u.normalise(cdata, self.layers[0].data_norm[0], self.layers[0].data_norm[1])

            # Clip so the corrupted data stays strictly inside (0, 1)
            min_thr = 1e-6
            max_thr = 0.99999
            cdata[cdata < min_thr] = min_thr
            cdata[cdata > max_thr] = max_thr
        else:
            raise RuntimeError("Can't normalise the data (%s, %s). You must provide the "
                               "normalisation and standardisation values. Giving up."
                               % (self.layers[0].data_std, self.layers[0].data_norm))

    return cdata
def assess_states(self, raw_path=None, downsample_rate=None, savestring='example',
                  threshold=65, raw_load=True, saved_path=None, make_pdfs=True):
    self.threshold = threshold  # 'surety' threshold (percent)
    self.savestring = savestring

    if raw_load:
        self.dataobj = SeizureData(raw_path, fs_dict=self.fs_dict)
        self.dataobj.load_data()
        with open('../' + savestring + '_saved', 'wb') as f:
            pickle.dump(self.dataobj, f)
    else:
        assert saved_path is not None
        self.dataobj = pickle.load(open(saved_path, 'rb'))

    self.norm_data = utils.normalise(self.dataobj.data_array)
    feature_obj = FeatureExtractor(self.norm_data)
    i_features = self.classifier.imputer.transform(feature_obj.feature_array)
    iss_features = self.classifier.std_scaler.transform(i_features)
    lda_iss_features = self.lda.transform(iss_features)

    np.set_printoptions(precision=3, suppress=True)
    self.pred_table = self.r_forest_lda.predict_proba(lda_iss_features) * 100
    self.preds = self.r_forest_lda.predict(lda_iss_features)

    # Relabel class 4 as 'Baseline' for reporting
    self.predslist = ['Baseline' if p == 4 else p for p in self.preds]

    self.max_preds = np.max(self.pred_table, axis=1)
    # 1 where the classifier's top probability falls below the surety threshold ('mixed' events)
    self.threshold_for_mixed = np.where(self.max_preds < int(self.threshold), 1, 0)

    self._string_fun2()
    self._write_to_excel()
    if make_pdfs:
        self.plot_pdfs()
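# Minimal sketch (illustrative only, values made up) of the 'mixed event' flag computed in
# assess_states: a row is flagged when the classifier's highest class probability falls
# below the surety threshold.
import numpy as np

pred_table = np.array([[80.0, 10.0, 5.0, 5.0],    # confident prediction -> not mixed
                       [40.0, 35.0, 15.0, 10.0]])  # uncertain prediction -> mixed
max_preds = np.max(pred_table, axis=1)
threshold_for_mixed = np.where(max_preds < 65, 1, 0)  # 1 where below the 65% threshold
print(threshold_for_mixed)  # [0 1]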
def _corrupt(self, data):
    if type(self.corruption) == float:
        # Masking noise: each input is independently zeroed with probability self.corruption
        cdata = np.random.binomial(size=data.shape, n=1, p=1. - self.corruption) * data
    elif np.shape(np.asarray(self.corruption).T) == np.shape(data):
        # An explicit corruption map of the same shape as the data was supplied
        cdata = self.corruption.T
    else:
        if self.data_std is not None and self.data_norm is not None:
            # Additive Gaussian noise with a per-feature sigma drawn uniformly
            # from [self.corruption[0], self.corruption[1]]
            scales = np.random.uniform(low=self.corruption[0], high=self.corruption[1], size=data.shape[1])

            # Undo normalisation and standardisation so the noise is added in the
            # original data units, then re-apply both afterwards
            data = u.unnormalise(data, self.data_norm[0], self.data_norm[1])
            data = u.unstandardize(data, self.data_std[0], self.data_std[1])

            noise_maps = np.asarray([np.random.normal(scale=sig, size=data.shape[0]) for sig in scales])
            cdata = data + noise_maps.T

            cdata, _, _ = u.standardize(cdata, self.data_std[0], self.data_std[1])
            cdata, _, _ = u.normalise(cdata, self.data_norm[0], self.data_norm[1])

            # Clip so the corrupted data stays strictly inside (0, 1)
            min_thr = 1e-6
            max_thr = 0.99999
            cdata[cdata < min_thr] = min_thr
            cdata[cdata > max_thr] = max_thr
        else:
            raise RuntimeError("Can't normalise the data (%s, %s). You must provide the "
                               "normalisation and standardisation values. Giving up."
                               % (self.data_std, self.data_norm))

    return cdata
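# Self-contained sketch (an assumption, not the original pipeline) of the masking-noise
# branch of _corrupt: when the corruption level is a float, each input value is
# independently zeroed with that probability, as in a denoising autoencoder.
import numpy as np

def mask_corrupt(data, corruption_level):
    # Draw a 0/1 mask; each value survives with probability 1 - corruption_level
    mask = np.random.binomial(size=data.shape, n=1, p=1.0 - corruption_level)
    return mask * data

x = np.random.rand(4, 6)
x_corrupted = mask_corrupt(x, 0.3)  # on average ~30% of entries become zero
print(x_corrupted)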
import pickle

import matplotlib.pyplot as plt
import numpy as np

import utils
from network_loader import SeizureData
from relabeling_functions import relabel, reorder
from extrator import FeatureExtractor
from classifier import NetworkClassifer
from make_pdfs import plot_traces

################# Training Data ###################
reload_training = True

if reload_training:
    training_traces = utils.raw_training_load()
    training_traces_norm = utils.normalise(training_traces)
    training_data = FeatureExtractor(training_traces_norm)
    #f = open('../full_raw_training','wb')
    #pickle.dump(training_traces,f)
else:
    print('skipping raw training load')
    training_traces = pickle.load(open('../full_raw_training', 'rb'))
    training_traces_norm = utils.normalise(training_traces)
    training_data = FeatureExtractor(training_traces_norm)

np.savetxt('training_traces.csv', training_traces_norm, delimiter=',')

################# Training Labels and mixed event exclusion ###################
cleanup = np.loadtxt('../Training_cleanup.csv', delimiter=',')
training_labels = np.array([int(x[1]) for x in cleanup])
print(training_labels.shape)