Example #1
	def nn_classify(self, N, test_lc, train_files):
		# Classify test_lc by majority vote among its N nearest training light curves
		best_matches = []
		best_distances = []
		best_files = []
		# Read index of each lc file
		upto = 0
		for filename in train_files:
			#if upto % 200 == 0:
			#	print upto
			upto += 1
			# Read all the light curve data into an array
			lc_data = open(self._testdir + '/' + filename)
			
			lc_class = filename.strip().split('_')[0]
			lc = [[], []]
			for line in lc_data:
				line = line.strip().split(',')
				lc[0].append(float(line[0]))
				lc[1].append(float(line[1]))
			lc_data.close()
			normalise(lc)
			lc = sample(lc, 400)			
			lc = distribute(lc)
			# Distance between the test curve and this training curve
			distance = self._distance_fn(test_lc, lc)

			# Find the insertion point that keeps best_distances sorted ascending
			insert_point = len(best_distances)
			for i, bd in enumerate(best_distances):
				if bd >= distance:
					insert_point = i
					break
			# Keep only the N closest matches seen so far
			if insert_point < N:
				best_distances.insert(insert_point, distance)
				best_matches.insert(insert_point, lc_class)
				best_files.insert(insert_point, filename)
				# Drop the worst match once the list has more than N entries
				if len(best_distances) > N:
					best_distances.pop()
					best_matches.pop()
					best_files.pop()
		
		# Predict the class by majority vote among the N nearest neighbours
		near_count = {}
		for c in best_matches:
			if c not in near_count:
				near_count[c] = 1
			else:
				near_count[c] += 1
		#print sorted(near_count.items(), key=itemgetter(1))
		# Return the most common neighbour class and the list of matched files
		return [sorted(near_count.items(), key=itemgetter(1))[-1][0], best_files]
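To make the bookkeeping in nn_classify concrete, here is a small self-contained sketch of the same pattern, a distance-sorted candidate list capped at N entries followed by a majority vote, applied to plain numeric vectors. The function and data names below are illustrative only and are not part of the original class.

def knn_predict(test_vec, train_set, N=3):
	# train_set is a list of (label, vector) pairs
	best = []  # (distance, label) pairs, kept sorted by ascending distance
	for label, vec in train_set:
		dist = sum((a - b) ** 2 for a, b in zip(test_vec, vec)) ** 0.5
		pos = len(best)
		for i, (d, _) in enumerate(best):
			if d >= dist:
				pos = i
				break
		if pos < N:
			best.insert(pos, (dist, label))
			if len(best) > N:
				best.pop()
	# Majority vote over the surviving neighbours
	counts = {}
	for _, label in best:
		counts[label] = counts.get(label, 0) + 1
	return max(counts.items(), key=lambda kv: kv[1])[0]

train = [('a', (0.0, 0.0)), ('a', (0.1, 0.2)), ('b', (5.0, 5.0)), ('b', (5.1, 4.9))]
print(knn_predict((0.2, 0.1), train, N=3))  # -> 'a'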
Example #2
	def _corrupt(self, data, corruption):
		# Three corruption modes:
		#  - float: random dropout mask (each value kept with probability 1 - corruption)
		#  - array whose transpose matches data's shape: used directly as the corrupted data
		#  - (low, high) range: additive Gaussian noise with a per-feature sigma drawn from the range
		if type(corruption) == float:
			cdata = np.random.binomial(size=data.shape, n=1, p=1.-corruption) * data
		elif np.shape(np.asarray(corruption).T) == np.shape(data):
			cdata = corruption.T
		else:
			if self.layers[0].data_std is not None and self.layers[0].data_norm is not None:
				scales = np.random.uniform(low=corruption[0], high=corruption[1], size=data.shape[1])
				
				data = u.unnormalise(data, self.layers[0].data_norm[0], self.layers[0].data_norm[1])
				data = u.unstandardize(data, self.layers[0].data_std[0], self.layers[0].data_std[1])
				
				p = np.random.binomial
				noise_maps = [np.random.normal(scale=sig, size=data.shape[0]) for sig in scales] #* p(1, 0.5) 
				noise_maps = np.asarray(noise_maps)
				cdata = data + noise_maps.T
				
				cdata, _, _ = u.standardize(cdata, self.layers[0].data_std[0], self.layers[0].data_std[1])
				cdata, _, _ = u.normalise(cdata, self.layers[0].data_norm[0], self.layers[0].data_norm[1])
				
				# Just making sure we're not out of bounds:
				min_thr = 1e-6
				max_thr = 0.99999
				
				#if ((cdata < min_thr).sum() > 0 or (cdata > max_thr).sum() > 0) and False:
				#	print np.amin(data), np.amax(data), np.mean(data), np.std(data)
				#	print 'N/C:', (cdata < min_thr).sum(), (cdata > max_thr).sum()
				#	print np.amin(cdata), np.amax(cdata), np.mean(cdata), np.std(cdata)
				#	print 
				cdata[cdata < min_thr] = min_thr
				cdata[cdata > max_thr] = max_thr
			else:
				raise RuntimeError("Can't corrupt the data: the first layer's standardisation and normalisation values are required (%s, %s)." % (self.layers[0].data_std, self.layers[0].data_norm))
		return cdata
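Below is a minimal, self-contained illustration of two of the corruption modes handled above: dropout masking when corruption is a float, and per-feature Gaussian noise when it is a (low, high) range. It deliberately skips the unnormalise/unstandardize round trip and the layer bookkeeping, so it is a sketch of the idea rather than the original method; the array values are invented.

import numpy as np

data = np.random.uniform(size=(5, 3))  # 5 samples, 3 features, already in [0, 1]

# Float corruption: keep each value with probability 1 - corruption (dropout mask)
corruption = 0.3
mask = np.random.binomial(size=data.shape, n=1, p=1. - corruption)
dropped = mask * data

# (low, high) corruption: additive Gaussian noise, one sigma per feature
low, high = 0.05, 0.2
scales = np.random.uniform(low=low, high=high, size=data.shape[1])
noise = np.asarray([np.random.normal(scale=s, size=data.shape[0]) for s in scales])
noisy = np.clip(data + noise.T, 1e-6, 0.99999)  # same bounds as min_thr/max_thr above

print(dropped.shape, noisy.shape)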
Example #3
    def assess_states(self, raw_path = None, downsample_rate = None, savestring = 'example',
                      threshold = 65,
                      raw_load = True,
                      saved_path = None,
                      make_pdfs = True):

        self.threshold = threshold # 'surety' threshold: predictions below it are flagged as mixed
        self.savestring = savestring
        if raw_load:
            self.dataobj = SeizureData(raw_path, fs_dict = self.fs_dict)
            self.dataobj.load_data()
            # Cache the loaded data so later runs can skip the raw load
            with open('../'+savestring+'_saved','wb') as f:
                pickle.dump(self.dataobj,f)

        else:
            assert saved_path is not None
            with open(saved_path,'rb') as f:
                self.dataobj = pickle.load(f)
        #print 'printing filename_list'
        #print self.dataobj.filename_list

        self.norm_data = utils.normalise(self.dataobj.data_array)
        feature_obj = FeatureExtractor(self.norm_data)
        i_features = self.classifier.imputer.transform(feature_obj.feature_array)
        iss_features = self.classifier.std_scaler.transform(i_features)
        lda_iss_features = self.lda.transform(iss_features)

        np.set_printoptions(precision=3, suppress = True)

        #self.pred_table = self.r_forest.predict_proba(iss_features)*100
        #self.preds = self.r_forest.predict(iss_features)

        self.pred_table = self.r_forest_lda.predict_proba(lda_iss_features)*100
        self.preds = self.r_forest_lda.predict(lda_iss_features)

        self.predslist = list(self.preds)
        # Replace the integer label 4 with its state name, 'Baseline'
        self.predslist = ['Baseline' if p == 4 else p for p in self.predslist]
        self.max_preds = np.max(self.pred_table, axis = 1)
        #print pred_table
        self.threshold_for_mixed = np.where(self.max_preds < int(self.threshold),1,0) # 1 when below
        self._string_fun2()
        self._write_to_excel()
        if make_pdfs:
            self.plot_pdfs()
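The toy example below isolates the 'surety' thresholding step used above: the maximum class probability of each prediction row is compared against the threshold, and rows falling below it are flagged (1) as mixed events. The probability values are invented for illustration.

import numpy as np

pred_table = np.array([[80.0, 15.0,  5.0],
                       [40.0, 35.0, 25.0],
                       [10.0, 70.0, 20.0]])  # class probabilities in percent
max_preds = np.max(pred_table, axis=1)       # [80. 40. 70.]
threshold_for_mixed = np.where(max_preds < 65, 1, 0)
print(threshold_for_mixed)                   # [0 1 0] -> only the middle row is 'mixed'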
Example #4
	def _corrupt(self, data):
		# Variant of the _corrupt method in Example #2: the corruption setting and the
		# normalisation/standardisation values are read from instance attributes instead.
		if type(self.corruption) == float:
			cdata = np.random.binomial(size=data.shape, n=1, p=1.-self.corruption) * data
		elif np.shape(np.asarray(self.corruption).T) == np.shape(data):
			cdata = self.corruption.T
		else:
			
			if self.data_std is not None and self.data_norm is not None:
				scales = np.random.uniform(low=self.corruption[0], high=self.corruption[1], size=data.shape[1])
				
				data = u.unnormalise(data, self.data_norm[0], self.data_norm[1])
				data = u.unstandardize(data, self.data_std[0], self.data_std[1])
				
				p = np.random.binomial
				noise_maps = [np.random.normal(scale=sig, size=data.shape[0]) for sig in scales] # * p(1, 0.5)
				noise_maps = np.asarray(noise_maps)
				
				cdata = data + noise_maps.T
				
				cdata, _, _ = u.standardize(cdata, self.data_std[0], self.data_std[1])
				cdata, _, _ = u.normalise(cdata, self.data_norm[0], self.data_norm[1])
				
				# Just making sure we're not out of bounds:
				min_thr = 1e-6
				max_thr = 0.99999
				
				#if ((cdata < min_thr).sum() > 0 or (cdata > max_thr).sum() > 0) and False:
				#	print np.amin(data), np.amax(data), np.mean(data), np.std(data)
				#	print 'N/C:', (cdata < min_thr).sum(), (cdata > max_thr).sum()
				cdata[cdata < min_thr] = min_thr
				cdata[cdata > max_thr] = max_thr
				
				#print np.amin(cdata), np.amax(cdata), np.mean(cdata), np.std(cdata)
			else:
				raise RuntimeError("Can't normalise the data (%s, %s). You must provide the normalisation and standardisation values. Giving up." % (self.data_std, self.data_norm))
		#print np.amin(data), np.amax(data)
		#print np.amin(cdata), np.amax(cdata)
		return cdata
Example #5
import pickle
import matplotlib.pyplot as plt
import numpy as np

import utils
from network_loader import SeizureData
from relabeling_functions import relabel,reorder
from extrator import FeatureExtractor
from classifier import NetworkClassifer
from make_pdfs import plot_traces

################# Training Data ###################
reload_training = True
if reload_training:
    training_traces = utils.raw_training_load()
    training_traces_norm = utils.normalise(training_traces)
    training_data = FeatureExtractor(training_traces_norm)
    #f = open('../full_raw_training','wb')
    #pickle.dump(training_traces,f)

else:
    print('skipping raw training load')
    training_traces = pickle.load(open('../full_raw_training','rb'))
    training_traces_norm = utils.normalise(training_traces)
    training_data = FeatureExtractor(training_traces_norm)
    np.savetxt('training_traces.csv',training_traces_norm,delimiter=',')

################# Training Labels and mixed event exclusion ###################
cleanup = np.loadtxt('../Training_cleanup.csv',delimiter=',')
training_labels = np.array([int(x[1]) for x in cleanup])
print(training_labels.shape)
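As a toy version of the label-loading step above, the snippet below applies the same comprehension to an in-memory array. The assumed column layout (trace index in column 0, integer class label in column 1) is inferred only from how the script indexes cleanup, not from the actual Training_cleanup.csv.

import numpy as np

cleanup = np.array([[0, 1],
                    [1, 4],
                    [2, 2],
                    [3, 1]], dtype=float)
training_labels = np.array([int(x[1]) for x in cleanup])
print(training_labels.shape)  # (4,)
print(training_labels)        # [1 4 2 1]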