def prepare_dataset(sentences, word_to_id, char_to_id, tag_to_id, lower, zeros):
    """
    Prepare the dataset. Return a list of lists of dictionaries containing:
        - word indexes
        - word char indexes
        - tag indexes
    """
    data = []
    for s in sentences:
        str_words = [w[0] for w in s]
        words = [word_to_id[normalise(w, lower, zeros)
                 if normalise(w, lower, zeros) in word_to_id else '<UNK>']
                 for w in str_words]
        # Skip characters that are not in the training set
        chars = [[char_to_id[c] for c in w if c in char_to_id]
                 for w in str_words]
        caps = [cap_feature(w) for w in str_words]
        tags = [tag_to_id[w[-1]] for w in s]
        pos_tags = pos_feature(str_words)
        data.append({
            'str_words': str_words,
            'words': words,
            'chars': chars,
            'caps': caps,
            'tags': tags,
            'pos_tags': pos_tags,
        })
    return data
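# A minimal sketch of the word-level `normalise(word, lower, zeros)` helper that
# prepare_dataset above, and prepare_sentence / word_mapping further down, rely on.
# Its real implementation is not shown in this listing; assuming it lowercases the
# word and/or replaces digits with '0' (a common NER preprocessing step):
import re

def normalise(word, lower, zeros):
    # Replace every digit with '0' if requested.
    if zeros:
        word = re.sub(r'\d', '0', word)
    # Lowercase if requested.
    if lower:
        word = word.lower()
    return word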
def nn_classify(self, N, test_lc, train_files):
    best_matches = []
    best_distances = []
    best_files = []

    # Read index of each lc file
    upto = 0
    for filename in train_files:
        #if upto % 200 == 0:
        #    print upto
        upto += 1

        # Read all the light curve data into an array
        lc_data = open(self._testdir + '/' + filename)
        lc_class = filename.strip().split('_')[0]
        lc = [[], []]
        for line in lc_data:
            line = line.strip().split(',')
            lc[0].append(float(line[0]))
            lc[1].append(float(line[1]))
        lc_data.close()

        normalise(lc)
        lc = sample(lc, 400)
        lc = distribute(lc)

        # Update the nearest neighbour
        distance = self._distance_fn(test_lc, lc)

        # Find insert point
        insert_point = 0
        found = False
        for insert_point, bd in enumerate(best_distances):
            if bd >= distance:
                found = True
                break
        if found or len(best_distances) == 0:
            best_distances.insert(insert_point, distance)
            best_matches.insert(insert_point, lc_class)
            best_files.insert(insert_point, filename)

        # Pop from the top of the list if it's too long
        if len(best_distances) > N:
            best_distances.pop()
            best_matches.pop()
            best_files.pop()

    # Compute nearest neighbour by majority
    near_count = {}
    for c in best_matches:
        if c not in near_count.keys():
            near_count[c] = 1
        else:
            near_count[c] += 1

    #print sorted(near_count.items(), key=itemgetter(1))
    return [sorted(near_count.items(), key=itemgetter(1))[-1][0], best_files]
def _raised_cosine(im):
    m, n = np.shape(im)
    w1 = np.cos(np.linspace(-np.pi / 2, np.pi / 2, m))
    w1 = w1[:, None]
    w2 = np.cos(np.linspace(-np.pi / 2, np.pi / 2, n))
    w = w1 * w2
    return utils.normalise(im * w)
def generate_forest(self, threshold=0.25, tree_chance=0.2):
    noise = []
    for i in range(self.width):
        noise.append([])
        for j in range(self.height):
            noise[i].append(0)

    PNFactory_forest = perlin.PerlinNoiseFactory(2, octaves=3, tile=(), unbias=False)
    for i in range(self.width):
        for j in range(self.height):
            noise[i][j] = PNFactory_forest(i / self.width, j / self.height)

    noise1D = []
    for i in range(self.width):
        for j in range(self.height):
            noise1D.append(noise[i][j])
    _min = np.min(noise1D)
    _max = np.max(noise1D)

    for i in range(self.width):
        for j in range(self.height):
            v = utils.normalise(noise[i][j], _min, _max)
            if v < threshold and self.grid[i][j].get_type() in life.Tree.get_good_tiles():
                if self.grid[i][j].food == None and not self.grid[i][j].is_river \
                        and np.random.random() < tree_chance:
                    self.grid[i][j].set_tree(
                        life.Tree(self.simu, self.grid[i][j], randomness=True))
def generate_elevation(self, start_tile=None):
    noise = []
    for i in range(self.width):
        noise.append([])
        for j in range(self.height):
            noise[i].append(0)

    PNFactory = perlin.PerlinNoiseFactory(2, octaves=4, tile=(), unbias=True)
    for i in range(self.width):
        for j in range(self.height):
            noise[i][j] = PNFactory(i / self.width, j / self.height)

    noise1D = []
    for i in range(self.width):
        for j in range(self.height):
            noise1D.append(noise[i][j])
    _min = np.min(noise1D)
    _max = np.max(noise1D)

    for i in range(self.width):
        for j in range(self.height):
            self.grid[i][j].elevation_raw = utils.normalise(noise[i][j], _min, _max)
            self.grid[i][j].elevation = -3 + (self.grid[i][j].elevation_raw * 11)
            self.grid[i][j].set_type_from_elevation()
            if self.grid[i][j] == "SHALLOW_WATER":
                self.shallow_water_tiles.append(self.grid[i][j])
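# generate_forest and generate_elevation above, draw_healthbar further down, and
# feature_compose at the end of this listing all call a two-bound
# `normalise(value, min_value, max_value)`. Presumably this is plain min-max scaling
# into [0, 1]; the sketch below is an assumption, not the actual utils implementation:
def normalise(value, min_value, max_value):
    # Min-max scale `value` into [0, 1]; a degenerate range maps to 0.
    if max_value == min_value:
        return 0.0
    return (value - min_value) / (max_value - min_value)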
def _update_filter(self):
    print('update_filter:', self.select_file.value,
          self.ds.isel(time_slice=self.time_slice_deep_slider.value).amplitude.values.shape,
          self.filter_.shape)
    if self.selection.selection_expr is not None:
        hvds = hv.Dataset(
            (
                np.linspace(-0.5, 0.5, self.filter_.shape[1]),
                np.linspace(-0.5, 0.5, self.filter_.shape[0]),
                np.zeros(self.filter_.shape),
            ),
            ["x", "y"],
            "val",
        )
        hvds = hv.Dataset(hvds.dframe())
        hvds.data["val"].loc[
            hvds.select(self.selection.selection_expr).data.index
        ] = 1
        data = hvds["val"].reshape(self.filter_.shape).copy().T[::-1]
        gauss_kernel = utils.scipy_gaussian_2D(int(self.filter_.shape[1] / 40))
        filter00 = signal.fftconvolve(data, gauss_kernel, mode="same")
        filter00 = utils.normalise(filter00)
        self.filter_ = self.filter_ + filter00
    filter_ = hv.Image(self.filter_, group="filter")
    return filter_
def run(self, hive, drone, target):
    """Runs the controller.

    Arguments:
        hive {Hivemind} -- The hivemind.
        drone {Drone} -- Drone being controlled.
        target {np.ndarray} -- World coordinates of where we want to hit the ball.
    """
    # Calculate drone's distance to ball.
    distance = np.linalg.norm(hive.ball.pos - drone.pos)

    # Find directions based on where we want to hit the ball.
    direction_to_hit = normalise(target - hive.ball.pos)
    perpendicular_to_hit = np.cross(direction_to_hit, a3l([0, 0, 1]))

    # Calculating component lengths and multiplying with direction.
    perpendicular_component = perpendicular_to_hit * cap(
        np.dot(perpendicular_to_hit, hive.ball.pos),
        -distance * self.PERP_DIST_COEFF,
        distance * self.PERP_DIST_COEFF)
    in_direction_component = -direction_to_hit * distance * self.DIRECT_DIST_COEFF

    # Combine components to get a drive target.
    drive_target = hive.ball.pos + in_direction_component + perpendicular_component

    super().run(hive, drone, drive_target)
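# This controller, the dodge controllers that follow, and test_path further down all
# apply `normalise` to difference vectors before scaling them, so it presumably
# returns a unit-length numpy vector. A minimal sketch under that assumption (not
# the bot framework's actual helper):
import numpy as np

def normalise(v):
    # Return v scaled to unit length; leave a zero vector unchanged to avoid division by zero.
    norm = np.linalg.norm(v)
    return v / norm if norm > 0.0 else v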
def get_starting_pos(self):
    p = utils.Position()
    p.x = 0.0
    p.y = 0.0
    p.z = -50.0
    p.speed = 1400.0
    return utils.normalise(p, self.parameters)
def run(self, hive, drone, target):
    """Runs the controller.

    Arguments:
        hive {Hivemind} -- The hivemind.
        drone {Drone} -- Drone being controlled.
        target {np.ndarray} -- World coordinates of where to dodge towards.
    """
    # Calculates local target and direction.
    local_target = local(drone.orient_m, drone.pos, target)
    direction = normalise(local_target)

    # First jump
    if self.timer <= self.FST_JUMP_DURATION:
        drone.ctrl.jump = True

    # Second jump, i.e. dodge.
    if self.timer >= self.FST_JUMP_DURATION + self.SND_JUMP_DELAY:
        drone.ctrl.jump = True
        drone.ctrl.pitch = -direction[0]
        drone.ctrl.paw = direction[1]

    # Expiration of the controller.
    if self.timer >= self.FST_JUMP_DURATION + self.SND_JUMP_DELAY + self.SND_JUMP_DURATION:
        drone.controller = None

    super().run(hive)
def run(self, agent, player, target):
    """Runs the controller.

    Arguments:
        agent {BaseAgent} -- The agent.
        player {Car} -- Car object for which to generate controls.
        target {np.ndarray} -- World coordinates of where we want to hit the ball.
    """
    # Calculate drone's distance to ball.
    distance = np.linalg.norm(agent.ball.pos - agent.pos)

    # Find directions based on where we want to hit the ball.
    direction_to_hit = normalise(target - agent.ball.pos)
    perpendicular_to_hit = np.cross(direction_to_hit, a3l([0, 0, 1]))

    # Calculating component lengths and multiplying with direction.
    perpendicular_component = perpendicular_to_hit * cap(
        np.dot(perpendicular_to_hit, agent.ball.pos),
        -distance * self.PERP_DIST_COEFF,
        distance * self.PERP_DIST_COEFF)
    in_direction_component = -direction_to_hit * distance * self.DIRECT_DIST_COEFF

    # Combine components to get a drive target.
    drive_target = agent.ball.pos + in_direction_component + perpendicular_component

    super().run(agent, player, drive_target)
def run(self, agent, player, target):
    """Runs the controller.

    Arguments:
        agent {BaseAgent} -- The agent.
        player {Car} -- Car object for which to generate controls.
        target {np.ndarray} -- World coordinates of where to dodge towards.
    """
    # Calculates local target and direction.
    local_target = local(player.orient_m, player.pos, target)
    direction = normalise(local_target)

    # First jump
    if self.timer <= self.FST_JUMP_DURATION:
        agent.ctrl.jump = True

    # Second jump, i.e. dodge.
    if self.timer >= self.FST_JUMP_DURATION + self.SND_JUMP_DELAY:
        agent.ctrl.jump = True
        agent.ctrl.pitch = -direction[0]
        agent.ctrl.paw = direction[1]

    # Expiration of the controller.
    if self.timer >= self.FST_JUMP_DURATION + self.SND_JUMP_DELAY + self.SND_JUMP_DURATION:
        agent.controller = None

    super().run(agent)
def _corrupt(self, data, corruption):
    if type(corruption) == float:
        cdata = np.random.binomial(size=data.shape, n=1, p=1. - corruption) * data
    elif np.shape(np.asarray(corruption).T) == np.shape(data):
        cdata = corruption.T
    else:
        if self.layers[0].data_std is not None and self.layers[0].data_norm is not None:
            scales = np.random.uniform(low=corruption[0], high=corruption[1], size=data.shape[1])

            data = u.unnormalise(data, self.layers[0].data_norm[0], self.layers[0].data_norm[1])
            data = u.unstandardize(data, self.layers[0].data_std[0], self.layers[0].data_std[1])

            p = np.random.binomial
            noise_maps = [np.random.normal(scale=sig, size=data.shape[0]) for sig in scales]  #* p(1, 0.5)
            noise_maps = np.asarray(noise_maps)

            cdata = data + noise_maps.T

            cdata, _, _ = u.standardize(cdata, self.layers[0].data_std[0], self.layers[0].data_std[1])
            cdata, _, _ = u.normalise(cdata, self.layers[0].data_norm[0], self.layers[0].data_norm[1])

            # Just making sure we're not out of bounds:
            min_thr = 1e-6
            max_thr = 0.99999
            #if ((cdata < min_thr).sum() > 0 or (cdata > max_thr).sum() > 0) and False:
            #    print np.amin(data), np.amax(data), np.mean(data), np.std(data)
            #    print 'N/C:', (cdata < min_thr).sum(), (cdata > max_thr).sum()
            #    print np.amin(cdata), np.amax(cdata), np.mean(cdata), np.std(cdata)
            #    print
            cdata[cdata < min_thr] = min_thr
            cdata[cdata > max_thr] = max_thr

    return cdata
def prepare_sentence(str_words, word_to_id, char_to_id, lower, zeros):
    """
    Prepare a sentence for evaluation.
    """
    words = [word_to_id[normalise(w, lower, zeros)
             if normalise(w, lower, zeros) in word_to_id else '<UNK>']
             for w in str_words]
    chars = [[char_to_id[c] for c in w if c in char_to_id]
             for w in str_words]
    caps = [cap_feature(w) for w in str_words]
    pos_tags = pos_feature(str_words)
    return {
        'str_words': str_words,
        'words': words,
        'chars': chars,
        'caps': caps,
        'pos_tags': pos_tags,
    }
def assess_states(self, saved_path, savestring='example', pdf_savepath='../', make_pdfs=True):
    self.savestring = savestring
    self.pdf_savepath = pdf_savepath

    self.dataobj = pickle.load(open(saved_path, 'rb'))
    self.norm_data = utils.normalise(self.dataobj.data_array)
    self.norm_data = utils.filterArray(self.norm_data,
                                       window_size=self.sg_filter_window_size,
                                       order=self.sg_filter_window_order)

    feature_obj = FeatureExtractor(self.norm_data)
    i_features = self.classifier.imputer.transform(feature_obj.feature_array)
    iss_features = self.classifier.std_scaler.transform(i_features)
    lda_iss_features = self.lda.transform(iss_features)

    # predict probability and also the actual state
    self.pred_table = self.r_forest_lda.predict_proba(lda_iss_features) * 100
    self.preds = self.r_forest_lda.predict(lda_iss_features)

    # Make stuff for the excel sheet
    self.predslist = list(self.preds)  # why need this?
    self.predslist[self.predslist == 4] = 'Baseline'
    self.max_preds = np.max(self.pred_table, axis=1)
    self.threshold_for_mixed = np.where(self.max_preds < int(self.threshold), 1, 0)  # 1 when below

    # do the 1st vs 2nd most likely states
    self.sorted_pred = np.sort(self.pred_table, axis=1)
    self.ratio = np.divide(self.sorted_pred[:, 2], self.sorted_pred[:, 3])
    self.threshold_for_ratio = np.where(self.ratio > 0.5, 1, 0)  # 1 when below

    # combine the two measures
    self.combined_pass = np.logical_or(self.threshold_for_mixed, self.threshold_for_ratio)

    self._string_fun2()
    self._write_to_excel()

    if make_pdfs:
        plot_traces(self.norm_data,
                    self.preds,
                    savepath=self.pdf_savepath + self.savestring,
                    #savestring = '/Volumes/LACIE SHARE/VM_data/All_Data_Jan_2016/pdfs0302/'+self.savestring,
                    prob_thresholds=self.combined_pass)
def word_mapping(sentences, lower, zeros):
    """
    Create a dictionary and a mapping of words, sorted by frequency.
    """
    words = [[normalise(x[0], lower, zeros) for x in s] for s in sentences]
    dico = create_dico(words)
    dico['<UNK>'] = 10000000
    word_to_id, id_to_word = create_mapping(dico)
    print "Found %i unique words (%i in total)" % (
        len(dico), sum(len(x) for x in words)
    )
    return dico, word_to_id, id_to_word
def test_path(detail):
    # Path definition.
    a = a3l([3072, -4096, 0])
    b = a3l([3072, 2300, 0])
    c = a3l([1072, 2300, 0])
    part1 = straight(a, b, detail)
    part2 = arc(c, 2000, 0, 3 * np.pi / 4, detail)

    d = part2[-1]
    e = d + 1500 * normalise(part2[-1] - part2[-2])
    f = a3l([0, 1024, 0])
    g = a3l([0, 0, 0])
    part3 = bezier_cubic(d, e, f, g, detail)

    h = a3l([-512, 0, 0])
    part4 = arc(h, 512, 0, -np.pi, detail)

    i = part4[-1]
    j = i + 1500 * normalise(part4[-1] - part4[-2])
    k = a3l([-2800, 1200, 0])
    l = a3l([-3500, 500, 0])
    part5 = bezier_cubic(i, j, k, l, detail)

    m = 2 * l - k
    n = a3l([-3072, -1200, 0])
    o = a3l([-3072, -2000, 0])
    p = a3l([-3072, -4096, 0])
    part6 = bezier_cubic(l, m, n, o, detail)
    part7 = straight(o, p, detail)

    # Connect all the parts.
    path = np.concatenate((part1, part2, part3, part4, part5, part6, part7))

    return path
def main():
    """
    Load data, find the optimal K value on a subsample, use this value of K to
    evaluate on a larger subsample of the data, printing updated statistics along
    the way, before returning the mean Jaccard index across the data subsample.
    """
    data_path = os.getcwd() + "/data/JPEGImages/480p/"
    anno_path = os.getcwd() + "/data/Annotations/480p/"
    imgs, masks = generate_dataset_unsupervised(data_path, anno_path, hsv=False)

    # in case data loading has ordering
    shuffle(imgs)
    shuffle(masks)

    K = FindOptimalK(imgs, masks, 10)
    print("Optimal value of K is " + str(K))

    j_scores = []
    print("There are " + str(len(imgs)) + " images in the dataset.")
    for i in range(340):
        img = imgs[i]
        mask = masks[i]
        features = normalise(xyrgb(img).T)
        labels = RunKMeans(features, mask.shape, K)
        best_label = FindForegroundCluster(labels, mask, K)
        binary_fore = np.where(labels == best_label, 1, 0)
        binary_mask = np.where(mask > 0, 1, 0)
        j = jaccard_index(binary_fore, binary_mask)
        print("This run's j-score: " + str(j))
        j_scores.append(j)
        print("Running mean j-score " + str(np.mean(j_scores)))

    print("The mean Jaccard index across the data was " + str(np.mean(j_scores)))
    return j_scores
def get_locations(article):
    # find entities
    # entities are in article['summary']
    article['title'] = normalise(article['title'])
    article['summary'] = normalise(article['summary'])

    raw = parser(article['title'] + " " + article['summary'])
    merge_ents(raw)
    entities = get_ents(raw)

    # locate entities
    # places is my entity databases
    entity_list = []
    for ent in entities:
        if ent.lower not in ["mister", "mr.", "mr"]:
            locations = get_location(ent)
            entity_list.append({"entity": ent, "locations": locations})

    # add entities to article
    article['entities'] = entity_list
    article['added_by'] = "system"

    # return article
    return article
def assess_states(self, raw_path=None, downsample_rate=None, savestring='example',
                  threshold=65, raw_load=True, saved_path=None, make_pdfs=True):
    self.threshold = '65'  # 'sureity' threshold
    self.savestring = savestring

    if raw_load:
        self.dataobj = SeizureData(raw_path, fs_dict=self.fs_dict)
        self.dataobj.load_data()
        f = open('../' + savestring + '_saved', 'wb')
        pickle.dump(self.dataobj, f)
    else:
        assert saved_path != None
        self.dataobj = pickle.load(open(saved_path, 'rb'))
        #print 'printing filename_list'
        #print self.dataobj.filename_list

    self.norm_data = utils.normalise(self.dataobj.data_array)
    feature_obj = FeatureExtractor(self.norm_data)
    i_features = self.classifier.imputer.transform(feature_obj.feature_array)
    iss_features = self.classifier.std_scaler.transform(i_features)
    lda_iss_features = self.lda.transform(iss_features)
    np.set_printoptions(precision=3, suppress=True)

    #self.pred_table = self.r_forest.predict_proba(iss_features)*100
    #self.preds = self.r_forest.predict(iss_features)
    self.pred_table = self.r_forest_lda.predict_proba(lda_iss_features) * 100
    self.preds = self.r_forest_lda.predict(lda_iss_features)

    self.predslist = list(self.preds)  # why need this?
    self.predslist[self.predslist == 4] = 'Baseline'
    self.max_preds = np.max(self.pred_table, axis=1)
    #print pred_table
    self.threshold_for_mixed = np.where(self.max_preds < int(self.threshold), 1, 0)  # 1 when below

    self._string_fun2()
    self._write_to_excel()

    if make_pdfs:
        self.plot_pdfs()
def readDM():
    print "Loading dm space..."
    dm_dict = {}
    with open("../DS/ukwac.predict.dm") as f:
        dmlines = f.readlines()
        f.close()

    # Make dictionary with key=row, value=vector
    for l in dmlines:
        items = l.rstrip('\n').split('\t')
        row = items[0]
        vec = [float(i) for i in items[1:]]
        dm_dict[row] = normalise(vec)
    print "Space loaded..."
    return dm_dict
def draw_healthbar(value, max_value, topleft, size, surface,
                   c1=(255, 0, 0, 255), c2=(0, 255, 0, 255), min_value=0):
    factor = utils.normalise(value, min_value, max_value)
    pygame.draw.rect(surface, c1, pygame.Rect(topleft, size))
    if int(size[0] * factor) != 0:
        pygame.draw.rect(surface, c2,
                         pygame.Rect(topleft, (int(size[0] * factor), size[1])))
def process_two_images(model, imgs, ctx=None):
    """
    Process two images into one flow image

    Args:
        model: The model to use
        imgs: a list of 2 images
        ctx: the model ctx

    Returns:
    """
    if len(imgs) != 2:
        return None
    # If paths were passed in, load the images from disk (BGR -> RGB).
    if isinstance(imgs[0], str):
        if os.path.exists(imgs[0]):
            imgs[0] = cv2.cvtColor(cv2.imread(imgs[0]), cv2.COLOR_BGR2RGB)
        else:
            return None
    if isinstance(imgs[1], str):
        if os.path.exists(imgs[1]):
            imgs[1] = cv2.cvtColor(cv2.imread(imgs[1]), cv2.COLOR_BGR2RGB)
        else:
            return None

    imgs = crop(imgs)
    imgs = np.array(imgs)
    imgs = np.moveaxis(imgs, -1, 1)
    imgs = normalise(imgs)

    imgs = mx.nd.array(imgs, ctx=ctx)
    imgs = mx.nd.expand_dims(imgs, 0)  # add batch axis

    flow = model(imgs)  # run the model

    flow = flow.asnumpy()
    flow = flow.squeeze()
    flow = flow.transpose(1, 2, 0)

    img = flow_to_image(flow)
    # doing the bilinear interpolation on the img, NOT flow cause was too hard :'(
    img = imresize(img, 4.0)

    return img, flow
def _corrupt(self, data):
    if type(self.corruption) == float:
        cdata = np.random.binomial(size=data.shape, n=1, p=1. - self.corruption) * data
    elif np.shape(np.asarray(self.corruption).T) == np.shape(data):
        cdata = self.corruption.T
    else:
        if self.data_std is not None and self.data_norm is not None:
            scales = np.random.uniform(low=self.corruption[0], high=self.corruption[1], size=data.shape[1])

            data = u.unnormalise(data, self.data_norm[0], self.data_norm[1])
            data = u.unstandardize(data, self.data_std[0], self.data_std[1])

            p = np.random.binomial
            noise_maps = [np.random.normal(scale=sig, size=data.shape[0]) for sig in scales]  # * p(1, 0.5)
            noise_maps = np.asarray(noise_maps)

            cdata = data + noise_maps.T

            cdata, _, _ = u.standardize(cdata, self.data_std[0], self.data_std[1])
            cdata, _, _ = u.normalise(cdata, self.data_norm[0], self.data_norm[1])

            # Just making sure we're not out of bounds:
            min_thr = 1e-6
            max_thr = 0.99999
            #if ((cdata < min_thr).sum() > 0 or (cdata > max_thr).sum() > 0) and False:
            #    print np.amin(data), np.amax(data), np.mean(data), np.std(data)
            #    print 'N/C:', (cdata < min_thr).sum(), (cdata > max_thr).sum()
            cdata[cdata < min_thr] = min_thr
            cdata[cdata > max_thr] = max_thr
            #print np.amin(cdata), np.amax(cdata), np.mean(cdata), np.std(cdata)
        else:
            raise RuntimeError("Can't normalise the data (%s, %s). You must provide the "
                               "normalisation and standardisation values. Giving up."
                               % (self.data_std, self.data_norm))

    #print np.amin(data), np.amax(data)
    #print np.amin(cdata), np.amax(cdata)
    return cdata
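# In both _corrupt variants above, `u.normalise` is unpacked as
# `cdata, _, _ = u.normalise(...)`, i.e. it apparently returns the normalised array
# together with the bounds it used, mirroring `u.unnormalise(data, low, high)`.
# A sketch consistent with that call pattern -- an assumption, not the real helper:
import numpy as np

def normalise(data, low=None, high=None):
    # Min-max scale `data` and return the bounds used, so the caller can invert the mapping later.
    if low is None:
        low = np.amin(data)
    if high is None:
        high = np.amax(data)
    span = (high - low) if high != low else 1.0
    return (data - low) / span, low, high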
def FindOptimalK(imgs, masks, upper=10):
    """
    Find K value with best mean J score on first 50 training examples,
    to be used for further experimentation
    """
    out = 0
    outmax = 0.0
    for k in range(2, upper):
        print(k)
        j_scores = []
        for i in range(25):
            img = imgs[i]
            mask = masks[i]
            features = normalise(xyrgb(img).T)
            labels = RunKMeans(features, mask.shape, k)
            best_label = FindForegroundCluster(labels, mask, k)
            binary_fore = np.where(labels == best_label, 1, 0)
            binary_mask = np.where(mask > 0, 1, 0)
            j = jaccard_index(binary_fore, binary_mask)
            j_scores.append(j)
        print(np.mean(j_scores))
        if np.mean(j_scores) > outmax:
            out = k
            outmax = np.mean(j_scores)
    return out
def k(self, new):
    self.k_history.append(normalise(new))
    self._k = normalise(new)
for context in sorted(chars, key=chars.get, reverse=True):
    ppmi = chars[context]
    i = 0
    context_vector = np.zeros(num_dims)
    #print("Reweighting vector with context", context)
    for col in background_cols:
        # in case core space does not include context (e.g. bnc.2000 does not include 'rat')
        if context in background_space and col in background_space:
            context_vector[i] = pow(
                utils.cosine_similarity(background_space[context], background_space[col]),
                context_weight)
            if math.isnan(context_vector[i]):
                context_vector[i] = 0.0
        i += 1
    context_vector = utils.normalise(context_vector)
    reweighted_vectors.append(background_space[target] * context_vector)
    c += 1
    if c > num_chars:
        break

'''Add character to space'''
#print("Computing vector for", character)
background_space[character] = sum(reweighted_vectors)

new_chars = {}
for i in range(len(background_space[character])):
    new_chars[background_cols[i]] = background_space[character][i]

'''Print top contexts for character'''
c = 1
top_contexts = ""
def build(self,
          dropout,
          char_dim,
          char_lstm_dim,
          char_bidirect,
          word_dim,
          word_lstm_dim,
          word_bidirect,
          lr_method,
          pre_emb,
          crf,
          cap_dim,
          training=True,
          **kwargs
          ):
    """
    Build the network.
    """
    # Training parameters
    n_words = len(self.id_to_word)
    n_chars = len(self.id_to_char)
    n_tags = len(self.id_to_tag)

    # Number of capitalization features
    if cap_dim:
        n_cap = 4

    # Network variables
    is_train = T.iscalar('is_train')
    word_ids = T.ivector(name='word_ids')
    char_for_ids = T.imatrix(name='char_for_ids')
    char_rev_ids = T.imatrix(name='char_rev_ids')
    char_pos_ids = T.ivector(name='char_pos_ids')
    tag_ids = T.ivector(name='tag_ids')
    if cap_dim:
        cap_ids = T.ivector(name='cap_ids')

    # Sentence length
    s_len = (word_ids if word_dim else char_pos_ids).shape[0]

    # Final input (all word features)
    input_dim = 0
    inputs = []

    #
    # Word inputs
    #
    if word_dim:
        input_dim += word_dim
        word_layer = EmbeddingLayer(n_words, word_dim, name='word_layer')
        word_input = word_layer.link(word_ids)
        inputs.append(word_input)
        # Initialize with pretrained embeddings
        if pre_emb and training:
            new_weights = word_layer.embeddings.get_value()
            print 'Loading pretrained embeddings from %s...' % pre_emb
            pretrained = {}
            emb_invalid = 0
            for i, line in enumerate(open(pre_emb, 'r')):
                line = line.decode('utf8', 'ignore')
                line = line.rstrip().split()
                if len(line) == word_dim + 1:
                    pretrained[line[0]] = np.array(
                        [float(x) for x in line[1:]]
                    ).astype(np.float32)
                else:
                    emb_invalid += 1
            if emb_invalid > 0:
                print 'WARNING: %i invalid lines' % emb_invalid
            c_found = 0
            c_normal = 0
            # Lookup table initialization
            for i in xrange(n_words):
                word = self.id_to_word[i]
                if word in pretrained:
                    new_weights[i] = pretrained[word]
                    c_found += 1
                elif normalise(word, True, True) in pretrained:
                    new_weights[i] = pretrained[normalise(word, True, True)]
                    c_normal += 1
                else:
                    print word
            word_layer.embeddings.set_value(new_weights)
            print 'Loaded %i pretrained embeddings.' % len(pretrained)
            print ('%i / %i (%.4f%%) words have been initialized with '
                   'pretrained embeddings.') % (
                c_found + c_normal, n_words,
                100. * (c_found + c_normal) / n_words
            )
            print ('%i found directly, %i after normalising,') % (
                c_found, c_normal
            )

    #
    # Chars inputs
    #
    if char_dim:
        input_dim += char_lstm_dim
        char_layer = EmbeddingLayer(n_chars, char_dim, name='char_layer')

        char_lstm_for = LSTM(char_dim, char_lstm_dim, with_batch=True,
                             name='char_lstm_for')
        char_lstm_rev = LSTM(char_dim, char_lstm_dim, with_batch=True,
                             name='char_lstm_rev')

        char_lstm_for.link(char_layer.link(char_for_ids))
        char_lstm_rev.link(char_layer.link(char_rev_ids))

        char_for_output = char_lstm_for.h.dimshuffle((1, 0, 2))[
            T.arange(s_len), char_pos_ids
        ]
        char_rev_output = char_lstm_rev.h.dimshuffle((1, 0, 2))[
            T.arange(s_len), char_pos_ids
        ]

        inputs.append(char_for_output)
        if char_bidirect:
            inputs.append(char_rev_output)
            input_dim += char_lstm_dim

    #
    # Capitalization feature
    #
    if cap_dim:
        input_dim += cap_dim
        cap_layer = EmbeddingLayer(n_cap, cap_dim, name='cap_layer')
        inputs.append(cap_layer.link(cap_ids))

    # Prepare final input
    if len(inputs) != 1:
        inputs = T.concatenate(inputs, axis=1)

    #
    # Dropout on final input
    #
    if dropout:
        dropout_layer = DropoutLayer(p=dropout)
        input_train = dropout_layer.link(inputs)
        input_test = (1 - dropout) * inputs
        inputs = T.switch(T.neq(is_train, 0), input_train, input_test)

    # LSTM for words
    word_lstm_for = LSTM(input_dim, word_lstm_dim, with_batch=False,
                         name='word_lstm_for')
    word_lstm_rev = LSTM(input_dim, word_lstm_dim, with_batch=False,
                         name='word_lstm_rev')
    word_lstm_for.link(inputs)
    word_lstm_rev.link(inputs[::-1, :])
    word_for_output = word_lstm_for.h
    word_rev_output = word_lstm_rev.h[::-1, :]
    if word_bidirect:
        final_output = T.concatenate(
            [word_for_output, word_rev_output],
            axis=1
        )
        tanh_layer = HiddenLayer(2 * word_lstm_dim, word_lstm_dim,
                                 name='tanh_layer', activation='tanh')
        final_output = tanh_layer.link(final_output)
    else:
        final_output = word_for_output

    # Sentence to Named Entity tags - Score
    final_layer = HiddenLayer(word_lstm_dim, n_tags, name='final_layer',
                              activation=(None if crf else 'softmax'))
    tags_scores = final_layer.link(final_output)

    # No CRF
    if not crf:
        cost = T.nnet.categorical_crossentropy(tags_scores, tag_ids).mean()
    # CRF
    else:
        transitions = shared((n_tags + 2, n_tags + 2), 'transitions')

        small = -1000
        b_s = np.array([[small] * n_tags + [0, small]]).astype(np.float32)
        e_s = np.array([[small] * n_tags + [small, 0]]).astype(np.float32)
        observations = T.concatenate(
            [tags_scores, small * T.ones((s_len, 2))],
            axis=1
        )
        observations = T.concatenate(
            [b_s, observations, e_s],
            axis=0
        )

        # Score from tags
        real_path_score = tags_scores[T.arange(s_len), tag_ids].sum()

        # Score from transitions
        b_id = theano.shared(value=np.array([n_tags], dtype=np.int32))
        e_id = theano.shared(value=np.array([n_tags + 1], dtype=np.int32))
        padded_tags_ids = T.concatenate([b_id, tag_ids, e_id], axis=0)
        real_path_score += transitions[
            padded_tags_ids[T.arange(s_len + 1)],
            padded_tags_ids[T.arange(s_len + 1) + 1]
        ].sum()

        all_paths_scores = forward(observations, transitions)
        cost = - (real_path_score - all_paths_scores)

    # Network parameters
    params = []
    if word_dim:
        self.add_component(word_layer)
        params.extend(word_layer.params)
    if char_dim:
        self.add_component(char_layer)
        self.add_component(char_lstm_for)
        params.extend(char_layer.params)
        params.extend(char_lstm_for.params)
        if char_bidirect:
            self.add_component(char_lstm_rev)
            params.extend(char_lstm_rev.params)
    self.add_component(word_lstm_for)
    params.extend(word_lstm_for.params)
    if word_bidirect:
        self.add_component(word_lstm_rev)
        params.extend(word_lstm_rev.params)
    if cap_dim:
        self.add_component(cap_layer)
        params.extend(cap_layer.params)
    self.add_component(final_layer)
    params.extend(final_layer.params)
    if crf:
        self.add_component(transitions)
        params.append(transitions)
    if word_bidirect:
        self.add_component(tanh_layer)
        params.extend(tanh_layer.params)

    # Prepare train and eval inputs
    eval_inputs = []
    if word_dim:
        eval_inputs.append(word_ids)
    if char_dim:
        eval_inputs.append(char_for_ids)
        if char_bidirect:
            eval_inputs.append(char_rev_ids)
        eval_inputs.append(char_pos_ids)
    if cap_dim:
        eval_inputs.append(cap_ids)
    train_inputs = eval_inputs + [tag_ids]

    # Parse optimization method parameters
    if "-" in lr_method:
        lr_method_name = lr_method[:lr_method.find('-')]
        lr_method_parameters = {}
        for x in lr_method[lr_method.find('-') + 1:].split('-'):
            split = x.split('_')
            assert len(split) == 2
            lr_method_parameters[split[0]] = float(split[1])
    else:
        lr_method_name = lr_method
        lr_method_parameters = {}

    # Compile training function
    print 'Compiling...'
    if training:
        updates = Optimization(clip=5.0).get_updates(lr_method_name, cost, params,
                                                     **lr_method_parameters)
        f_train = theano.function(
            inputs=train_inputs,
            outputs=cost,
            updates=updates,
            givens=({is_train: np.cast['int32'](1)} if dropout else {})
        )
    else:
        f_train = None

    # Compile evaluation function
    if not crf:
        f_eval = theano.function(
            inputs=eval_inputs,
            outputs=tags_scores,
            givens=({is_train: np.cast['int32'](0)} if dropout else {})
        )
    else:
        f_eval = theano.function(
            inputs=eval_inputs,
            outputs=forward(observations, transitions, viterbi=True,
                            return_alpha=False, return_best_sequence=True),
            givens=({is_train: np.cast['int32'](0)} if dropout else {})
        )

    return f_train, f_eval
    variables = model.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))
    loss_epoch += loss

    # Report progress
    if not (n_batch + 1) % max(1, np.round(total_batches / 40)):
        progress = np.round(((n_batch + 1) / total_batches) * 100)
        avg_loss = loss_epoch / (n_batch + 1)
        print("Epoch {}. Progress {}. Average loss in the epoch until now is {}"
              .format(ep, progress, avg_loss))

# VALIDATION
qid_emb = model.ent_rpr(cands_qid).numpy()
qid_emb = normalise(qid_emb)
mnt_embed = model.mnt_rpr(dev_mnt).numpy()
mnt_embed = normalise(mnt_embed)
scores = np.dot(mnt_embed, qid_emb.T)

# Check if the right entity is in the TOP-30 (much faster than computing np.argsort)
k = 30
ranking = np.argpartition(scores, -k)[:, -k:]

easy, medium, hard, total = [], [], [], []
cnt_nones = 0
for which in range(len(dev_ent)):
    if dev_ent[which] in qid2id:
        is_shortlisted = int(qid2id[dev_ent[which]] in ranking[which])
    else:
        is_shortlisted = 0
        cnt_nones += 1
    total.append(is_shortlisted)
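# In the validation block above, both embedding matrices go through `normalise`
# before scores are computed with a plain dot product, so `normalise` presumably
# L2-normalises each row, making the dot product equal to cosine similarity.
# A sketch under that assumption (not the project's actual helper):
import numpy as np

def normalise(mat):
    # L2-normalise every row; guard against zero rows to avoid division by zero.
    norms = np.linalg.norm(mat, axis=1, keepdims=True)
    norms[norms == 0.0] = 1.0
    return mat / norms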
if __name__ == "__main__":
    # Parameters for the dataset
    chunk_size = 200

    # Load in the input data
    dirs = extract_file_names("/home/alex/Projects/Unsupervised/kepler_q9_variability/")
    data = extract_data(dirs)
    data = split_to_chunk(data, chunk_size)
    datalist = convert_datalist(data)
    datalist = normalise(datalist)
    data_arr = np.vstack(datalist)

    with open("autoencoder_dataset.pkl", "wb") as f:
        pickle.dump(data_arr, f)
    print("Written ae_dataset.pkl")

    ### Plotting
    #for i in range(0, 100):
    #    print(datalist[i].shape)
    #    plt.figure()
    #    plt.title('Segment %i' % i)
    #    plt.scatter(range(0, chunk_size), datalist[i])
    #    plt.ylabel('Un-normalised flux')
    #    plt.xlabel('Data point index')
if c.patch_type == 'random':
    data = np.load(c.dataroot + 'nr_patches_1000_random.npz')
    #data = np.load(c.dataroot + 'nr_patches_500_random1.npz')
else:
    data = np.load(c.dataroot + 'nr_patches_1000_most_err_patch_size_96_nr_pats_41_percent_fp_0_mode_fpfn_part_1.npz')

imgs = data['img']
mask = data['label']

if c.nr_patients < 41:
    imgs = imgs[:c.nr_patients * 1000]
    mask = mask[:c.nr_patients * 1000]

# normalise the input images to range between [-1,1]
imgs_norm = np.array([ut.normalise(i) for i in imgs[:, :, :, 0]])

# convert the images and masks to tensors
tensor_imgs = torch.FloatTensor(imgs_norm)
tensor_mask = torch.FloatTensor(mask[:, :, :, 0])  # removing channel dimension for mask or label as well

# stack them together for the generator as 2 channels
train_pair = torch.stack((tensor_imgs, tensor_mask), 1)

dataset = data_utils.TensorDataset(train_pair)
dataloader = data_utils.DataLoader(dataset, batch_size=c.batch_size,
                                   shuffle=True, num_workers=c.workers)

# Device selection
device = torch.device("cuda:" + str(c.cuda_n[0]) if (torch.cuda.is_available() and
def main():
    # parse the command line arguments
    parser = utils.argument_parser()
    args = parser.parse_args()

    print("-------------------------------")
    print("classifier:%s" % args.classifier)
    print("inverter:%s" % args.inverter)
    print("dataset_path:%s" % args.dataset_path)
    print("dataset name:%s" % args.dataset)
    print("results path:%s" % args.results_dir)
    print("inverting from: %s" % args.layer)
    print("-------------------------------")

    # default parameters
    sample_rate = 22050
    frame_len = 1024
    fps = 70
    mel_bands = 80
    mel_min = 27.5
    mel_max = 8000
    blocklen = 115
    batchsize = 32

    start_offset = 10  # secs
    end_offset = 20  # secs

    bin_nyquist = frame_len // 2 + 1
    bin_mel_max = bin_nyquist * 2 * mel_max // sample_rate

    # prepare dataset
    datadir = os.path.join(os.path.dirname(__file__), args.dataset_path,
                           'datasets', args.dataset)

    # load filelist
    with io.open(os.path.join(datadir, 'filelists', 'test')) as f:
        filelist = [l.rstrip() for l in f if l.rstrip()]

    # compute spectra
    print("Computing%s spectra..." %
          (" or loading" if args.cache_spectra else ""))
    # list of tuples, where each tuple has magnitude and phase information for one audio file
    spects = []
    for fn in progress(filelist, 'File '):
        cache_fn = (args.cache_spectra and
                    os.path.join(args.cache_spectra, fn + '.npy'))
        spects.append(cached(cache_fn, audio.extract_spect,
                             os.path.join(datadir, 'audio', fn),
                             sample_rate, frame_len, fps))

    # prepare mel filterbank
    filterbank = audio.create_mel_filterbank(sample_rate, frame_len, mel_bands,
                                             mel_min, mel_max)
    filterbank = filterbank[:bin_mel_max].astype(floatX)

    # precompute mel spectra, if needed, otherwise just define a generator
    mel_spects = (np.log(np.maximum(np.dot(spect[:, :bin_mel_max], filterbank), 1e-7))
                  for spect in spects)

    # load mean/std or compute it, if not computed yet
    meanstd_file = os.path.join(os.path.dirname(__file__),
                                '%s_meanstd.npz' % args.dataset)
    with np.load(meanstd_file) as f:
        mean = f['mean']
        std = f['std']
    mean = mean.astype(floatX)
    istd = np.reciprocal(std).astype(floatX)

    print("Preparing training data feed...")
    # normalised mel spects, without data augmentation
    mel_spects = [(spect - mean) * istd for spect in mel_spects]

    # we create two theano functions
    # the first one uses pre-trained classifier to generate features and predictions
    # the second one uses pre-trained inverter to generate mel spectrograms from input features

    # classifier (discriminator) model
    input_var = T.tensor3('input')
    # insert "channels" dimension, changes a 32 x 115 x 80 input to a
    # 32 x 1 x 115 x 80 input which is fed to the CNN
    inputs = input_var.dimshuffle(0, 'x', 1, 2)
    network = model.architecture(inputs, (None, 1, blocklen, mel_bands))

    # load saved weights
    with np.load(args.classifier) as f:
        lasagne.layers.set_all_param_values(
            network['fc9'], [f['param%d' % i] for i in range(len(f.files))])

    # create output expression
    outputs_score = lasagne.layers.get_output(network[args.layer],
                                              deterministic=True)
    outputs_pred = lasagne.layers.get_output(network['fc9'],
                                             deterministic=True)

    # prepare and compile prediction function
    print("Compiling classifier function...")
    pred_fn_score = theano.function([input_var], outputs_score,
                                    allow_input_downcast=True)
    pred_fn = theano.function([input_var], outputs_pred,
                              allow_input_downcast=True)

    # inverter (generator) model
    if (args.layer == 'fc8') or (args.layer == 'fc7'):
        input_var_deconv = T.matrix('input_var_deconv')
    else:
        input_var_deconv = T.tensor4('input_var_deconv')

    # inverter (generator) model
    if (args.layer == 'fc8'):
        gen_network = upconv.architecture_upconv_fc8(
            input_var_deconv,
            (batchsize, lasagne.layers.get_output_shape(network[args.layer])[1]))
    elif args.layer == 'fc7':
        gen_network = upconv.architecture_upconv_fc7(
            input_var_deconv,
            (batchsize, lasagne.layers.get_output_shape(network[args.layer])[1]))
    elif args.layer == 'mp6':
        gen_network = upconv.architecture_upconv_mp6(
            input_var_deconv,
            (batchsize,
             lasagne.layers.get_output_shape(network[args.layer])[1],
             lasagne.layers.get_output_shape(network[args.layer])[2],
             lasagne.layers.get_output_shape(network[args.layer])[3]),
            args.n_conv_layers, args.n_conv_filters)
    elif args.layer == 'conv5':
        gen_network = upconv.architecture_upconv_conv5(
            input_var_deconv,
            (batchsize,
             lasagne.layers.get_output_shape(network[args.layer])[1],
             lasagne.layers.get_output_shape(network[args.layer])[2],
             lasagne.layers.get_output_shape(network[args.layer])[3]),
            args.n_conv_layers, args.n_conv_filters)
    elif args.layer == 'conv4':
        gen_network = upconv.architecture_upconv_conv4(
            input_var_deconv,
            (batchsize,
             lasagne.layers.get_output_shape(network[args.layer])[1],
             lasagne.layers.get_output_shape(network[args.layer])[2],
             lasagne.layers.get_output_shape(network[args.layer])[3]),
            args.n_conv_layers, args.n_conv_filters)
    elif args.layer == 'mp3':
        gen_network = upconv.architecture_upconv_mp3(
            input_var_deconv,
            (batchsize,
             lasagne.layers.get_output_shape(network[args.layer])[1],
             lasagne.layers.get_output_shape(network[args.layer])[2],
             lasagne.layers.get_output_shape(network[args.layer])[3]),
            args.n_conv_layers, args.n_conv_filters)
    elif args.layer == 'conv2':
        gen_network = upconv.architecture_upconv_conv2(
            input_var_deconv,
            (batchsize,
             lasagne.layers.get_output_shape(network[args.layer])[1],
             lasagne.layers.get_output_shape(network[args.layer])[2],
             lasagne.layers.get_output_shape(network[args.layer])[3]),
            args.n_conv_layers, args.n_conv_filters)
    else:
        gen_network = upconv.architecture_upconv_conv1(
            input_var_deconv,
            (batchsize,
             lasagne.layers.get_output_shape(network[args.layer])[1],
             lasagne.layers.get_output_shape(network[args.layer])[2],
             lasagne.layers.get_output_shape(network[args.layer])[3]),
            args.n_conv_layers, args.n_conv_filters)

    # load saved weights
    with np.load(args.inverter) as f:
        lasagne.layers.set_all_param_values(
            gen_network, [f['param%d' % i] for i in range(len(f.files))])

    # create cost expression
    outputs = lasagne.layers.get_output(gen_network, deterministic=True)

    print("Compiling inverter function...")
    test_fn = theano.function([input_var_deconv], outputs,
                              allow_input_downcast=True)

    # instance-based feature inversion
    # (1) pick a file from a dataset (e.g., dataset: Jamendo test)
    # (2) select a time index to read the instance
    file_idx = np.arange(0, len(filelist))
    hop_size = sample_rate / fps  # samples

    for file_instance in file_idx:
        print("<<<<Analysis for the file: %d>>>>" % (file_instance + 1))
        # provides a random integer start position between start and end offsets
        time_idx = np.random.randint(start_offset, end_offset, 1)[0]

        # generate excerpts for the selected file_idx
        # excerpts is a 3-d array of shape: num_excerpts x blocklen x mel_spects_dimensions
        num_excerpts = len(mel_spects[file_instance]) - blocklen + 1
        print("Number of excerpts in the file :%d" % num_excerpts)
        excerpts = np.lib.stride_tricks.as_strided(
            mel_spects[file_instance],
            shape=(num_excerpts, blocklen, mel_spects[file_instance].shape[1]),
            strides=(mel_spects[file_instance].strides[0],
                     mel_spects[file_instance].strides[0],
                     mel_spects[file_instance].strides[1]))

        # convert the time_idx to the excerpt index
        excerpt_idx = int(np.round((time_idx * sample_rate) / (hop_size)))
        print("Time_idx: %f secs, Excerpt_idx: %d" % (time_idx, excerpt_idx))
        if ((excerpt_idx + batchsize) > num_excerpts):
            print("------------------Number of excerpts are less for file: %d--------------------"
                  % (file_instance + 1))
            break

        # generating feature representations for the selected excerpt.
        # CAUTION: Need to feed a mini-batch to the pre-trained model, so the (mini_batch - 1)
        # following excerpts are also fed, but are not analysed.
        # The classifier can have less than minibatch data, but the inverter needs a batch of
        # data to make a prediction (comes from how the inverter was trained).
        scores = pred_fn_score(excerpts[excerpt_idx:excerpt_idx + batchsize])
        #print("Feature"),
        #print(scores[file_idx])
        predictions = pred_fn(excerpts[excerpt_idx:excerpt_idx + batchsize])
        #print("Prediction:%f" %(predictions[0][0]))

        # mel_predictions is a 3-d array of shape batch_size x blocklen x n_mels
        mel_predictions = np.squeeze(test_fn(scores), axis=1)

        # saves plots for the input Mel spectrogram and its inverted representation
        # all plots are normalised in [0, 1] range
        plots.plot_figures(utils.normalise(excerpts[excerpt_idx]),
                           utils.normalise(mel_predictions[0]),
                           predictions[0][0], file_instance, excerpt_idx,
                           args.results_dir, args.layer)
def feature_compose(mean_duration: float,
                    mean_packet: float,
                    mean_num_of_bytes: float,
                    #mean_packet_rate: float,
                    #mean_byte_rate: float,
                    std_duration: float,
                    std_packet: float,
                    std_num_of_bytes: float,
                    #std_packet_rate: float, std_byte_rate: float,
                    entropy_protocol: float,
                    entropy_dst_ip: float,
                    entropy_src_port: float,
                    entropy_dst_port: float,
                    entropy_flags: float,
                    proportion_src_port: list,
                    proportion_dst_port: list) -> list:
    """
    Compose the feature array

    :param mean_duration: mean duration
    :param mean_packet: mean packet
    :param mean_num_of_bytes: mean number of bytes
    #:param mean_packet_rate: mean packet rate
    #:param mean_byte_rate: mean byte rate
    :param std_duration: std duration
    :param std_packet: std packet
    :param std_num_of_bytes: std number of bytes
    #:param std_packet_rate: std packet rate
    #:param std_byte_rate: std byte rate
    :param entropy_protocol: entropy of protocol
    :param entropy_dst_ip: entropy of dest ip
    :param entropy_src_port: entropy of src port
    :param entropy_dst_port: entropy of dest port
    :param entropy_flags: entropy of flags
    :param proportion_src_port: proportion of src common ports
    :param proportion_dst_port: proportion of dest common ports
    :type mean_duration: float
    :type mean_packet: float
    :type mean_num_of_bytes: float
    #:type mean_packet_rate: float
    #:type mean_byte_rate: float
    :type std_duration: float
    :type std_packet: float
    :type std_num_of_bytes: float
    #:type std_packet_rate: float
    #:type std_byte_rate: float
    :type entropy_protocol: float
    :type entropy_dst_ip: float
    :type entropy_src_port: float
    :type entropy_dst_port: float
    :type entropy_flags: float
    :type proportion_src_port: list
    :type proportion_dst_port: list
    :return: feature array
    :rtype list
    """
    # normalise
    mean_duration = normalise(mean_duration, *feature_min_max.get('mean_duration'))
    mean_packet = normalise(mean_packet, *feature_min_max.get('mean_packet'))
    mean_num_of_bytes = normalise(mean_num_of_bytes, *feature_min_max.get('mean_num_of_bytes'))
    #mean_packet_rate = normalise(mean_packet_rate, *feature_min_max.get('mean_packet_rate'))
    #mean_byte_rate = normalise(mean_byte_rate, *feature_min_max.get('mean_byte_rate'))
    std_duration = normalise(std_duration, *feature_min_max.get('std_duration'))
    std_packet = normalise(std_packet, *feature_min_max.get('std_packet'))
    std_num_of_bytes = normalise(std_num_of_bytes, *feature_min_max.get('std_num_of_bytes'))
    #std_packet_rate = normalise(std_packet_rate, *feature_min_max.get('std_packet_rate'))
    #std_byte_rate = normalise(std_byte_rate, *feature_min_max.get('std_byte_rate'))
    entropy_protocol = normalise(entropy_protocol, *feature_min_max.get('entropy_protocol'))
    entropy_dst_ip = normalise(entropy_dst_ip, *feature_min_max.get('entropy_dst_ip'))
    entropy_src_port = normalise(entropy_src_port, *feature_min_max.get('entropy_src_port'))
    entropy_dst_port = normalise(entropy_dst_port, *feature_min_max.get('entropy_dst_port'))
    entropy_flags = normalise(entropy_flags, *feature_min_max.get('entropy_flags'))

    """
    feature_arr = [
        mean_duration, mean_packet, mean_num_of_bytes, mean_packet_rate, mean_byte_rate,
        std_duration, std_packet, std_num_of_bytes, std_packet_rate, std_byte_rate,
        entropy_protocol, entropy_dst_ip, entropy_src_port, entropy_dst_port, entropy_flags,
    ]
    """
    feature_arr = [
        mean_duration, mean_packet, mean_num_of_bytes,
        std_duration, std_packet, std_num_of_bytes,
        entropy_protocol, entropy_dst_ip, entropy_src_port, entropy_dst_port,
        entropy_flags,
    ]
    feature_arr.extend(proportion_src_port)
    feature_arr.extend(proportion_dst_port)
    return feature_arr
import pickle

import matplotlib.pyplot as plt
import numpy as np

import utils
from network_loader import SeizureData
from relabeling_functions import relabel, reorder
from extrator import FeatureExtractor
from classifier import NetworkClassifer
from make_pdfs import plot_traces

################# Training Data ###################
reload_training = True

if reload_training:
    training_traces = utils.raw_training_load()
    training_traces_norm = utils.normalise(training_traces)
    training_data = FeatureExtractor(training_traces_norm)
    #f = open('../full_raw_training','wb')
    #pickle.dump(training_traces,f)
elif not reload_training:
    print 'skipping raw training load'
    training_traces = pickle.load(open('../full_raw_training', 'rb'))
    training_traces_norm = utils.normalise(training_traces)
    training_data = FeatureExtractor(training_traces_norm)

np.savetxt('training_traces.csv', training_traces_norm, delimiter=',')

################# Training Labels and mixed event exclusion ###################
cleanup = np.loadtxt('../Training_cleanup.csv', delimiter=',')
training_labels = np.array([int(x[1]) for x in cleanup])
print training_labels.shape