def compute_edge_features(x, config_d):
    """Compute thresholded, spread edge features from a raw waveform.

    Parameters
    ----------
    x : np.ndarray[ndim=1]
        Raw integer signal; normalized by 2**15 - 1 before processing,
        so presumably 16-bit PCM -- confirm with callers.
    config_d : dict
        Configuration dict with 'SPECTROGRAM' and 'EDGES' sections
        supplying all front-end parameters.

    Returns
    -------
    np.ndarray[dtype=np.uint8]
        Edge map reorganized by reorg_part_for_fast_filtering.
    """
    spec_conf = config_d['SPECTROGRAM']
    edge_conf = config_d['EDGES']
    # Scale the integer samples into the unit interval.
    signal = x.astype(float) / (2 ** 15 - 1)
    S, sample_mapping, sample_to_frames = esp.get_spectrogram_features(
        signal,
        spec_conf['sample_rate'],
        spec_conf['num_window_samples'],
        spec_conf['num_window_step_samples'],
        spec_conf['fft_length'],
        spec_conf['freq_cutoff'],
        spec_conf['kernel_length'],
        preemph=spec_conf['preemphasis'],
        no_use_dpss=spec_conf['no_use_dpss'],
        do_freq_smoothing=spec_conf['do_freq_smoothing'],
        return_sample_mapping=True)
    # Unthresholded edge maps plus the bookkeeping needed to threshold them.
    E, edge_feature_row_breaks, edge_orientations = \
        esp._edge_map_no_threshold(S.T)
    # Threshold and spread the edges in place.
    esp._edge_map_threshold_segments(
        E,
        edge_conf['block_length'],
        edge_conf['spread_length'],
        threshold=edge_conf['threshold'],
        edge_orientations=edge_orientations,
        edge_feature_row_breaks=edge_feature_row_breaks,
        abst_threshold=edge_conf['abst_threshold'],
        verbose=False)
    return reorg_part_for_fast_filtering(E).astype(np.uint8)
def get_waliji_feature_map(s,
                           log_part_blocks,
                           log_invpart_blocks,
                           abst_threshold=np.array([.025,.025,.015,.015,
                                                    .02,.02,.02,.02]),
                           spread_length=3,
                           fft_length=512,
                           num_window_step_samples=80,
                           freq_cutoff=3000,
                           sample_rate=16000,
                           num_window_samples=320,
                           kernel_length=7):
    """Map a raw signal to a grid of waliji part-feature codes.

    Input is usually just the signal s as the rest of the parameters
    are not going to change very often.

    Parameters
    ----------
    s : np.ndarray[ndim=1]
        Raw signal data that we are extracting features from.
    log_part_blocks : np.ndarray[ndim=4,dtype=np.float32]
        First dimension is over the different features.
    log_invpart_blocks : np.ndarray[ndim=4,dtype=np.float32]
        Essentially the same array as log_part_blocks; related to it by
        np.log(1 - np.exp(log_part_blocks)).

    NOTE(review): abst_threshold and spread_length are accepted but never
    used in this body -- the thresholding call hard-codes its settings.
    Confirm whether they should be forwarded.
    """
    # Spectrogram front end.
    S = esp.get_spectrogram_features(s,
                                     sample_rate,
                                     num_window_samples,
                                     num_window_step_samples,
                                     fft_length,
                                     freq_cutoff,
                                     kernel_length,
                                     )
    # Unthresholded edges plus the bookkeeping needed to threshold them.
    E, edge_feature_row_breaks, edge_orientations = \
        esp._edge_map_no_threshold(S)
    # Threshold and spread the edges in place (block length 40, spread 1).
    esp._edge_map_threshold_segments(E, 40, 1,
                                     threshold=.7,
                                     edge_orientations=edge_orientations,
                                     edge_feature_row_breaks=edge_feature_row_breaks)
    E = reorg_part_for_fast_filtering(E)
    # Code every location with its best-matching part.
    F = cp.code_parts_fast(E.astype(np.uint8),
                           log_part_blocks,
                           log_invpart_blocks,
                           10)
    F = np.argmax(F, 2)
    # The amount of spreading to do is governed by the size of the
    # part features.
    F = swp.spread_waliji_patches(F,
                                  log_part_blocks.shape[1],
                                  log_part_blocks.shape[2],
                                  log_part_blocks.shape[0])
    return collapse_to_grid(F,
                            log_part_blocks.shape[1],
                            log_part_blocks.shape[2])
def add_frames(self, E, edge_feature_row_breaks=None,
               edge_orientations=None, abst_threshold=None,
               time_axis=1):
    """Fold a new edge-map block into the running per-feature mean.

    Maintains self.E as the mean over all frames seen so far and
    self.num_frames as their count; self.processed_frames marks whether
    any frames have been accumulated yet.

    Parameters
    ----------
    E : np.ndarray
        Edge map whose `time_axis` dimension indexes frames; copied, so
        the caller's array is never modified.
    abst_threshold : optional
        NOTE(review): only used as an on/off switch -- the value itself
        is not forwarded to the thresholding call, which hard-codes
        threshold=.3. Confirm this is intended.
    time_axis : int
        Axis of E that runs over frames (default 1).
    """
    frames = E.copy()
    if abst_threshold is not None:
        # Threshold/spread the copied edges in place before averaging.
        esp._edge_map_threshold_segments(frames, 40, 1,
                                         threshold=.3,
                                         edge_orientations=edge_orientations,
                                         edge_feature_row_breaks=edge_feature_row_breaks)
    if self.processed_frames:
        # Weighted update of the running mean with the new frame block.
        total = self.E * self.num_frames + frames.sum(axis=time_axis)
        self.E = total / (self.num_frames + frames.shape[time_axis])
    else:
        # First block seen: the mean is just this block's mean.
        self.E = frames.mean(axis=time_axis)
        self.processed_frames = True
    self.num_frames += frames.shape[time_axis]
# Log which signal file this index corresponds to, then extract local
# edge-patch features from it.  Relies on names from the enclosing scope:
# f (open log file), s_idx, s_fname, lower_cutoff, tmp_data_path,
# and the project modules esp / elf -- presumably inside a loop over
# training files; confirm against the surrounding script.
f.write(str(s_idx)+'\t'+s_fname+'\n')
s = np.load(s_fname)
# Spectrogram front end with parameters from the enclosing scope.
S = esp.get_spectrogram_features(s,
                                 sample_rate,
                                 num_window_samples,
                                 num_window_step_samples,
                                 fft_length,
                                 freq_cutoff,
                                 kernel_length)
# Only dump intermediates for one particular cutoff setting.
if lower_cutoff == 10:
    np.save(tmp_data_path+str(s_idx)+'S.npy',S)
# Unthresholded edges plus the bookkeeping needed to threshold them.
E, edge_feature_row_breaks,\
    edge_orientations = esp._edge_map_no_threshold(S)
# Threshold and spread the edges in place (block length 20, spread 1).
esp._edge_map_threshold_segments(E, 20, 1,
                                 threshold=.7,
                                 edge_orientations = edge_orientations,
                                 edge_feature_row_breaks = edge_feature_row_breaks)
if lower_cutoff == 10:
    np.save(tmp_data_path+str(s_idx)+'E.npy',E)
# 5x5 patches with an edge-count ceiling of 200 (floor is lower_cutoff).
patch_width = 5
patch_height = 5
upper_cutoff = 200
bp,all_patch_rows,all_patch_cols = elf.extract_local_features_tied(E,patch_height,
                                                                   patch_width,
                                                                   lower_cutoff,
                                                                   upper_cutoff,
                                                                   edge_feature_row_breaks,
                                                                   )
# get rid of those that are just hugging the border
use_indices = np.logical_and(all_patch_rows < E.shape[0] - patch_height,
                             all_patch_cols < E.shape[1] - patch_width)
data_iter.reset_exp() for datum_id in xrange(data_iter.num_data): if datum_id % 10 == 0: print "working on example", datum_id if data_iter.next(compute_pattern_times=True, max_template_length=classifier.window[1], wait_for_positive_example=True): pattern_times = data_iter.pattern_times num_detections = data_iter.E.shape[1] - classifier.window[1] num_frames += data_iter.E.shape[1] scores = -np.inf * np.ones(num_detections) coarse_count_scores = -np.inf *np.ones(num_detections) coarse_scores = -np.inf * np.ones(num_detections) esp._edge_map_threshold_segments(data_iter.E, classifier.window[1], 1, threshold=.3, edge_orientations = data_iter.edge_orientations, edge_feature_row_breaks = data_iter.edge_feature_row_breaks) for d in xrange(num_detections): E_segment = data_iter.E[:,d:d+classifier.window[1]] scores[d] = classifier.score_no_bg(E_segment) coarse_count_scores[d] = classifier.coarse_score_count(E_segment) if d>1 and d<num_detections-1: if (coarse_count_scores[d-1] > coarse_thresh) and \ ((coarse_count_scores[d-1]>\ coarse_count_scores[d] and\ coarse_count_scores[d-1]>=\ coarse_count_scores[d-2]) or\ (coarse_count_scores[d-1]>=\ coarse_count_scores[d] and\ coarse_count_scores[d-1]>\
# we will use the same training and cross-validation division for each phone for phn in phn_list: train_data_iter.reset_exp() datum_id = 0 patterns = [] lens = [] offset = 3 train_data_iter.patterns = [np.array((phn,))] while train_data_iter.next(wait_for_positive_example=True, compute_pattern_times=True): if datum_id % 20 == 0: print datum_id datum_id += 1 esp._edge_map_threshold_segments(train_data_iter.E, 40, 1, threshold=.3, edge_orientations = train_data_iter.edge_orientations, edge_feature_row_breaks = train_data_iter.edge_feature_row_breaks) pattern_times = esp.get_pattern_times([np.array((phn,))], train_data_iter.phns, train_data_iter.feature_label_transitions) for p in pattern_times: patterns.append(train_data_iter.E[:,max(0,p[0]-offset):min(train_data_iter.E.shape[1],p[1]+offset)].copy()) lens.append(p[1] - p[0] + 1) # get mean length mean_length = int(np.mean(np.array(lens))) template_height,template_length,registered_examples, template = et.simple_estimate_template(patterns,template_length=mean_length) np.save(phn+'_registered_examples070212',registered_examples) np.save(phn+'_template070212',template)
# Dump phone labels and two differently-spread edge maps for every
# training utterance.  Relies on template_exp, texp, train_data_path,
# esp, np from the enclosing scope.
# NOTE(review): defaultdict() with no default_factory behaves like a
# plain dict (missing keys still raise KeyError) -- confirm whether a
# factory such as int was intended.  These three dicts are not used
# within this chunk.
unigrams = defaultdict()
bigrams = defaultdict()
trigrams = defaultdict()
data_iter, _ =\
    template_exp.get_exp_iterator(texp,train_percent=1.1)
while data_iter.next():
    np.save(train_data_path
            +str(data_iter.cur_data_pointer)+'phns.npy',
            data_iter.phns)
    # Work on a copy so the iterator's E survives for the second pass.
    E_spread = data_iter.E.copy()
    # Spread length 1; saved under the 'E_spread3' name --
    # NOTE(review): the filename suffix (3) does not match the spread
    # argument (1); confirm the naming convention with downstream readers.
    esp._edge_map_threshold_segments(E_spread, 40, 1,
                                     threshold=.3,
                                     edge_orientations = data_iter.edge_orientations,
                                     edge_feature_row_breaks = data_iter.edge_feature_row_breaks)
    np.save(train_data_path
            +str(data_iter.cur_data_pointer)+'E_spread3',
            E_spread.astype(np.uint8))
    # Free the copy before the second, in-place pass.
    del E_spread
    # Spread length 2, applied in place to the iterator's own E;
    # saved under the 'E_spread5' name (same naming caveat as above).
    esp._edge_map_threshold_segments(data_iter.E, 40, 2,
                                     threshold=.3,
                                     edge_orientations = data_iter.edge_orientations,
                                     edge_feature_row_breaks = data_iter.edge_feature_row_breaks)
    np.save(train_data_path
            +str(data_iter.cur_data_pointer)+'E_spread5',
            data_iter.E.astype(np.uint8))
print phn_id, phn_list[phn_id] phn_test_examples = np.zeros( (num_target_phns[phn_id], stored_bg.shape[0], target_phns_max_length[phn_id]), dtype=np.uint8 ) phn_test_bgs = np.zeros((num_target_phns[phn_id], stored_bg.shape[0]), dtype=np.float64) phn_lengths = np.zeros(num_target_phns[phn_id], dtype=int) cur_example_idx = 0 for cur_data_pointer in xrange(1, num_test_data + 1): if cur_data_pointer % 20 == 0: print cur_data_pointer if phn_id == 0: E = np.load(data_dir + str(cur_data_pointer) + "tune_E.npy") esp._edge_map_threshold_segments( E, 40, 1, threshold=0.3, edge_orientations=test_data_iter.edge_orientations, edge_feature_row_breaks=test_data_iter.edge_feature_row_breaks, ) np.save(data_dir + str(cur_data_pointer) + "thresholded_E.npy", E) else: E = np.load(data_dir + str(cur_data_pointer) + "thresholded_E.npy") feature_label_transitions = np.load(data_dir + str(cur_data_pointer) + "feature_label_transitions.npy") seq_phns = np.load(data_dir + str(cur_data_pointer) + "phns.npy") for test_phn_id in xrange(seq_phns.shape[0]): test_phn = seq_phns[test_phn_id] # what's the id of the phone in the master list? test_phn_id_list = np.arange(phn_list.shape[0])[phn_list == test_phn][0] if test_phn_id_list != phn_id: continue if test_phn_id + 1 < feature_label_transitions.shape[0]: