def compute_edge_features(x, config_d):
    """Compute thresholded, spread edge features from a raw waveform.

    Parameters
    ----------
    x : np.ndarray[ndim=1]
        Raw integer signal; normalized by 2**15 - 1 before processing,
        so presumably 16-bit PCM -- confirm with callers.
    config_d : dict
        Configuration dict with 'SPECTROGRAM' and 'EDGES' sections
        supplying all front-end parameters.

    Returns
    -------
    np.ndarray[dtype=np.uint8]
        Edge map reorganized by reorg_part_for_fast_filtering.
    """
    spec_conf = config_d['SPECTROGRAM']
    edge_conf = config_d['EDGES']
    # Scale the integer samples into the unit interval.
    signal = x.astype(float) / (2 ** 15 - 1)
    S, sample_mapping, sample_to_frames = esp.get_spectrogram_features(
        signal,
        spec_conf['sample_rate'],
        spec_conf['num_window_samples'],
        spec_conf['num_window_step_samples'],
        spec_conf['fft_length'],
        spec_conf['freq_cutoff'],
        spec_conf['kernel_length'],
        preemph=spec_conf['preemphasis'],
        no_use_dpss=spec_conf['no_use_dpss'],
        do_freq_smoothing=spec_conf['do_freq_smoothing'],
        return_sample_mapping=True)
    # Unthresholded edge maps plus the bookkeeping needed to threshold them.
    E, edge_feature_row_breaks, edge_orientations = \
        esp._edge_map_no_threshold(S.T)
    # Threshold and spread the edges in place.
    esp._edge_map_threshold_segments(
        E,
        edge_conf['block_length'],
        edge_conf['spread_length'],
        threshold=edge_conf['threshold'],
        edge_orientations=edge_orientations,
        edge_feature_row_breaks=edge_feature_row_breaks,
        abst_threshold=edge_conf['abst_threshold'],
        verbose=False)
    return reorg_part_for_fast_filtering(E).astype(np.uint8)
def get_waliji_feature_map(s,
                           log_part_blocks,
                           log_invpart_blocks,
                           abst_threshold=np.array([.025,.025,.015,.015,
                                                    .02,.02,.02,.02]),
                           spread_length=3,
                           fft_length=512,
                           num_window_step_samples=80,
                           freq_cutoff=3000,
                           sample_rate=16000,
                           num_window_samples=320,
                           kernel_length=7):
    """Map a raw signal to a grid of waliji part-feature codes.

    Input is usually just the signal s as the rest of the parameters
    are not going to change very often.

    Parameters
    ----------
    s : np.ndarray[ndim=1]
        Raw signal data that we are extracting features from.
    log_part_blocks : np.ndarray[ndim=4,dtype=np.float32]
        First dimension is over the different features.
    log_invpart_blocks : np.ndarray[ndim=4,dtype=np.float32]
        Essentially the same array as log_part_blocks; related to it by
        np.log(1 - np.exp(log_part_blocks)).

    NOTE(review): abst_threshold and spread_length are accepted but never
    used in this body -- the thresholding call hard-codes its settings.
    Confirm whether they should be forwarded.
    """
    # Spectrogram front end.
    S = esp.get_spectrogram_features(s,
                                     sample_rate,
                                     num_window_samples,
                                     num_window_step_samples,
                                     fft_length,
                                     freq_cutoff,
                                     kernel_length,
                                     )
    # Unthresholded edges plus the bookkeeping needed to threshold them.
    E, edge_feature_row_breaks, edge_orientations = \
        esp._edge_map_no_threshold(S)
    # Threshold and spread the edges in place (block length 40, spread 1).
    esp._edge_map_threshold_segments(E, 40, 1,
                                     threshold=.7,
                                     edge_orientations=edge_orientations,
                                     edge_feature_row_breaks=edge_feature_row_breaks)
    E = reorg_part_for_fast_filtering(E)
    # Code every location with its best-matching part.
    F = cp.code_parts_fast(E.astype(np.uint8),
                           log_part_blocks,
                           log_invpart_blocks,
                           10)
    F = np.argmax(F, 2)
    # The amount of spreading to do is governed by the size of the
    # part features.
    F = swp.spread_waliji_patches(F,
                                  log_part_blocks.shape[1],
                                  log_part_blocks.shape[2],
                                  log_part_blocks.shape[0])
    return collapse_to_grid(F,
                            log_part_blocks.shape[1],
                            log_part_blocks.shape[2])
def add_frames(self, E, edge_feature_row_breaks=None,
               edge_orientations=None, abst_threshold=None,
               time_axis=1):
    """Fold a new edge-map block into the running per-feature mean.

    Maintains self.E as the mean over all frames seen so far and
    self.num_frames as their count; self.processed_frames marks whether
    any frames have been accumulated yet.

    Parameters
    ----------
    E : np.ndarray
        Edge map whose `time_axis` dimension indexes frames; copied, so
        the caller's array is never modified.
    abst_threshold : optional
        NOTE(review): only used as an on/off switch -- the value itself
        is not forwarded to the thresholding call, which hard-codes
        threshold=.3. Confirm this is intended.
    time_axis : int
        Axis of E that runs over frames (default 1).
    """
    frames = E.copy()
    if abst_threshold is not None:
        # Threshold/spread the copied edges in place before averaging.
        esp._edge_map_threshold_segments(frames, 40, 1,
                                         threshold=.3,
                                         edge_orientations=edge_orientations,
                                         edge_feature_row_breaks=edge_feature_row_breaks)
    if self.processed_frames:
        # Weighted update of the running mean with the new frame block.
        total = self.E * self.num_frames + frames.sum(axis=time_axis)
        self.E = total / (self.num_frames + frames.shape[time_axis])
    else:
        # First block seen: the mean is just this block's mean.
        self.E = frames.mean(axis=time_axis)
        self.processed_frames = True
    self.num_frames += frames.shape[time_axis]
# Log which signal file this index corresponds to, then extract local
# edge-patch features from it.  Relies on names from the enclosing scope:
# f (open log file), s_idx, s_fname, lower_cutoff, tmp_data_path,
# and the project modules esp / elf -- presumably inside a loop over
# training files; confirm against the surrounding script.
f.write(str(s_idx)+'\t'+s_fname+'\n')
s = np.load(s_fname)
# Spectrogram front end with parameters from the enclosing scope.
S = esp.get_spectrogram_features(s,
                                 sample_rate,
                                 num_window_samples,
                                 num_window_step_samples,
                                 fft_length,
                                 freq_cutoff,
                                 kernel_length)
# Only dump intermediates for one particular cutoff setting.
if lower_cutoff == 10:
    np.save(tmp_data_path+str(s_idx)+'S.npy',S)
# Unthresholded edges plus the bookkeeping needed to threshold them.
E, edge_feature_row_breaks,\
    edge_orientations = esp._edge_map_no_threshold(S)
# Threshold and spread the edges in place (block length 20, spread 1).
esp._edge_map_threshold_segments(E, 20, 1,
                                 threshold=.7,
                                 edge_orientations = edge_orientations,
                                 edge_feature_row_breaks = edge_feature_row_breaks)
if lower_cutoff == 10:
    np.save(tmp_data_path+str(s_idx)+'E.npy',E)
# 5x5 patches with an edge-count ceiling of 200 (floor is lower_cutoff).
patch_width = 5
patch_height = 5
upper_cutoff = 200
bp,all_patch_rows,all_patch_cols = elf.extract_local_features_tied(E,patch_height,
                                                                   patch_width,
                                                                   lower_cutoff,
                                                                   upper_cutoff,
                                                                   edge_feature_row_breaks,
                                                                   )
# get rid of those that are just hugging the border
use_indices = np.logical_and(all_patch_rows < E.shape[0] - patch_height,
                             all_patch_cols < E.shape[1] - patch_width)
data_iter.reset_exp() for datum_id in xrange(data_iter.num_data): if datum_id % 10 == 0: print "working on example", datum_id if data_iter.next(compute_pattern_times=True, max_template_length=classifier.window[1], wait_for_positive_example=True): pattern_times = data_iter.pattern_times num_detections = data_iter.E.shape[1] - classifier.window[1] num_frames += data_iter.E.shape[1] scores = -np.inf * np.ones(num_detections) coarse_count_scores = -np.inf *np.ones(num_detections) coarse_scores = -np.inf * np.ones(num_detections) esp._edge_map_threshold_segments(data_iter.E, classifier.window[1], 1, threshold=.3, edge_orientations = data_iter.edge_orientations, edge_feature_row_breaks = data_iter.edge_feature_row_breaks) for d in xrange(num_detections): E_segment = data_iter.E[:,d:d+classifier.window[1]] scores[d] = classifier.score_no_bg(E_segment) coarse_count_scores[d] = classifier.coarse_score_count(E_segment) if d>1 and d<num_detections-1: if (coarse_count_scores[d-1] > coarse_thresh) and \ ((coarse_count_scores[d-1]>\ coarse_count_scores[d] and\ coarse_count_scores[d-1]>=\ coarse_count_scores[d-2]) or\ (coarse_count_scores[d-1]>=\ coarse_count_scores[d] and\ coarse_count_scores[d-1]>\
# we will use the same training and cross-validation division for each phone for phn in phn_list: train_data_iter.reset_exp() datum_id = 0 patterns = [] lens = [] offset = 3 train_data_iter.patterns = [np.array((phn,))] while train_data_iter.next(wait_for_positive_example=True, compute_pattern_times=True): if datum_id % 20 == 0: print datum_id datum_id += 1 esp._edge_map_threshold_segments(train_data_iter.E, 40, 1, threshold=.3, edge_orientations = train_data_iter.edge_orientations, edge_feature_row_breaks = train_data_iter.edge_feature_row_breaks) pattern_times = esp.get_pattern_times([np.array((phn,))], train_data_iter.phns, train_data_iter.feature_label_transitions) for p in pattern_times: patterns.append(train_data_iter.E[:,max(0,p[0]-offset):min(train_data_iter.E.shape[1],p[1]+offset)].copy()) lens.append(p[1] - p[0] + 1) # get mean length mean_length = int(np.mean(np.array(lens))) template_height,template_length,registered_examples, template = et.simple_estimate_template(patterns,template_length=mean_length) np.save(phn+'_registered_examples070212',registered_examples) np.save(phn+'_template070212',template)
# Dump phone labels and two differently-spread edge maps for every
# training utterance.  Relies on template_exp, texp, train_data_path,
# esp, np from the enclosing scope.
# NOTE(review): defaultdict() with no default_factory behaves like a
# plain dict (missing keys still raise KeyError) -- confirm whether a
# factory such as int was intended.  These three dicts are not used
# within this chunk.
unigrams = defaultdict()
bigrams = defaultdict()
trigrams = defaultdict()
data_iter, _ =\
    template_exp.get_exp_iterator(texp,train_percent=1.1)
while data_iter.next():
    np.save(train_data_path
            +str(data_iter.cur_data_pointer)+'phns.npy',
            data_iter.phns)
    # Work on a copy so the iterator's E survives for the second pass.
    E_spread = data_iter.E.copy()
    # Spread length 1; saved under the 'E_spread3' name --
    # NOTE(review): the filename suffix (3) does not match the spread
    # argument (1); confirm the naming convention with downstream readers.
    esp._edge_map_threshold_segments(E_spread, 40, 1,
                                     threshold=.3,
                                     edge_orientations = data_iter.edge_orientations,
                                     edge_feature_row_breaks = data_iter.edge_feature_row_breaks)
    np.save(train_data_path
            +str(data_iter.cur_data_pointer)+'E_spread3',
            E_spread.astype(np.uint8))
    # Free the copy before the second, in-place pass.
    del E_spread
    # Spread length 2, applied in place to the iterator's own E;
    # saved under the 'E_spread5' name (same naming caveat as above).
    esp._edge_map_threshold_segments(data_iter.E, 40, 2,
                                     threshold=.3,
                                     edge_orientations = data_iter.edge_orientations,
                                     edge_feature_row_breaks = data_iter.edge_feature_row_breaks)
    np.save(train_data_path
            +str(data_iter.cur_data_pointer)+'E_spread5',
            data_iter.E.astype(np.uint8))
print phn_id, phn_list[phn_id] phn_test_examples = np.zeros( (num_target_phns[phn_id], stored_bg.shape[0], target_phns_max_length[phn_id]), dtype=np.uint8 ) phn_test_bgs = np.zeros((num_target_phns[phn_id], stored_bg.shape[0]), dtype=np.float64) phn_lengths = np.zeros(num_target_phns[phn_id], dtype=int) cur_example_idx = 0 for cur_data_pointer in xrange(1, num_test_data + 1): if cur_data_pointer % 20 == 0: print cur_data_pointer if phn_id == 0: E = np.load(data_dir + str(cur_data_pointer) + "tune_E.npy") esp._edge_map_threshold_segments( E, 40, 1, threshold=0.3, edge_orientations=test_data_iter.edge_orientations, edge_feature_row_breaks=test_data_iter.edge_feature_row_breaks, ) np.save(data_dir + str(cur_data_pointer) + "thresholded_E.npy", E) else: E = np.load(data_dir + str(cur_data_pointer) + "thresholded_E.npy") feature_label_transitions = np.load(data_dir + str(cur_data_pointer) + "feature_label_transitions.npy") seq_phns = np.load(data_dir + str(cur_data_pointer) + "phns.npy") for test_phn_id in xrange(seq_phns.shape[0]): test_phn = seq_phns[test_phn_id] # what's the id of the phone in the master list? test_phn_id_list = np.arange(phn_list.shape[0])[phn_list == test_phn][0] if test_phn_id_list != phn_id: continue if test_phn_id + 1 < feature_label_transitions.shape[0]: