def get_batch_full(self, feat, files, augment=False):
    """Turn raw ``(signal, rate)`` pairs into one concatenated feature batch.

    Every signal is first passed through ``self.endpoint_detect``.  For each
    trimmed signal the MFCC streams are collected and, when the matching
    ``cfg`` flags are set, the pitch and time-space streams are appended.
    Streams are then regrouped across the batch, padded with
    ``self.pad_batch``, wrapped with ``cvar``, transposed on dims 0/1 and
    concatenated along dim 2.

    Args:
        feat: Re-iterable collection of ``(signal, rate)`` pairs (it is
            traversed twice).
        files: Not used by this method.
        augment: Forwarded to ``self.endpoint_detect``.

    Returns:
        ``(inp, lengths)`` where ``inp`` is the ``torch.cat`` result and
        ``lengths`` is a numpy array of the second value returned by
        ``self.feature_extract_mfcc`` for each sample.
    """
    trimmed = [
        self.endpoint_detect(signal, sample_rate, augment=augment)
        for signal, sample_rate in feat
    ]
    sample_rates = [pair[1] for pair in feat]

    per_sample = []
    lengths = []
    for signal, sample_rate in zip(trimmed, sample_rates):
        # original author's note on the MFCC output: [T,H]
        mfcc_feat, n_frames = self.feature_extract_mfcc(signal, sample_rate)
        lengths.append(n_frames)

        streams = list(mfcc_feat)
        if cfg.use_pitch:
            streams.extend(self.feature_extract_pitch(signal, sample_rate))
        if cfg.use_timefeat:
            streams.extend(self.feature_extract_timespace(signal, sample_rate))
        per_sample.append(streams)

    # Regroup: one tuple per stream, holding that stream for every sample.
    grouped = list(zip(*per_sample))
    padded = [
        cvar(self.pad_batch(group)).transpose(0, 1) for group in grouped
    ]
    return torch.cat(padded, 2), np.array(lengths)
def __get_features_from_dict(self, label, f_dict, ref_dict):
    """Collect the evaluated feature values registered under ``label``.

    Args:
        label: Key looked up in ``ref_dict``.
        f_dict: Mapping from ``"rfile;nfile;tfile;chromosome;pos"`` keys to
            sequences whose first element is a string holding the features.
        ref_dict: Mapping from label to an iterable of
            ``(tfile, nfile, rfile, chromosome, pos)`` string tuples.

    Returns:
        List of evaluated feature values, in ``ref_dict`` order.  Entries
        whose key is missing from ``f_dict`` are logged and skipped.  An
        unknown ``label`` yields an empty list.
    """
    features = []
    # ``.get`` returns None for an unknown label; iterate nothing instead of
    # failing with "TypeError: 'NoneType' object is not iterable".
    for tfile, nfile, rfile, chromosome, pos in ref_dict.get(label) or ():
        key = ';'.join([rfile, nfile, tfile, chromosome, pos])
        try:
            raw = f_dict[key][0]
        except KeyError:
            logging.error('error: cannot find key "%s"\n', key)
            continue
        # NOTE(review): eval() executes arbitrary code. Safe only if f_dict
        # comes from a trusted source; consider ast.literal_eval if the
        # stored values are plain literals.
        features.append(eval(raw))
    return features
def __get_features_from_feature_db(self, feature_dict, label):
    """Return the evaluated features of every entry tagged with ``label``.

    Args:
        feature_dict: Mapping whose values are sequences of the form
            ``(feature_string, entry_label, ...)``.  The keys are not used.
        label: Label an entry must equal to be included.

    Returns:
        List with ``eval(feature_string)`` for each matching entry, in the
        mapping's iteration order.
    """
    features = []
    # ``values()`` works on Python 2 and 3; ``iteritems()`` exists only on 2.
    for value in feature_dict.values():
        if label == value[1]:
            # NOTE(review): eval() executes arbitrary code. Safe only if
            # feature_dict comes from a trusted source; consider
            # ast.literal_eval if the stored values are plain literals.
            features.append(eval(value[0]))
    return features
def load_features(feature_file):
    """Load features from a text file.

    Every line yielded by ``read_lines(feature_file)`` is stripped and split
    on whitespace into ``key=value`` tokens.  Each token is split on ``'='``;
    the first piece becomes the key and the second piece the value (both
    kept as strings).

    Returns:
        A list with one ``{key: value}`` dict per line.
    """
    records = []
    for raw_line in read_lines(feature_file):
        split_tokens = (token.split('=') for token in raw_line.strip().split())
        records.append({pieces[0]: pieces[1] for pieces in split_tokens})
    return records
def naive_features(data):
    """Build ``(tags, label)`` pairs, one per row of ``data``.

    Args:
        data: pandas DataFrame with an ``OpenStatus`` column and the tag
            columns ``Tag1`` .. ``Tag5``.

    Returns:
        List of ``(tags, label)`` tuples in row order.  ``tags`` maps every
        non-null tag of the row to ``1``; ``label`` is the row's
        ``OpenStatus`` value.
    """
    tag_fields = ['Tag%d' % n for n in range(1, 6)]
    features = []
    for pos in range(len(data)):
        # ``pos`` is a row position, so read with ``.iloc``.  Plain
        # ``data[col][pos]`` is a label lookup and fails (or silently picks
        # another row) when the index is not 0..n-1, e.g. after filtering.
        label = data['OpenStatus'].iloc[pos]
        tags = {}
        for field in tag_fields:
            tag = data[field].iloc[pos]
            if not pd.isnull(tag):
                tags[tag] = 1
        features.append((tags, label))
    return features
def naive_features(data):
    """Convert each row of ``data`` into a ``(tags, label)`` pair.

    Args:
        data: pandas DataFrame providing the columns ``OpenStatus`` and
            ``Tag1`` through ``Tag5``.

    Returns:
        A list, in row order, of ``(tags, label)`` tuples where ``tags`` is a
        dict mapping each non-null tag in the row to ``1`` and ``label`` is
        the ``OpenStatus`` value of that row.
    """
    fields = ['Tag%d' % i for i in range(1, 6)]
    features = []
    for row in range(len(data)):
        # Rows are addressed by position via ``.iloc``; the previous
        # ``data[col][row]`` form looked rows up by index *label* and broke
        # on any DataFrame whose index is not the default 0..n-1 range.
        label = data['OpenStatus'].iloc[row]
        row_tags = [data[f].iloc[row] for f in fields]
        tags = {}
        for tag in row_tags:
            if pd.isnull(tag):
                continue
            tags[tag] = 1
        features.append((tags, label))
    return features
def plot_feature(self, feature_name, plot_all=False):
    """Plot a two-column feature in 2D, grouped by genre class.

    Args:
        feature_name: Either a string, which is passed to
            ``self.get_feature``, or a callable that is applied to every
            entry of ``self.data``; the callable's results are stacked
            row-wise with ``np.vstack``.
        plot_all: If true, scatter every sample of each class in that
            class's colour.  Otherwise plot one point per class at the
            nan-mean with nan-std error bars.

    Raises:
        ValueError: If the resulting feature matrix does not have exactly
            two columns.  (Previously a bare ``raise`` was used here, which
            outside an ``except`` block only produces an unrelated
            "No active exception to reraise" error.)
    """
    colors = ['dimgray', 'red', 'brown', 'darkgray', 'yellow',
              'palegreen', 'seagreen', 'deepskyblue', 'navy', 'deeppink']
    genres = ['blues', 'classical', 'country', 'disco', 'hiphop',
              'jazz', 'metal', 'pop', 'reggae', 'rock']

    # column vector of class assignments
    classes = self.get_feature('class')

    if isinstance(feature_name, str):
        # basic features with a string name: fetch the matrix directly
        feature_vector = self.get_feature(feature_name)
    else:
        # assume feature_name is a function on a single entry of self.data
        # returning a 1x2 numpy array
        feature_vector = np.vstack(
            [feature_name(self.data[i]) for i in range(len(self.data))]
        )

    if feature_vector.shape[1] != 2:
        raise ValueError(
            'plot_feature needs a feature with exactly 2 columns, got %d'
            % feature_vector.shape[1]
        )

    # plot them by class; class ids run from 1 to len(colors)
    plt.figure(1)
    plot_handles = []
    class_ids = classes[:, 0]
    for class_id, color in enumerate(colors, start=1):
        subset = feature_vector[class_ids == class_id, :]
        if plot_all:
            # every sample of the class, in the class colour
            handle = plt.scatter(subset[:, 0], subset[:, 1], c=color)
        else:
            # class mean, with the standard deviation as error bars
            handle = plt.errorbar(
                np.nanmean(subset[:, 0]),
                np.nanmean(subset[:, 1]),
                xerr=np.nanstd(subset[:, 0]),
                yerr=np.nanstd(subset[:, 1]),
                mfc=color,
                mec=color,
                ecolor=color,
                fmt='o',
            )
        plot_handles.append(handle)

    plt.title('2D Feature space by genre')
    plt.xlabel('Parameter 1')
    plt.ylabel('Parameter 2')
    plt.legend(plot_handles, genres)
    plt.show()
def convert_data(data_name, feature_list, max_features, dataset):
    """Read a labelled text file and convert it to feature/label arrays.

    For every line of ``data_name`` the first character is parsed as an
    integer and stored incremented by one as the label.  The line is turned
    into a sentence by ``line_to_words(line, dataset)`` and each object in
    ``feature_list`` contributes ``sentenceToFeatures(sentence)``; the
    contributions are joined, in order, into one flat list.

    Args:
        data_name: Path of the text file to read.
        feature_list: Objects exposing ``sentenceToFeatures(sentence)``.
        max_features: Forwarded to ``pad_sentence``.
        dataset: Forwarded to ``line_to_words``.

    Returns:
        ``(features, labels)`` as ``np.int32`` arrays.
    """
    features = []
    labels = []
    with open(data_name, 'r') as f:
        for line in f:
            labels.append(int(line[0]) + 1)
            sentence = line_to_words(line, dataset)
            # Flat comprehension instead of reduce(l1 + l2): ``reduce`` is
            # not a builtin on Python 3, and repeated list concatenation is
            # quadratic in the number of features.
            sentence_features = [
                value
                for feat in feature_list
                for value in feat.sentenceToFeatures(sentence)
            ]
            # Return value is ignored, so pad_sentence presumably pads the
            # list in place -- TODO confirm against its definition.
            pad_sentence(sentence_features, max_features)
            features.append(sentence_features)
    return (np.array(features, dtype=np.int32),
            np.array(labels, dtype=np.int32))
def __feature_importance_bar_plot(self, fig, subplot_pos):
    """Draw the ranked feature importances as a bar chart on ``fig``.

    The file at ``self.args.ranked_features`` is read line by line; each
    line is split on whitespace, the first field (underscores replaced by
    spaces) is used as the tick label and the second field is parsed as the
    float importance.

    Args:
        fig: Object providing ``add_subplot``; presumably a matplotlib
            figure.
        subplot_pos: Indexable with at least three entries, forwarded as the
            three positional arguments of ``fig.add_subplot``.
    """
    features = []
    importance = []
    # ``with`` closes the handle even if a line fails to parse; the handle
    # was previously never closed.
    with open(self.args.ranked_features, 'r') as file_stream:
        for line in file_stream:
            fields = line.strip().split()
            features.append(fields[0].replace('_', ' '))
            importance.append(float(fields[1]))

    ax = fig.add_subplot(subplot_pos[0], subplot_pos[1], subplot_pos[2])
    # ``range`` exists on Python 2 and 3; ``xrange`` only on 2.
    ax.bar(list(range(len(importance))), importance)
    ax.set_xlabel('Features', fontsize=8)
    ax.set_xticks(numpy.arange(len(importance)))
    ax.set_xticklabels(features, rotation=90, fontsize=6)
    ax.set_ylabel('Importance', fontsize=8)
    ax.yaxis.set_tick_params(labelsize=8)
    ax.xaxis.set_tick_params(labelsize=6)
def convert_data(data_name, feature_list, max_features, dataset):
    """Convert a labelled text file into ``(features, labels)`` arrays.

    Each line's first character is parsed as an integer and, incremented by
    one, becomes the label.  ``line_to_words(line, dataset)`` produces the
    sentence, and the outputs of ``sentenceToFeatures(sentence)`` for every
    object in ``feature_list`` are appended in order to one flat list.

    Args:
        data_name: Path of the text file to read.
        feature_list: Objects exposing ``sentenceToFeatures(sentence)``.
        max_features: Forwarded to ``pad_sentence``.
        dataset: Forwarded to ``line_to_words``.

    Returns:
        Tuple of two ``np.int32`` arrays: the features and the labels.
    """
    features = []
    lbl = []
    with open(data_name, 'r') as f:
        for line in f:
            lbl.append(int(line[0]) + 1)
            sentence = line_to_words(line, dataset)
            # Accumulate with extend() rather than reduce(l1 + l2):
            # ``reduce`` is not a builtin on Python 3 and the repeated
            # concatenation copied the growing list on every step.
            sentence_features = []
            for feat in feature_list:
                sentence_features.extend(feat.sentenceToFeatures(sentence))
            # Return value is ignored, so pad_sentence presumably pads the
            # list in place -- TODO confirm against its definition.
            pad_sentence(sentence_features, max_features)
            features.append(sentence_features)
    return (np.array(features, dtype=np.int32),
            np.array(lbl, dtype=np.int32))
def simple_extractors(dialogs_df, extractors):
    """Apply every extractor to the ``dialog`` column and join the results.

    Each extractor is applied to every value of ``dialogs_df['dialog']`` and
    its result is expanded into columns with ``pd.Series``.  The first half
    of the columns is named for side ``self`` and the second half for side
    ``that``:

    * more than one column per side: ``<extractor>_<side>_<i>``, ``i`` from 1;
    * otherwise: ``<extractor>_<side>``.

    Args:
        dialogs_df: DataFrame with a ``dialog`` column.
        extractors: Iterable of named callables (``__name__`` is used for the
            column names).

    Returns:
        DataFrame with the columns of all extractors concatenated along
        axis 1.
    """
    sides = ['self', 'that']
    features = []
    for ex in extractors:
        # The former stray positional ``1`` was bound to Series.apply's
        # deprecated ``convert_dtype`` parameter (Series.apply has no axis);
        # dropping it keeps the default behaviour without the warning.
        feat = dialogs_df['dialog'].apply(ex).apply(pd.Series)
        # number of columns per side
        feat_len = feat.shape[1] // 2
        if feat_len > 1:
            feat.columns = [
                '{ex}_{side}_{i}'.format(ex=ex.__name__, side=side, i=i)
                for side in sides
                for i in range(1, feat_len + 1)
            ]
        else:
            feat.columns = [
                '{ex}_{side}'.format(ex=ex.__name__, side=side)
                for side in sides
            ]
        features.append(feat)
    return pd.concat(features, axis=1)
def one_instance(params, feat, rep):
    """Run one replicate of the evolution loop and return its measurements.

    The population is created with ``evolve.init``, the fitness of the first
    generation is evaluated, and an initial measurement is taken.  Then, for
    ``params['iters']`` generations, variation and selection are applied and
    a measurement is recorded after each update.

    Args:
        params: Parameter dict; ``'iters'`` and ``'debug'`` are read here.
        feat: Passed to ``features.append`` and replaced by nothing here --
            the return value of each call is kept separately.
        rep: Replicate identifier forwarded to ``features.append``.

    Returns:
        Whatever the last ``features.append`` call returned.
    """
    population = evolve.init(params)

    # assign fitness for the first generation (not necessary though!)
    population['fitness'] = [
        evolve.eval_string(candidate, population, params)
        for candidate in population['parents']
    ]

    # initial measurement; alternatively could first do one round of selection
    feats = features.append(population, feat, params, 0, rep)

    init_params = params.copy()  # for dynamic params
    rep_params = params.copy()  # dynamics will overwrite as it goes

    for generation in range(params['iters']):
        evolve.variation(population, rep_params, generation, init_params)
        evolve.select(population, rep_params)
        # measure AFTER the update
        feats = features.append(
            population, feat, rep_params, generation + 1, rep)
        if params['debug']:
            evolve.check(population, rep_params)

    return feats