def find_lowest(self):
    """Print the candidate molecule whose frequencies best match ``self.efreqlist``.

    Calls ``self.__get_data__()`` first, then compares ``self.efreqlist`` with
    the frequency list of every non-excluded molecule id using FastDTW with a
    Euclidean point cost.  Prints, on three lines, the smallest distance, the
    id that produced it and the name looked up for that id.  Returns nothing.

    Relies on the module-level ``conn``, ``get_molecules`` and ``get_peaks``.
    """
    # Molecule ids that are skipped.  Membership is tested by value; the
    # previous ``mid is 144`` identity test only worked through CPython's
    # small-integer cache.
    excluded_mids = frozenset([144, 3, 4, 94, 35, 34, 49, 38, 37, 36, 39, 92])

    self.__get_data__()
    min_distance = 0
    min_mid = None
    have_match = False

    for mid in get_molecules.get_mid_list(conn):
        if mid in excluded_mids:
            continue

        # The intensities returned alongside the frequencies are not used.
        frequencies, _ = get_peaks.get_frequency_intensity_list(conn, mid)
        distance, _ = fastdtw(self.efreqlist, frequencies, dist=euclidean)

        # The first candidate always wins; later ones must be strictly closer.
        if not have_match or distance < min_distance:
            have_match = True
            min_distance = distance
            min_mid = mid

    print(min_distance)
    print(min_mid)
    print(get_molecules.getName(conn, min_mid))
def nearest(self, rsp):
    """Return the index of the class whose sampled sequences lie closest to ``rsp``.

    For every class in ``self.tr_seqs`` a random sample of ``self.K`` sequences
    is drawn and each one is compared to ``rsp`` with FastDTW (Euclidean point
    cost).  The class whose best sampled distance is smallest is returned as
    computed by ``np.argmin``.
    """
    closest_per_class = []
    for class_seqs in self.tr_seqs:
        sampled = random.sample(class_seqs, self.K)
        class_dists = [fastdtw(rsp, ref, dist=euclidean)[0] for ref in sampled]
        closest_per_class.append(min(class_dists))
    return np.argmin(closest_per_class)
def process(filename):
    '''
    Derive the DTW alignment path between source and target magnitude
    features and apply it to every source feature stream.

    The path is computed from the ``.mag`` files of the utterance and is then
    used to align the source ``.mag``, ``.real``, ``.imag`` and ``.lf0`` files,
    which are written to ``src_aligned_feat_dir``.

    :param filename: path to src mag file
    '''
    file_id = os.path.basename(filename).split(".")[0]
    print(file_id)

    ### load source and target magnitude features ###
    mag_name = file_id + ".mag"
    src_features, _ = io_funcs.load_binary_file_frame(
        os.path.join(src_feat_dir, mag_name), mag_dim)
    tgt_features, tgt_frame_number = io_funcs.load_binary_file_frame(
        os.path.join(tgt_feat_dir, mag_name), mag_dim)

    ### dtw align src with tgt; only the path is needed ###
    dtw_path = fastdtw.fastdtw(src_features, tgt_features)[1]
    dtw_path_dict = load_dtw_path(dtw_path)
    assert len(dtw_path_dict) == tgt_frame_number  # dtw length not matched

    ### align every feature stream with the same path ###
    streams = (
        (".mag", mag_dim),
        (".real", real_dim),
        (".imag", imag_dim),
        (".lf0", lf0_dim),
    )
    for extension, dim in streams:
        stream_name = file_id + extension
        aligner.align_src_feats(os.path.join(src_feat_dir, stream_name),
                                os.path.join(src_aligned_feat_dir, stream_name),
                                dim,
                                dtw_path_dict)
def isCorePoint(minPts, epsDist, currlong, currlat, minlong, minlat, rangelong, rangelat, timeindex, timewindow):
    """Decide whether one grid cell is a core point and link it to close neighbours.

    The cell's time slice (``timewindow // 2`` steps on either side of
    ``timeindex``) is compared with FastDTW (Euclidean point cost) against the
    same slice of each of its 8 neighbours, whose offsets come from the
    module-level ``neighbourlat`` / ``neighbourlong``.  If at least ``minPts``
    cells (the cell itself included) lie within ``epsDist``:

    * the cell's flat index is added to ``globalcorepoints`` and removed from
      ``globalnoisepoints``;
    * an edge is added in graph ``g`` to every close neighbour that lies
      inside the ``[minlat, minlat + rangelat) x [minlong, minlong + rangelong)``
      window, and that neighbour is removed from ``globalnoisepoints``.

    Nothing is returned; all results are written to module-level state
    (``globalcorepoints``, ``globalnoisepoints``, ``g``, ``distancedict``).
    Data is read from the module-level ``f``.

    NOTE(review): the parameter names suggest a DBSCAN-style clustering pass;
    the nesting of the edge-adding loop under the core-point test was
    reconstructed from whitespace-mangled source - confirm against the caller.
    """
    # Floor division keeps the slice bounds integral on Python 3 as well.
    half_window = timewindow // 2
    lowertimeindex = timeindex - half_window
    uppertimeindex = timeindex + half_window + 1
    air = f.variables['air']
    currtimeslice = air[lowertimeindex:uppertimeindex, 0, currlat, currlong]
    currindex = (currlat - minlat) * rangelong + (currlong - minlong)

    countwithineps = 1  # count self. hence 1
    neighbours = []     # (neighbour lat, neighbour long, flat index, distance)
    for k in range(8):
        nlat = currlat + neighbourlat[k]
        nlong = currlong + neighbourlong[k]
        neighbourindex = (nlat - minlat) * rangelong + (nlong - minlong)

        # A pair is cached under "<first visited>_<second visited>".  Seen from
        # this cell, a distance cached by the neighbour is therefore stored as
        # "neighbourindex_currindex".
        cached_key = str(neighbourindex) + "_" + str(currindex)
        if cached_key in distancedict:
            # The pair is complete once both sides have seen it, so the entry
            # is dropped on the hit to keep memory use low.  (Previously the
            # entry was deleted right after being stored, so no lookup could
            # ever hit.)
            distance = distancedict.pop(cached_key)
        else:
            neighbourtimeslice = air[lowertimeindex:uppertimeindex, 0, nlat, nlong]
            distance, _ = fastdtw(currtimeslice, neighbourtimeslice, dist=euclidean)
            # Not previously inserted, so cache it from this cell's side.
            distancedict[str(currindex) + "_" + str(neighbourindex)] = distance

        neighbours.append((nlat, nlong, neighbourindex, distance))
        if distance < epsDist:
            countwithineps += 1

    if countwithineps < minPts:
        return

    globalcorepoints.add(currindex)
    if currindex in globalnoisepoints:
        globalnoisepoints.remove(currindex)

    # Add an edge to every neighbour within epsDist.  Neighbours outside the
    # spatial window were counted above but are not added to the graph.
    for nlat, nlong, neighbourindex, distance in neighbours:
        inside_window = (minlat <= nlat < minlat + rangelat
                         and minlong <= nlong < minlong + rangelong)
        if distance < epsDist and inside_window:
            g.add_edge(currindex, neighbourindex)
            if neighbourindex in globalnoisepoints:
                globalnoisepoints.remove(neighbourindex)
def __init__(self, x, y, dist=lambda x, y: numpy.linalg.norm(x - y), radius=1) -> None:
    """Align ``x`` with ``y`` via FastDTW and keep the length-normalised path.

    Both inputs must be 2-D arrays.  After alignment, ``normed_path_x`` holds
    the path's indices into ``x`` divided by ``len(x)`` and ``normed_path_y``
    the indices into ``y`` divided by ``len(y)``.

    :param x: first sequence (2-D array)
    :param y: second sequence (2-D array)
    :param dist: point-wise cost handed to fastdtw
    :param radius: fastdtw search radius
    """
    assert x.ndim == 2 and y.ndim == 2
    alignment = fastdtw.fastdtw(x, y, radius=radius, dist=dist)
    steps = numpy.array(alignment[1])
    x_steps = steps[:, 0]
    y_steps = steps[:, 1]
    self.normed_path_x = x_steps / len(x)
    self.normed_path_y = y_steps / len(y)
def search(self, value):
    """Return the key of the stored path closest to ``value``, or ``'None'``.

    ``value`` (a list) is first remapped symbol by symbol, then compared with
    every entry of ``self.pathDictionary`` using FastDTW (Euclidean point
    cost).  The key of the closest entry is returned when its distance is
    below 40; otherwise - or when the dictionary is empty - the string
    ``'None'`` is returned.
    """
    # Symbol remapping applied before matching; unlisted symbols (5 is spare)
    # pass through unchanged.
    remap = {2: 10, 6: 3, 1: 11, 3: 13, 7: 14, 9: 15}
    new_value = [remap.get(tmp, tmp) for tmp in value]

    best_key = None
    best_distance = None
    # ``items()`` works on both Python 2 and 3 (``iteritems`` is Python 2 only).
    for pathStr, pathList in self.pathDictionary.items():
        distance, _ = fastdtw(new_value, pathList, dist=euclidean)
        # Strict comparison keeps the first entry on ties.
        if best_distance is None or distance < best_distance:
            best_distance = distance
            best_key = pathStr

    if best_distance is None:
        # Nothing to compare against; previously this crashed in min([]).
        return 'None'

    print('[*] Min_DTW_Distance : %s' % (str(best_distance)))
    if best_distance < 40:
        return best_key
    return 'None'
def dist(X,Y):
    """Sum of per-row FastDTW distances between ``X`` and ``Y``, each scaled.

    Row ``i`` contributes ``fastdtw(X[i], Y[i])[0]`` divided by
    ``std(X)[i] + std(Y)[i] + beta``, with the standard deviations taken along
    axis 0.  The number of rows used is ``len(raio)``; ``raio``, ``radius``
    and ``beta`` are module-level names.
    """
    spread_x = np.std(X, axis=0)
    spread_y = np.std(Y, axis=0)
    total = 0
    for row in range(len(raio)):
        row_distance = fastdtw(X[row], Y[row], radius=radius)[0]
        total = total + row_distance / (spread_x[row] + spread_y[row] + beta)
    return total
def compute_distance(time_series1, time_series2, distance_measure='sts'):
    """Return the distance between two time-series objects.

    :param time_series1: object exposing ``slopes`` (for ``'sts'``) or ``data``
        (for ``'dtw'``)
    :param time_series2: second object with the same attributes
    :param distance_measure: ``'sts'`` for the summed squared difference of the
        slopes, ``'dtw'`` for the FastDTW distance of the data
    :raises ValueError: if ``distance_measure`` is neither ``'sts'`` nor
        ``'dtw'`` (previously this fell through to an unbound local)
    """
    if distance_measure == 'sts':
        # Take the difference of the slopes, square and sum.
        # The square distance is OK here, we don't need to sqrt it.
        return np.sum(np.subtract(time_series1.slopes, time_series2.slopes) ** 2)
    if distance_measure == 'dtw':
        distance, _ = fastdtw(time_series1.data, time_series2.data)
        return distance
    raise ValueError("unknown distance_measure: %r" % (distance_measure,))
def check_max_distance(self, frequencies, list):
    """Record ``list`` as the best combination if ``frequencies`` is closest so far.

    The FastDTW distance (Euclidean point cost) between ``self.efreqlist`` and
    ``frequencies`` is computed.  When no distance has been stored yet, or the
    new one is strictly smaller, ``self.min_distance``, ``self.min_path`` and
    ``self.combination_list`` are updated.  Despite the method name, it is the
    minimum that is tracked.
    """
    distance, path = fastdtw(self.efreqlist, frequencies, dist=euclidean)
    nothing_stored = self.min_distance is None
    if nothing_stored or distance < self.min_distance:
        self.min_distance = distance
        self.min_path = path
        self.combination_list = list
def compute_dtw_dist(part_list, degree_list, dist_func):
    """Compute per-layer FastDTW distances between vertices and their neighbours.

    :param part_list: iterable of ``(vertex, neighbours)`` pairs
    :param degree_list: mapping from a vertex to its ordered degree lists,
        indexed by layer
    :param dist_func: point-wise cost handed to fastdtw
    :return: dict keyed by ``(v1, v2)`` whose values map each layer shared by
        both vertices to the FastDTW distance (radius 1) on that layer
    """
    dtw_dict = {}
    for v1, nbs in part_list:
        layers_v1 = degree_list[v1]  # ordered degree list of v1
        for v2 in nbs:
            layers_v2 = degree_list[v2]  # ordered degree list of v2
            shared_layers = min(len(layers_v1), len(layers_v2))
            per_layer = {}
            dtw_dict[v1, v2] = per_layer
            for layer in range(shared_layers):
                per_layer[layer] = fastdtw(layers_v1[layer], layers_v2[layer],
                                           radius=1, dist=dist_func)[0]
    return dtw_dict
def realtime_dtw(self, mfccs):
    """Print the index of the reference group closest on average to ``mfccs``.

    The references returned by ``self.load_data(case=False)`` are processed in
    consecutive groups of ``int(self.num_samples / 10)`` items.  For each
    group the mean FastDTW distance (Euclidean point cost) to ``mfccs`` is
    computed; the index of the group with the smallest mean is printed.
    Returns nothing.
    """
    ref, _ = self.load_data(case=False)
    num = int(self.num_samples / 10)
    aver = []
    cost = []
    for j in range(len(ref)):
        distance, _ = fastdtw(ref[j], mfccs, dist=euclidean)
        cost.append(distance)
        if (j + 1) % num == 0:
            aver.append(sum(cost) / num)
            # Start a fresh group.  Without this reset every average also
            # contained all earlier groups, so the first group always had the
            # smallest value and the prediction was always 0.
            cost = []
    pred = np.argmin(aver)
    print(pred, '\n')
def align(ref, el, radius=5):
    """Warp ``ref`` and ``el`` onto a common FastDTW alignment.

    :param ref: reference sequence
    :param el: sequence to align with ``ref``
    :param radius: fastdtw search radius (previously ignored in favour of a
        hard-coded 5; the default keeps the old behaviour)
    :return: ``(distance, new_ref, new_el)`` where the two arrays contain the
        elements of ``ref`` and ``el`` picked out by each step of the path
    """
    distance, profile = fastdtw(ref, el, radius=radius)
    new_ref = [ref[ref_idx] for ref_idx, _ in profile]
    new_el = [el[el_idx] for _, el_idx in profile]
    return distance, np.array(new_ref), np.array(new_el)
def pairwise_fastdtw(X, **kwargs):
    """Return the symmetric matrix of pairwise FastDTW distances between patterns.

    Each pattern is first turned into a list of ``(position, value)`` pairs.
    Every unordered pair of distinct patterns is compared once and the result
    mirrored; the diagonal is zero.  Extra keyword arguments are forwarded to
    ``fastdtw``.
    """
    indexed = [list(enumerate(pattern)) for pattern in X]
    size = len(indexed)
    matrix = np.zeros((size, size))
    for row in range(size):
        for col in range(row + 1, size):
            pair_distance = fastdtw(indexed[row], indexed[col], **kwargs)[0]
            matrix[row, col] = pair_distance
            matrix[col, row] = pair_distance
    return matrix
def DTW_Matrix(feature_matrix):
    """Fill the upper triangle of a FastDTW distance matrix between rows.

    Every row of ``feature_matrix`` is compared (Euclidean point cost) with
    itself and with every later row.  Entries below the diagonal are left at
    zero.  Progress messages are printed before and after.
    """
    from fastdtw import fastdtw
    from scipy.spatial.distance import euclidean

    print("Starting Dynamic Time Warping between signals..")
    n_signals = feature_matrix.shape[0]
    dtw_matrix = np.zeros((n_signals, n_signals))
    for row in range(n_signals):
        for col in range(row, n_signals):
            pair = fastdtw(feature_matrix[row, :], feature_matrix[col, :],
                           dist=euclidean)
            dtw_matrix[row][col] = pair[0]
    print("Successfully developed matrix.")
    return dtw_matrix
def StartClassification(self, ArrayToClassify):
    """Classify a stroke by its nearest training file and record the result.

    Each entry of the module-level ``TrainingFilePath`` is read as a text file
    whose rows hold at least 15 whitespace-separated numbers.  The file with
    the smallest FastDTW distance (Euclidean point cost) to
    ``ArrayToClassify`` is selected, and its path components decide the
    outcome: if component 1 is ``"Wrong"`` the stroke is a mistake whose error
    type is component 3, otherwise component 2 is the stroke type.
    ``self.noWrong`` / ``self.noCorrect`` are incremented and the result is
    passed to ``Session``.

    Fixes relative to the previous version: the path is opened as a string
    (not as ``str(list)``), row values are indexed by column (not by the path
    string), the file is closed, and "no result yet" is tracked with ``None``
    rather than ``0.0`` so that an exact match is not overwritten.

    NOTE(review): entries are stripped of surrounding whitespace; the original
    called ``.split()`` on them - confirm entries hold exactly one path.
    """
    app_wave = np.array(ArrayToClassify)
    best_distance = None
    shortest_path = ""

    # Compare the stroke against every training file.
    for entry in TrainingFilePath:
        path = entry.strip()
        dataset_wave = []
        with open(path, "r") as handle:
            # One frame per row; the first 15 fields are the frame's values.
            for row in handle:
                fields = row.split()
                dataset_wave.append([float(fields[col]) for col in range(15)])

        distance, _ = fastdtw(np.array(dataset_wave), app_wave, dist=euclidean)
        if best_distance is None or distance < best_distance:
            best_distance = distance
            shortest_path = path

    string = shortest_path.split('/')
    isMistake = False
    StrokeType = ""
    ErrorType = ""
    if string[1] == "Wrong":
        self.noWrong += 1
        isMistake = True
        ErrorType = string[3]
    else:
        self.noCorrect += 1
        StrokeType = string[2]

    Session.GetSessionInfo(ArrayToClassify)
    Session.InsertClassificationResult(ErrorType, isMistake, StrokeType)
def fastdtw_algorithm(t1, t2, key1, key2, n1=None, n2=None):
    """
    Align two time series with FastDTW, plot the correspondence and a QQ plot.

    :param t1: time series 1
    :param t2: time series 2
    :param n1: sequence of event counts corresponding to time series 1
    :param n2: sequence of event counts corresponding to time series 2
    :param key1: label of time series 1
    :param key2: label of time series 2
    :return: None
    """
    # Run dynamic time warping.  ``path`` is the correspondence: each element
    # is a 2-tuple of indices into t1 and t2.  The approximate distance is not
    # used here (original author's note: the default metric is the absolute
    # difference).
    _, path = fastdtw(t1, t2)

    # The aligned series produced by following the path.
    new_t1 = [t1[first] for first, _ in path]
    new_t2 = [t2[second] for _, second in path]

    # Plot for visualisation: series 1 on the line y=1, series 2 on y=5.
    y1 = len(new_t1) * [1]
    y2 = len(new_t2) * [5]
    l1 = plt.scatter(new_t1, y1, c='r')
    l2 = plt.scatter(new_t2, y2, c='g')
    plt.ylim(0, 6)

    # Draw the correspondence found by DTW: one segment per aligned pair.
    for left, right in zip(new_t1, new_t2):
        plt.plot((left, right), (1, 5), c='b')

    plt.xlabel(u"时间/秒, ", fontproperties='SimHei')
    plt.title(u"动态时间规整对应关系图", fontproperties='SimHei')
    # A single legend call: previously a second call replaced the first one
    # and dropped its font properties.
    plt.legend(handles=[l1, l2],
               labels=[key1, key2],
               loc='upper left',
               prop={'family': 'SimHei', 'size': 15})
    plt.show()

    # Draw the QQ plot.
    plot_qq(new_t1, new_t2, key1, key2, n1, n2)
def dtw_distance_one_vs_all(data):
    """
    Calculates the Dynamic Time Warping distance from a base template example.

    Every row of the 2-D array ``data`` is compared (FastDTW, Euclidean point
    cost) with the module-level template ``y``.  A dot is written to stdout
    after each row as a progress marker.

    :return: column vector of shape ``(n_rows, 1)`` holding the distances
    """
    global y
    n, _ = data.shape
    dist = np.zeros(shape=n)
    # ``range`` instead of the Python-2-only ``xrange``.
    for i in range(n):
        dist[i], _ = fastdtw(data[i], y, dist=euclidean)
        sys.stdout.write('.')
        sys.stdout.flush()
    return dist.reshape(-1, 1)
def ts_distance(x,y,radius=2):
    """
    Compute the Dynamic Time Warping distance between two time series,
    divided by the geometric mean of their standard deviations.

    Rule of thumb: the first element of a series tells whether the area is
    valid (-2000 is the null value).  If either first element is negative the
    distance is reported as 0.0 without running DTW.
    """
    either_invalid = x[0] < 0 or y[0] < 0
    if either_invalid:
        return 0.0

    dist = dtw.fastdtw(x, y, radius=radius)[0]
    spread_x = np.std(x)
    spread_y = np.std(y)
    return dist/np.sqrt(spread_x*spread_y)
def cal_fastdtw(self, cycle1, cycle2, rotate):
    """Return the FastDTW distance between two cycles.

    :param cycle1: the first cycle, holding normalized magnitude data.
    :param cycle2: the second cycle, holding normalized magnitude data.
    :param rotate: when true, every circular rotation of ``cycle1`` is tried
        and the smallest distance found is returned.
    :return: the (minimum) DTW distance, computed with a Euclidean point cost.

    Original author's note: when the two cycles have the same length, DTW and
    Manhattan distance are the same.
    """
    min_distance, _ = fastdtw(cycle1, cycle2, dist=euclidean)
    if rotate:
        # Rotation 0 is the unrotated comparison already computed above, so
        # the search starts at 1 instead of repeating it.
        for shift in range(1, len(cycle1)):
            distance, _ = fastdtw(np.roll(cycle1, shift), cycle2, dist=euclidean)
            if distance < min_distance:
                min_distance = distance
    return min_distance
def distance_analysis(self):
    """Compare open/close and high/low price series per company and plot them.

    For every distinct ``Name`` in ``self.time_series_data`` the NaN-free
    ``open`` vs ``close`` and ``high`` vs ``low`` series are compared with
    FastDTW (Euclidean point cost).  Progress and summary statistics
    (max, min, mean, std) are printed, then both distance lists are shown as
    a scatter plot.  Returns nothing.
    """
    data = self.time_series_data
    companies = list(set(data['Name']))
    distances_o_c = []
    distances_h_l = []

    def clean_column(frame, column):
        """Return ``frame[column]`` as an array with NaNs removed."""
        values = np.array(frame[column])
        return values[~np.isnan(values)]

    def summary(values):
        """Format max, min, mean and std separated by single spaces."""
        return "%s %s %s %s" % (np.max(values), np.min(values),
                                np.mean(values), np.std(values))

    for position, company in enumerate(companies, 1):
        # %-formatting prints the same text on Python 2 and 3, unlike the
        # former ``print a, b`` statement.
        print("%s %s" % (position, company))
        all_time_series = data.loc[data['Name'] == company]
        ts_open = clean_column(all_time_series, 'open')
        ts_close = clean_column(all_time_series, 'close')
        ts_high = clean_column(all_time_series, 'high')
        ts_low = clean_column(all_time_series, 'low')

        distance_o_c, _ = fastdtw(ts_open, ts_close, dist=euclidean)
        distance_h_l, _ = fastdtw(ts_high, ts_low, dist=euclidean)
        distances_o_c.append(distance_o_c)
        distances_h_l.append(distance_h_l)

    print(summary(distances_o_c))
    print(summary(distances_h_l))

    eje_x = list(range(len(companies)))
    legends = ['Open vs Close', 'High vs Low']
    plt.scatter(eje_x, distances_o_c, c='blue', s=20)
    plt.scatter(eje_x, distances_h_l, c='red', s=20)
    plt.legend(legends, loc='upper right')
    plt.xlabel('Companies from SP-500')
    plt.ylabel('Distances')
    plt.title(
        'Comparing the distances between Open vs Close and High vs Low values'
    )
    plt.show()
def DTWSimilarity(self, dataX, dataY, gyroscope=False):
    '''
    Combine per-axis FastDTW distances of two recordings into one value.

    The accelerometer channels ``accX``, ``accY`` and ``accZ`` are always
    compared.  When ``gyroscope`` is True the ``alpha``, ``beta`` and
    ``gamma`` channels are compared as well (off by default because of
    speed).  Both data objects are also stored on the instance.

    Arguments:
        dataX: First data object
        dataY: Second data object
        gyroscope: also include the gyroscope channels

    Returns the square root of the sum of the squared channel distances.
    '''
    self.dataX = dataX
    self.dataY = dataY

    channels = ['accX', 'accY', 'accZ']
    if gyroscope:
        channels = channels + ['alpha', 'beta', 'gamma']

    channel_distances = [fastdtw(dataX[name], dataY[name])[0] for name in channels]

    # Similarity function related to the one in the paper:
    # (Akl, A., Feng, C., & Valaee, S. (2011). A novel accelerometer-based
    # gesture recognition system. IEEE Transactions on Signal Processing,
    # 59(12), 6197-6205. ISO 690)
    # The original author notes this form looks like a Euclidean norm, while
    # the paper uses the negated sum of squares.
    squared_total = 0
    for channel_distance in channel_distances:
        squared_total = squared_total + (channel_distance**2)
    return math.sqrt(squared_total)
def make(save_name, tag, X, Y, Z):
    """Rank the recordings by similarity to all others and save them to a workbook.

    For every recording ``i`` a score is accumulated over all other
    recordings ``j``: ``(dx**2 + dz**2) ** 2`` where ``dx`` / ``dz`` are the
    FastDTW distances between the X and Z series of ``i`` and ``j`` (Y is not
    used for scoring).  Recordings are then written, in ascending score
    order, one per row into the sheets ``x``, ``y`` and ``z`` of an xlwt
    workbook that is saved to ``save_name``.

    :param save_name: output workbook path
    :param tag: prefix for the progress messages
    :param X, Y, Z: per-recording series for the three axes

    Fixes relative to the previous version: the score is accumulated over
    all ``j`` (it used to be overwritten, so only the last comparison
    counted), and the ranking no longer depends on a magic upper bound for
    the scores.
    """
    col_count = np.array(X).shape[0]

    score = [0] * col_count
    for i in range(col_count):
        total = 0
        for j in range(col_count):
            if i != j:
                dist_x, _ = fastdtw(X[i], X[j])
                dist_z, _ = fastdtw(Z[i], Z[j])
                dist = (dist_x * dist_x) + (dist_z * dist_z)
                total += dist * dist
        score[i] = total
        print(tag + "\tsum : " + str(score[i]))

    workbook = xlwt.Workbook(encoding='utf-8')  # workbook using utf-8 encoding
    ws_x = workbook.add_sheet("x")  # create sheet
    ws_y = workbook.add_sheet("y")  # create sheet
    ws_z = workbook.add_sheet("z")  # create sheet

    # ``sorted`` is stable, so equal scores keep their original order - the
    # same tie-breaking as the former repeated minimum search.
    ranking = sorted(range(col_count), key=lambda index: score[index])
    for row, best_index in enumerate(ranking):
        print("result min : " + str(score[best_index]) + "\tindex : " + str(best_index))
        xlen = np.array(X[best_index]).shape[0]
        for col in range(xlen):
            ws_x.write(row, col, X[best_index][col])
            ws_y.write(row, col, Y[best_index][col])
            ws_z.write(row, col, Z[best_index][col])

    workbook.save(save_name)
def calc_distances_all(vertices,list_vertices,degreeList, commonList, part, compactDegree = False):
    """Compute and save per-layer FastDTW distances for one partition of vertices.

    ``vertices[k]`` is compared with every vertex in ``list_vertices[k]``.  For
    each layer both vertices have, two distances are computed with radius 1:
    one on the degree lists (cost ``cost_max`` if ``compactDegree`` else
    ``cost``) and one on the common lists (cost ``cost``).  Both result
    dictionaries are post-processed with ``preprocess_consolides_distances``
    and saved as ``distances-r-<part>`` and ``distances-q-<part>``.
    Returns nothing.
    """
    dist_func = cost_max if compactDegree else cost

    distances_r = {}
    distances_q = {}
    for cont, v1 in enumerate(vertices):
        lists_v1 = degreeList[v1]
        common_v1 = commonList[v1]

        for v2 in list_vertices[cont]:
            lists_v2 = degreeList[v2]
            common_v2 = commonList[v2]

            layers_r = {}
            layers_q = {}
            distances_r[v1,v2] = layers_r
            distances_q[v1,v2] = layers_q

            shared_layers = min(len(lists_v1),len(lists_v2))
            for layer in range(shared_layers):
                layers_r[layer] = fastdtw(lists_v1[layer],lists_v2[layer],radius=1,dist=dist_func)[0]
                layers_q[layer] = fastdtw(common_v1[layer],common_v2[layer],radius=1,dist=cost)[0]

    preprocess_consolides_distances(distances_r)
    preprocess_consolides_distances(distances_q)
    saveVariableOnDisk(distances_r,'distances-r-'+str(part))
    saveVariableOnDisk(distances_q,'distances-q-'+str(part))
    return
def Harsh_ChangeLine_Left(
        input_df,
        Model_param=Model_param['troca_faixa_esquerda_agressiva']):
    """Evaluate a fixed decision tree over FastDTW distances to reference rows.

    Each node compares the FastDTW distance between ``input_df`` and the
    reference series ``Model_param[row][1]`` against either the stored
    threshold ``Model_param[row][0]`` or a literal constant.

    :return: True or False according to the leaf reached
    """
    def within(row, limit=None):
        """True if the distance to reference ``row`` is at most the limit."""
        measured = fastdtw(input_df, Model_param[row][1])[0]
        threshold = Model_param[row][0] if limit is None else limit
        return bool(measured <= threshold)

    if within(3):
        if within(1, 511.02):
            return within(0)
        return True

    if within(1, 837.11):
        return within(1, 408.96)
    return within(2)
def calculate(
    file_list: List[str],
    gt_file_list: List[str],
    args: argparse.Namespace,
    mcd_dict: Dict,
):
    """Calculate MCD for every generated file and store it in ``mcd_dict``.

    For each path in ``file_list`` the single ground-truth path whose
    basename occurs in it is located, both waveforms are read as int16, the
    ground truth is resampled if the sampling rates differ, mel-cepstra are
    extracted from both, aligned with FastDTW (Euclidean point cost) and the
    resulting value is logged and written to ``mcd_dict[<gt basename>]``.

    :param file_list: paths of the generated wav files
    :param gt_file_list: paths of the ground-truth wav files
    :param args: namespace providing ``n_fft``, ``n_shift``, ``mcep_dim`` and
        ``mcep_alpha``
    :param mcd_dict: output mapping, updated in place
    """
    for gen_path in file_list:
        corresponding_list = [
            gt_path for gt_path in gt_file_list
            if _get_basename(gt_path) in gen_path
        ]
        assert len(corresponding_list) == 1
        gt_path = corresponding_list[0]
        gt_basename = _get_basename(gt_path)

        # load wav file as int16
        gen_x, gen_fs = sf.read(gen_path, dtype="int16")
        gt_x, gt_fs = sf.read(gt_path, dtype="int16")

        fs = gen_fs
        if gen_fs != gt_fs:
            # ``np.float`` was an alias of the builtin float and has been
            # removed from NumPy; ``np.float64`` is the same dtype.  The
            # sampling rates are passed by keyword, which both old and new
            # librosa accept.
            gt_x = librosa.resample(gt_x.astype(np.float64),
                                    orig_sr=gt_fs,
                                    target_sr=gen_fs)

        # extract ground truth and converted features
        extract_options = dict(
            fs=fs,
            n_fft=args.n_fft,
            n_shift=args.n_shift,
            mcep_dim=args.mcep_dim,
            mcep_alpha=args.mcep_alpha,
        )
        gen_mcep = sptk_extract(x=gen_x, **extract_options)
        gt_mcep = sptk_extract(x=gt_x, **extract_options)

        # DTW
        _, path = fastdtw(gen_mcep, gt_mcep, dist=spatial.distance.euclidean)
        twf = np.array(path).T
        gen_mcep_dtw = gen_mcep[twf[0]]
        gt_mcep_dtw = gt_mcep[twf[1]]

        # MCD
        diff2sum = np.sum((gen_mcep_dtw - gt_mcep_dtw)**2, 1)
        mcd = np.mean(10.0 / np.log(10.0) * np.sqrt(2 * diff2sum), 0)
        logging.info(f"{gt_basename} {mcd:.4f}")
        mcd_dict[gt_basename] = mcd
def algo(input_file_name):  # input_file_name -> must include dir + name.
    """Return the emphasised word of the option file closest to the input audio.

    All emphasis options for the sentence are generated into a fixed
    directory, a window of log filter-bank features is extracted from the
    input and from every option file, and the option with the smallest
    FastDTW distance (Euclidean point cost) is selected.  The answer is the
    text between the first two ``%`` characters of that file's name.

    Intermediate values are printed along the way.  The MFCCs that used to be
    computed (and discarded) for every file are no longer calculated.
    """
    all_options_path = r'/home/akiva/Documents/Do_not_delete/all_options_emphsis'
    audio_sentence = get_audio_sentence(input_file_name)
    print('audio_sentence = ', audio_sentence)

    # creates all the emphasis options:
    a = create_all_emph_options(all_options_path, audio_sentence)
    print('a=', a)

    # dtw:
    frames = 50  # 20
    first_frame = 30
    mfccs = 20  # upto 26!

    def feature_window(wav_path):
        """Scaled log filter-bank window of one wav file."""
        (rate, sig) = wav.read(wav_path)
        curr = logfbank(sig, rate)
        return curr[first_frame:(first_frame + frames), 0:mfccs] / 20

    # Features of the input wav file
    print(input_file_name)
    input_file_data = feature_window(input_file_name)

    # Features of each permutation of 1 emphasized word
    file_names = []
    distances = []
    for file_name in listdir(all_options_path):
        print(file_name)
        file_names.append(file_name)
        current_file_data = feature_window(all_options_path + "/" + file_name)
        distance, _ = fastdtw(input_file_data, current_file_data, dist=euclidean)
        distances.append(distance)

    print(file_names)
    print(distances)
    min_distance_index = min(enumerate(distances), key=itemgetter(1))[0]
    print(min_distance_index)

    # The emphasised word sits between the first two '%' markers of the name.
    s = file_names[min_distance_index]
    start_end_indexes = [pos for pos, char in enumerate(s) if char == '%']
    print(start_end_indexes)
    answer = s[start_end_indexes[0] + 1:start_end_indexes[1]]
    return answer
def _rolling_star_series(csv_name):
    """Load one review CSV and return its 100-review rolling mean of ``stars``."""
    frame = pd.DataFrame(pd.read_csv(csv_name),
                         columns=['id', 'date', 'stars', 'text'])

    # Several reviews can share a date; spread them by whole hours so the
    # index is unique.
    # https://stackoverflow.com/questions/44128600/how-should-i-handle-duplicate-times-in-time-series-data-with-pandas
    frame['date'] = pd.to_datetime(frame['date'], format='%Y-%m-%d')
    frame['date'] = frame['date'] + pd.to_timedelta(
        frame.groupby('date').cumcount(), unit='h')
    frame = frame.sort_values(by=['date']).set_index('date')

    # ``.loc`` replaces the ``.ix`` indexer that was removed from pandas.
    frame = frame.loc['2012-6-1':'2017-5-1']
    return frame['stars'].rolling(window=100).mean().dropna()


def pattern_finder(bName_1, bName_2):
    """Print and return the FastDTW distance between two businesses' ratings.

    ``<bName>.csv`` is read for both names; each is reduced to the rolling
    mean (window 100) of its ``stars`` column between 2012-6-1 and 2017-5-1.
    The trailing samples of both series, cut to the length of the shorter
    one, are compared with FastDTW using a Euclidean point cost.
    """
    ts_1 = _rolling_star_series(bName_1 + ".csv")
    ts_2 = _rolling_star_series(bName_2 + ".csv")

    # Compare the same number of most recent points from both series.
    compare_len = min(len(ts_1), len(ts_2))
    x = ts_1.iloc[-compare_len:].values
    y = ts_2.iloc[-compare_len:].values

    dtw_dist, _ = fastdtw(x, y, dist=euclidean)
    print(dtw_dist)
    return dtw_dist
def main():
    """Find and map the nearest training trajectories for the first 5 test rows.

    For each of the first five trajectories in ``test_set_a1.csv`` the
    FastDTW distance (``haversine`` point cost) to the trajectories of
    ``train_set.csv`` is computed.  The test trajectory and its five nearest
    training trajectories are each drawn into an HTML map whose file name
    carries the elapsed time, journey pattern id and distance.
    """
    hole_time = time.time()
    trainSet = pd.read_csv('train_set.csv',
                           converters={"Trajectory": literal_eval})
    testSet = pd.read_csv('test_set_a1.csv',
                          converters={"Trajectory": literal_eval})

    for i in range(0, 5):
        start_time = time.time()

        # Elements 1 and 2 of every trajectory point are used as lon and lat.
        test_trajectory = testSet['Trajectory'][i]
        lon = [point[1] for point in test_trajectory]
        lat = [point[2] for point in test_trajectory]
        query = np.asarray([[point[1], point[2]] for point in test_trajectory])

        dist = []
        # NOTE(review): the loop starts at 1, so training row 0 is never
        # compared - confirm this is intended.
        for k in range(1, len(trainSet)):
            paramtrain = [[point[1], point[2]]
                          for point in trainSet['Trajectory'][k]]
            distance, _ = fastdtw(query, np.asarray(paramtrain), dist=haversine)
            dist.append((distance, trainSet['journeyPatternId'][k], k))
        sortedDist = sorted(dist, key=operator.itemgetter(0))
        elapsed_time = time.time() - start_time

        # ``//`` keeps the midpoint an integer index on Python 3 as well.
        gmap = gmplot.GoogleMapPlotter(lat[len(lat) // 2], lon[len(lon) // 2], 11)
        gmap.plot(lat, lon, 'forestgreen', edge_width=9)
        name = "test_" + str(i + 1) + "_time_" + str(elapsed_time) + ".html"
        gmap.draw(name)

        for y, (neighbour_distance, pattern_id, l) in enumerate(sortedDist[:5]):
            train_trajectory = trainSet['Trajectory'][l]
            lontrain = [point[1] for point in train_trajectory]
            lattrain = [point[2] for point in train_trajectory]
            gmap = gmplot.GoogleMapPlotter(lattrain[len(lattrain) // 2],
                                           lontrain[len(lontrain) // 2], 11)
            gmap.plot(lattrain, lontrain, 'forestgreen', edge_width=9)
            name = "test_" + str(i + 1) + "_neighbor_" + str(y) + "_jp_" + str(
                pattern_id) + "dist" + str(neighbour_distance) + ".html"
            gmap.draw(name)

    hole_time = time.time() - hole_time
def _fastdtw(args):
    """Worker that unpacks one job tuple and returns ``(a, b, distance)``.

    ``args`` is ``(arr1, a, b, radius)``.  ``b`` is parsed into a sequence
    with ``parse`` and compared with ``arr1`` using FastDTW at the given
    radius; ``a`` and ``b`` are passed through unchanged.
    """
    arr1, a, b, radius = args
    arr2 = parse(b)
    # When not using https://github.com/orestisfl/fastdtw/ add:
    #   dist=lambda x, y: (int(x) != int(y))
    distance = fastdtw(arr1, arr2, radius=radius)[0]
    return (a, b, distance)
def dtw_dist(mfcc1, mfcc2):
    '''Dynamic time warping distance between two MFCC matrices.

    Both matrices are cut to the smaller second-axis length and transposed
    before FastDTW (Euclidean point cost) is run.  The distance is divided by
    the product of the two sequence lengths.
    '''
    shared_len = min(mfcc1.shape[1], mfcc2.shape[1])
    seq1 = mfcc1[:, :shared_len].T
    seq2 = mfcc2[:, :shared_len].T
    raw_distance = fastdtw(seq1, seq2, dist=euclidean)[0]
    return raw_distance / (seq1.shape[0] * seq2.shape[0])
def testFunc():
    """Collect FastDTW distances between every grid cell and its 8 neighbours.

    For time index 4, longitudes 1..189 and latitudes 1..91, the 9-step time
    slice of each cell is compared (Euclidean point cost) with the same slice
    of each neighbour given by the module-level ``neighbourlat`` /
    ``neighbourlong`` offsets.  Every ``(distance, neighbour number)`` pair is
    appended to the module-level ``dist`` list.  Data is read from the
    module-level ``f``.
    """
    timewindow = 9
    # Floor division keeps the slice bounds integral on Python 3 as well.
    half_window = timewindow // 2
    air = f.variables['air']

    for timeIndex in range(4, 5, 1):
        lower = timeIndex - half_window
        upper = timeIndex + half_window + 1
        for iterlong in range(1, 190):
            for iterlat in range(1, 92):
                currtimeslice = air[lower:upper, 0, iterlat, iterlong]
                for neighbour in range(8):
                    neighbourtimeslice = air[lower:upper, 0,
                                             iterlat + neighbourlat[neighbour],
                                             iterlong + neighbourlong[neighbour]]
                    distance, _ = fastdtw(currtimeslice, neighbourtimeslice,
                                          dist=euclidean)
                    dist.append((distance, neighbour))
def perform_dtw(ts_1, ts_2):
    """Print and return the FastDTW distance between the tails of two series.

    Both pandas series are cut to the trailing ``min(len(ts_1), len(ts_2))``
    values before being compared with a Euclidean point cost.
    """
    # Compare the same number of most recent points from both series.
    compare_len = min(len(ts_1), len(ts_2))
    x = ts_1.iloc[-compare_len:].values
    y = ts_2.iloc[-compare_len:].values
    dtw_dist, _ = fastdtw(x, y, dist=euclidean)
    # Function-call form prints the same text on Python 2 and 3.
    print(dtw_dist)
    return dtw_dist
def perform_quartile_dtw(ts_1, ts_2):
    """Print and return the FastDTW distance between the tails of two sequences.

    Both sequences are cut to the trailing ``min(len(ts_1), len(ts_2))``
    values before being compared with a Euclidean point cost.

    NOTE(review): the truncated sequences were previously computed but the
    untruncated inputs were passed to fastdtw; the truncated ones are now
    used, matching ``perform_dtw``.  Confirm this is the intended behaviour.
    """
    # Compare the same number of most recent points from both sequences.
    compare_len = min(len(ts_1), len(ts_2))
    x = ts_1[-compare_len:]
    y = ts_2[-compare_len:]
    dtw_dist, _ = fastdtw(x, y, dist=euclidean)
    # Function-call form prints the same text on Python 2 and 3.
    print(dtw_dist)
    return dtw_dist
def dist(df):
    """Return the symmetric FastDTW distance matrix between the columns of ``df``.

    Columns are addressed by the labels ``0 .. n-1``.  NaNs are dropped from
    each column before the comparison, which uses a Euclidean point cost.
    The result is a DataFrame indexed and labelled ``0 .. n-1`` with zeros on
    the diagonal.
    """
    n_cols = df.shape[1]
    res = pd.DataFrame(index=np.arange(n_cols), columns=np.arange(n_cols))
    for first in range(n_cols):
        res.iloc[first, first] = 0
        for second in range(first + 1, n_cols):
            pair = fastdtw(df[first].dropna().values,
                           df[second].dropna().values,
                           dist=euclidean)
            res.iloc[first, second] = pair[0]
            res.iloc[second, first] = pair[0]
    return res
def run_dtw_process(params):
    """Align one reference point cloud against every cloud in the collection.

    ``params`` is ``(ref_key, point_clouds)``.  The cloud stored under
    ``ref_key`` is compared with each entry of ``point_clouds`` (itself
    included) using FastDTW with
    ``_transform_invariant_point_cloud_distance`` as point cost.

    :return: ``(mean path cost over all clouds, {key: warping path})``
    """
    ref_key, point_clouds = params
    dtw_results = dict()
    total_cost = 0
    reference = point_clouds[ref_key]
    for key, cloud in point_clouds.items():
        print("start dtw", ref_key, key)
        alignment = fastdtw(reference, cloud,
                            dist=_transform_invariant_point_cloud_distance)
        dtw_results[key] = alignment[1]
        total_cost += alignment[0]
    return total_cost/len(point_clouds), dtw_results
def calculate_distance(data, source, target):
    """Return the scaled FastDTW distance between two entries of ``data``.

    The transposes of ``data[source]`` and ``data[target]`` are compared with
    radius 100 and the distance is divided by 10000000.  Distances of
    9999999 or more are reported on stdout.  Any exception is handed to
    ``raise_exception`` together with this function's name.
    """
    try:
        # calculate distance of source and target for each task
        src_series = data[source].T
        tgt_series = data[target].T
        distance = fastdtw(src_series, tgt_series, radius=100)[0]
        if distance >= 9999999:
            print(' ======== source: ', source, 'target: ', target, 'distance: ', distance)
        return distance / 10000000
    except Exception as ex:
        raise_exception(calculate_distance.__name__, ex)
def dtwDist(x, y):
    """Dynamic Time Warping Distance

    Returns ``(distance, path)`` as computed by FastDTW with a Euclidean
    point cost.  The optional dependencies are imported on first use; a
    missing one is reported through ``util.missing_module``.
    """
    try:
        from fastdtw import fastdtw
    except ImportError:
        util.missing_module('fastdtw')
    try:
        from scipy.spatial.distance import euclidean
    except ImportError:
        util.missing_module('scipy')

    result = fastdtw(x, y, dist=euclidean)
    dist = result[0]
    path_data = result[1]
    return dist, path_data
def getDTWPath(x, y):
    """Plot two sequences with their FastDTW alignment and return the path.

    Both sequences are drawn as labelled lines; every step of the warping
    path (Euclidean point cost) is drawn as a dotted grey segment joining the
    matched samples.  The figure is shown before returning.
    """
    path = fastdtw(x, y, dist=euclidean)[1]

    plt.plot(x, label='x')
    plt.plot(y, label='y')
    for idx_x, idx_y in path:
        plt.plot([idx_x, idx_y], [x[idx_x], y[idx_y]],
                 color='gray', linestyle='dotted', linewidth=1)
    plt.legend()
    plt.title('Our two temporal sequences')
    plt.show()
    return path
def NCC(centroids, instance):
    """
    Nearest-centroid classification using fastdtw.

    Returns a dictionary with the distance between ``instance`` and the
    centroid of every class, together with the class label of minimal
    distance.
    """
    distances = {}
    for label in centroids.keys():
        distances[label] = fastdtw.fastdtw(instance, centroids[label])[0]
    class_label = min(distances, key=lambda candidate: distances[candidate])
    return distances, class_label
def dtwDist(x, y):
    """Dynamic Time Warping Distance.

    Returns the FastDTW distance between ``x`` and ``y`` computed with a
    Euclidean point cost.

    :raises ImportError: if ``fastdtw`` or ``scipy`` is not installed.
        Previously the problem was only printed and the function then failed
        with an unrelated unbound-name error.
    """
    try:
        from fastdtw import fastdtw
    except ImportError:
        raise ImportError("fastdtw package is not installed.")
    try:
        from scipy.spatial.distance import euclidean
    except ImportError:
        raise ImportError("scipy package is not installed.")

    dist, _ = fastdtw(x, y, dist=euclidean)
    return dist
def run(self):
    """Compute one block of the column-to-column FastDTW matrix and save it.

    For every column position in ``self.my_range`` the column, paired with a
    1-based row counter, is compared (cosine point cost) with each of the
    first ``self.len_data`` columns.  The rows are written without header or
    index to ``<out_path><index, zero-padded to 3>thread.txt``.

    Fixes relative to the previous version: the distance is taken from the
    ``(distance, path)`` tuple returned by fastdtw, and the inner comparison
    uses column ``j`` (it used to rebuild column ``i``, comparing every
    column with itself).

    NOTE(review): ``.iloc`` replaces the removed ``.ix`` indexer and treats
    ``i`` / ``j`` as column positions - confirm the columns were not being
    selected by integer label.
    """
    row_counter = pd.DataFrame(range(1, len(self.data) + 1))

    def indexed_column(position):
        """Return the column at ``position`` next to the 1-based row counter."""
        paired = pd.concat([row_counter, self.data.iloc[:, position]], axis=1)
        return np.array(paired)

    result = np.empty((0, self.len_data), float)
    for i in self.my_range:
        x = indexed_column(i)
        temp = np.empty(shape=[1, self.len_data])
        for j in range(self.len_data):
            distance, _ = fastdtw(x, indexed_column(j), dist=cosine)
            temp[0, j] = distance
        result = np.append(result, np.array(temp), axis=0)

    score = pd.DataFrame(data=result)
    score.to_csv(out_path + str(self.index).zfill(3) + "thread.txt",
                 header=None, index=False)
def dtw_pr(pr0, pr1):
    """Warp two piano-rolls onto a common FastDTW alignment.

    The path is computed on the rolls summed along the instrument dimension,
    using the Euclidean distance between frames clipped to at most 1 and cast
    to int.  Both original rolls are then warped along their side of the
    path.

    :return: ``(pr0_warp, pr1_warp)``
    """
    # Flatten pr to compute the path
    flat0 = sum_along_instru_dim(pr0)
    flat1 = sum_along_instru_dim(pr1)

    def clip_to_one(frame):
        return np.minimum(frame, 1).astype(int)

    def frame_cost(a, b):
        return euclidean(clip_to_one(a), clip_to_one(b))

    path = fastdtw(flat0, flat1, dist=frame_cost)[1]

    # Get paths
    path0 = []
    path1 = []
    for step in path:
        path0.append(step[0])
        path1.append(step[1])

    pr0_warp = warp_pr_aux(pr0, path0)
    pr1_warp = warp_pr_aux(pr1, path1)
    return pr0_warp, pr1_warp
def testFunc():
    """Collect the two smallest neighbour distances of every grid cell.

    For time indices 4..6, longitudes 1..189 and latitudes 1..91, the 9-step
    time slice of each cell is compared (FastDTW, Euclidean point cost) with
    the same slice of each of its 8 neighbours given by the module-level
    ``neighbourlat`` / ``neighbourlong`` offsets.  The two smallest distances
    per cell are appended to the module-level ``globaldist``.  Data is read
    from the module-level ``f``.
    """
    global globaldist
    timewindow = 9
    # Floor division keeps the slice bounds integral on Python 3 as well.
    half_window = timewindow // 2
    air = f.variables['air']

    for timeIndex in range(4, 7):
        lower = timeIndex - half_window
        upper = timeIndex + half_window + 1
        for iterlong in range(1, 190):
            for iterlat in range(1, 92):
                currtimeslice = air[lower:upper, 0, iterlat, iterlong]
                currdist = []
                for neighbour in range(8):
                    neighbourtimeslice = air[lower:upper, 0,
                                             iterlat + neighbourlat[neighbour],
                                             iterlong + neighbourlong[neighbour]]
                    distance, _ = fastdtw(currtimeslice, neighbourtimeslice,
                                          dist=euclidean)
                    currdist.append(distance)
                currdist.sort()
                globaldist += currdist[0:2]
def estimate_twf(orgdata, tardata, distance='melcd', fast=True, otflag=None):
    """Estimate the time warping function between two feature sequences.

    Parameters
    ---------
    orgdata : array, shape(`T_org`, `dim`)
        Array of source feature
    tardata : array, shape(`T_tar`, `dim`)
        Array of target feature
    distance : str, optional
        Distance function; only `melcd` (mel-cepstrum distortion) is accepted
    fast : bool, optional
        Use fastdtw instead of dtw
        Default set to `True`
    otflag : str,
        Perform alignment into either original or target length
        `org` : align into original length
        `tar` : align into target length
        Default set to None

    Returns
    ---------
    twf : array, shape(`2`, `T`)
        Time warping function between original and target

    Raises
    ---------
    ValueError
        If `distance` is anything other than `melcd`
    """
    if distance != 'melcd':
        raise ValueError('other distance metrics than melcd does not support.')

    def distance_func(x, y):
        return melcd(x, y)

    if fast:
        warping_path = fastdtw(orgdata, tardata, dist=distance_func)[1]
        twf = np.array(warping_path).T
    else:
        _, _, _, twf = dtw(orgdata, tardata, distance_func)

    if otflag is None:
        return twf
    return modify_twf(twf, otflag=otflag)
def get1NN(self,mfcc_feat2):
    """Return the key of the stored feature sequence nearest to ``mfcc_feat2``.

    Stored sequences (``self.mfccs``) are visited in sorted key order.  A
    sequence is only compared when its length differs from the query's by
    less than ``len(mfcc_feat2) * self.ldis``.  The FastDTW distance
    (Euclidean point cost) is normalised by the path length, and each
    comparison is printed as ``key query-length stored-length distance``.

    :return: ``(key, normalised distance)`` of the best match, or
        ``("none_none_0000", distance)`` if the best normalised distance
        exceeds ``self.distThreshold`` (``1e40`` when nothing was compared)
    """
    minrd = 1e40
    mkrd = "none_none_0000"
    l2 = len(mfcc_feat2)
    for k in sorted(self.mfccs.keys()):
        mfcc_feat1 = self.mfccs[k]
        l1 = len(mfcc_feat1)
        # discriminating by length
        if abs(l1 - l2) < l2 * self.ldis:
            distance, path = fastdtw(mfcc_feat1, mfcc_feat2, dist=euclidean)
            rd = distance / len(path)  # normalise distance by path length
            # %-formatting prints the same text on Python 2 and 3, unlike the
            # former ``print a, b`` statement.
            print("%s %s %s %s" % (k, l2, l1, rd))
            if rd < minrd:
                minrd = rd
                mkrd = k
    if minrd > self.distThreshold:
        return "none_none_0000", minrd
    return mkrd, minrd
def isCorePoint(minPts, epsDist, currlong, currlat, minlong, minlat, rangelong, rangelat, timeindex, timewindow):
    """Decide whether one grid cell is a core point and link it to close neighbours.

    The cell's time slice (``timewindow // 2`` steps on either side of
    ``timeindex``) is compared with FastDTW (Euclidean point cost) against the
    same slice of each of its 8 neighbours, whose offsets come from the
    module-level ``neighbourlat`` / ``neighbourlong``.  If at least ``minPts``
    cells (the cell itself included) lie within ``epsDist``:

    * the cell's flat index is added to ``globalcorepoints`` and removed from
      ``globalnoisepoints``;
    * an edge is added in graph ``g`` to every close neighbour that lies
      inside the ``[minlat, minlat + rangelat) x [minlong, minlong + rangelong)``
      window, and that neighbour is removed from ``globalnoisepoints``.

    Nothing is returned; all results are written to module-level state.
    Data is read from the module-level ``f``.

    NOTE(review): the nesting of the edge-adding loop under the core-point
    test was reconstructed from whitespace-mangled source - confirm against
    the caller.
    """
    # Floor division keeps the slice bounds integral on Python 3 as well.
    half_window = timewindow // 2
    lower = timeindex - half_window
    upper = timeindex + half_window + 1
    air = f.variables['air']
    currtimeslice = air[lower:upper, 0, currlat, currlong]
    currindex = (currlat - minlat) * rangelong + (currlong - minlong)

    countwithineps = 1  # count self. hence 1
    neighbours = []     # (neighbour lat, neighbour long, distance)
    for k in range(8):
        nlat = currlat + neighbourlat[k]
        nlong = currlong + neighbourlong[k]
        neighbourtimeslice = air[lower:upper, 0, nlat, nlong]
        distance, _ = fastdtw(currtimeslice, neighbourtimeslice, dist=euclidean)
        neighbours.append((nlat, nlong, distance))
        if distance < epsDist:
            countwithineps += 1

    if countwithineps < minPts:
        return

    globalcorepoints.add(currindex)
    if currindex in globalnoisepoints:
        globalnoisepoints.remove(currindex)

    # Add an edge to every neighbour within epsDist.  Neighbours outside the
    # spatial window were counted above but are not added to the graph.
    for nlat, nlong, distance in neighbours:
        # Chained comparisons replace ``in range(...)``, which builds a list
        # on every test under Python 2.
        inside_window = (minlat <= nlat < minlat + rangelat
                         and minlong <= nlong < minlong + rangelong)
        if distance < epsDist and inside_window:
            neighbourindex = (nlat - minlat) * rangelong + (nlong - minlong)
            g.add_edge(currindex, neighbourindex)
            if neighbourindex in globalnoisepoints:
                globalnoisepoints.remove(neighbourindex)
def calc_dtw(x_train, x_test, train_len, test_len, radius=1, total_shifts = 7):
    """Compute DTW distances between time-shifted test features and training features.

    Each test matrix ``x`` (time runs along axis 1, since the shift range is
    derived from ``x.shape[1]``) is circularly shifted along the time axis
    by ``total_shifts`` evenly spaced offsets covering roughly +/-15% of its
    length.  Every shifted version is compared with every training matrix
    using FastDTW and the norm of the frame difference as point cost.

    A pair is only compared when the test length lies within
    ``max(30% of the training length, 5)`` of the training length;
    otherwise the distance is set to ``1000000``.

    :param x_train: sequence of training feature matrices.
    :param x_test: sequence of test feature matrices.
    :param train_len: length of each training item (same order as ``x_train``).
    :param test_len: length of each test item (same order as ``x_test``).
    :param radius: FastDTW radius.
    :param total_shifts: number of shifted variants; an even value is
        rounded up to the next odd one so both directions get the same count.
    :return: nested list indexed ``[test][shift][train]`` of distances.
    """
    # Same number of shifts in each direction => odd total.
    if total_shifts % 2 == 0:
        total_shifts += 1
    half = total_shifts // 2

    def frame_distance(a, b):
        return norm(a - b)

    master_dist = []
    for i, x in enumerate(x_test):
        max_shift = x.shape[1] * 0.15  # indicate % range here
        # ``half`` is 0 when only the unshifted signal is requested; the
        # step is 0 when the signal is too short for a one-frame step.  Both
        # cases used to crash (ZeroDivisionError / range() step of zero);
        # they now simply yield unshifted copies.
        shift = int(max_shift / half) if half else 0
        offsets = [step * shift for step in range(-half, half + 1)]

        mfcc_dist = []
        for d in offsets:
            # Roll along the time axis only; without ``axis`` numpy rolls the
            # flattened array and mixes values of different coefficients.
            shifted = np.roll(x, d, axis=1).T
            dist = []
            for i2, x2 in enumerate(x_train):
                len_threshold = max(train_len[i2] * 0.3, 5)
                min_thres = train_len[i2] - len_threshold
                max_thres = train_len[i2] + len_threshold
                if min_thres <= test_len[i] <= max_thres:
                    distance, _path = fastdtw(shifted, x2.T, radius=radius,
                                              dist=frame_distance)
                else:
                    # Lengths too different: assume a very large distance.
                    distance = 1000000
                dist.append(distance)
            mfcc_dist.append(dist)
        master_dist.append(mfcc_dist)
    return master_dist
def get_nearest_n_dtw(self, train, label, test):
    """
    Collect up to ``self.nn_num`` training series nearest to ``test`` under DTW.

    :param train: Training dataset; iterated element by element, each
        element must provide ``tolist()``.
    :param label: Training labels, indexed like ``train``.
    :param test: One test series (converted to a column vector).
    :return: ``(distances, series, labels)`` of the retained neighbours as
        numpy arrays.  The scan stops early once a closer candidate shows
        up while the worst retained distance is already below
        ``self.max_dist``.
    """
    kept_dists = numpy.array([])
    kept_labels = numpy.array([])
    kept_series = []
    query = numpy.array(test).reshape(-1, 1)

    for idx, sample in enumerate(train):
        sample_as_list = sample.tolist()
        candidate = numpy.array(sample).reshape(-1, 1)
        dist, _path = fastdtw(query, candidate, dist=euclidean)

        # Still filling the neighbour list: accept unconditionally.
        if len(kept_dists) < self.nn_num:
            kept_dists = numpy.append(kept_dists, dist)
            kept_series.append(sample_as_list)
            kept_labels = numpy.append(kept_labels, label[idx])
            continue

        worst = numpy.max(kept_dists)
        if not worst > dist:
            continue
        if worst < self.max_dist:
            break

        # Replace the current worst neighbour with the closer candidate.
        slot = numpy.argmax(kept_dists)
        kept_dists[slot] = dist
        kept_series[slot] = sample_as_list
        kept_labels[slot] = label[idx]

    return kept_dists, numpy.array(kept_series), kept_labels
def testfdtw(x, y, color):
    """Run the local ``fastdtw`` on ``x`` and ``y`` and plot the warp path.

    Prints the largest finite accumulated cost together with the mean
    length of the two inputs, then draws the path with matplotlib
    (``plt.plot``) in the requested ``color``.

    NOTE(review): ``fastdtw.fastdtw`` is expected to return
    ``(D, dist, path)`` where ``D`` maps ``(i, j)`` to a sequence whose
    first item is a cost - this is a project-local variant, confirm.
    """
    D, _dist, path = fastdtw.fastdtw(x, y)

    maxcost = max(cell[0] for cell in D.values() if cell[0] != np.inf)
    # Pre-formatted so the output is the same on Python 2 and 3 (the old
    # print statement emitted no space after the tab).
    print("\t%s %s" % (maxcost, (len(x) + len(y)) // 2))

    # Dense copy of the sparse cost table; only needed by the (currently
    # disabled) cost-matrix image, kept so that behaviour is unchanged.
    mat = np.zeros((len(x), len(y), 4))
    mat.fill(np.inf)
    for i, j in D:
        mat[i - 1, j - 1] = D[i, j]

    plotp = np.array(path).T
    plt.plot(plotp[1], plotp[0], color=color)
def find_lowest(self):
    """Find the candidate molecule whose peak list is DTW-closest to the experiment.

    For every id in ``self.candidates`` the frequency list is fetched (up
    to ``self.max_frequency``) and compared against ``self.efreqlist`` with
    FastDTW.  Candidates for which FastDTW raises ``IndexError`` are
    skipped.  The best distance, id and (when a match exists) the molecule
    name are printed.

    :return: ``(mid, distance, path)`` of the best candidate, or
        ``(None, 0, None)`` when no candidate could be compared.
    """
    min_distance = 0
    min_path = None
    min_mid = None

    for mid in self.candidates:
        frequencies, _intensities = get_peaks.get_frequency_intensity_list(
            conn, mid, max=self.max_frequency, min=0)
        try:
            distance, path = fastdtw(self.efreqlist, frequencies, dist=euclidean)
        except IndexError:
            continue
        # The first comparable candidate always wins; later ones must be closer.
        if min_path is None or distance < min_distance:
            min_distance = distance
            min_path = path
            min_mid = mid

    print(min_distance)
    print(min_mid)
    if min_mid is not None:
        # Only look the name up when there is an id to look up.
        print(get_molecules.getName(conn, min_mid))
    return min_mid, min_distance, min_path
# Lookup table for the "euclidean mod" distance: the position of a first-
# component difference (taken mod 12) in this list is used as its distance
# term.  NOTE(review): the musical/physical meaning of this ordering is not
# visible here - confirm with the author.
euc = [0, 7, 4, 9, 2, 5, 11, 8, 3, 10, 6, 1]


def dist_c(a, b):
    """Weighted Euclidean distance over components 1 and 2 only."""
    return ((((a[1] - b[1]) * 0.125) ** 2) + (((a[2] - b[2]) * 0.25) ** 2)) ** 0.5


def dist_mod(a, b):
    """Like ``dist_c`` plus a table-based term for component 0 (mod 12)."""
    return ((euc.index(int(a[0] - b[0]) % 12) ** 2)
            + (((a[1] - b[1]) * 0.125) ** 2)
            + (((a[2] - b[2]) * 0.25) ** 2)) ** 0.5


def dist_x(a, b):
    """Squared difference of component 0."""
    return ((a[0] - b[0]) ** 2)


# time.clock was removed in Python 3.8; prefer perf_counter when it exists
# and fall back to clock on interpreters that lack it.
_timer = getattr(time, "perf_counter", None) or time.clock

startLoad = _timer()
print(fastdtw.fastdtw(lmr_ground, lmr_match, dist=dist_mod)[0])
print(fastdtw.fastdtw(lmr_ground, lmr_mismatch, dist=dist_mod)[0])
endLoad = _timer()
print("load time = " + str(endLoad - startLoad))
def distance(self, v1, v2):
    """Return the FastDTW distance between ``v1`` and ``v2`` (Euclidean point cost)."""
    dtw_cost, _warp_path = fastdtw(v1, v2, dist=euclidean)
    return dtw_cost
import numpy as np
from scipy.spatial.distance import euclidean
from matplotlib import pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.cluster.hierarchy import cophenet
from scipy.spatial.distance import pdist
from sklearn import preprocessing
from fastdtw import fastdtw

# Two short 2-D series; y is the middle part of x.
x = np.array([[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]])
y = np.array([[2, 2], [3, 3], [4, 4]])

# Pre-formatted strings print identically on Python 2 and 3 (the previous
# mix of print statement and print(a, b) did not).
print("%s %s" % (x.shape, y.shape))

distance, path = fastdtw(x, y, dist=euclidean)
print("%s %s" % (distance, path))

# Hierarchical clustering of the warp-path index pairs (centroid linkage).
Z = linkage(path, 'centroid')

plt.figure(figsize=(25, 10))
plt.title('Hierarchical Clustering Dendrogram')
plt.xlabel('sample index')
plt.ylabel('distance')
dendrogram(
    Z,
    leaf_rotation=90.,
    leaf_font_size=8.,
)
plt.show()
def shapeEncoding(arrData_raw, nCodingWndSize, nNeighbors=3):
    '''
    Source encoding according to signal shape.

    The data are scaled by their maximum and cut into consecutive,
    non-overlapping windows of ``nCodingWndSize`` points (a shorter
    trailing segment is ignored).  Each window, after subtracting its
    minimum, is coded either as flat (standard deviation not above
    ``FLAT_PATTERN_STD``) or as the template from
    ``generateShapeTemplates`` with the smallest FastDTW distance
    (squared absolute difference as point cost).

    Parameters:
    -----------
    arrData_raw : data
    nCodingWndSize : coding window size in number of data points
    nNeighbors : currently unused; kept for interface compatibility

    Returns:
    --------
    lsDataCode : a list of integer codes, one per window (``None`` for a
                 window where no template produced a finite best distance)
    arrDataShape : numpy.array with the approximating shapes of all windows
                   concatenated, or ``None`` when no window was encoded
    '''
    lsDataCode = []
    lsShapes = []        # per-window shapes, concatenated once at the end
    arrWndShape = None   # deliberately carried over between windows
    nDataLen = len(arrData_raw)
    arrData = arrData_raw / np.max(arrData_raw)

    def sqDiff(a, b):
        return abs(a - b) ** 2.0

    # Only full windows are visited; the last partial segment is dropped.
    for nStartIndex in range(0, nDataLen - nCodingWndSize + 1, nCodingWndSize):
        arrWndData = arrData[nStartIndex: nStartIndex + nCodingWndSize]
        arrWndData_shift = arrWndData - np.min(arrWndData)  # remove base line

        nCode = None
        if np.std(arrWndData_shift) <= FLAT_PATTERN_STD:
            nCode = SHAPE_CODE_FLAT
            arrWndShape = np.zeros(nCodingWndSize)
        else:
            # Templates are only needed for non-flat windows.
            dcPatterns = generateShapeTemplates(nCodingWndSize,
                                                np.ptp(arrWndData_shift))
            dCriteria = float("inf")
            for i, arrShape in dcPatterns.items():
                dDis, _path = fastdtw(arrWndData_shift, arrShape, dist=sqDiff)
                if dDis < dCriteria:
                    dCriteria = dDis
                    nCode = i
                    arrWndShape = arrShape / np.max(arrShape)

        if arrWndShape is not None:
            lsShapes.append(arrWndShape)
        lsDataCode.append(nCode)

    arrDataShape = np.concatenate(lsShapes) if lsShapes else None
    return lsDataCode, arrDataShape
def test_2d_fastdtw(self):
    """FastDTW distance of the 2-D fixtures must be approximately 2 * sqrt(2)."""
    expected = ((1 + 1) ** 0.5) * 2
    result = fastdtw(self.x_2d, self.y_2d, dist=self.dist_2d)
    self.assertAlmostEqual(result[0], expected)
def test_1d_fastdtw(self):
    """FastDTW distance of the 1-D fixtures (default point cost) must equal 2."""
    result = fastdtw(self.x_1d, self.y_1d)
    self.assertEqual(result[0], 2)
def dist(X, Y):
    """Return the FastDTW distance of ``X`` and ``Y`` scaled by their spread.

    The raw distance (point cost ``cost``, radius ``radius``) is divided by
    the sum of the mean row-wise standard deviations of both inputs plus
    ``beta``.  ``cost``, ``radius`` and ``beta`` come from the enclosing
    scope.
    """
    spread_x = np.std(X, axis=1).mean()
    spread_y = np.std(Y, axis=1).mean()
    raw_distance = fastdtw(X, Y, dist=cost, radius=radius)[0]
    scale = spread_x + spread_y + beta
    return raw_distance / scale
def DTW_train(self):
    """Compare same-sentence recordings of the training set pairwise with DTW.

    Two entries of ``self.DTW_Y_train`` are considered the same sentence
    when their names are equal after ``clean_filename_TextGrid`` and
    ``clean_filename_numbers``.  Each entry is used at most once as the
    second ("native") member of a pair, and pairs with identical feature
    arrays are skipped.  For every remaining pair the first four feature
    columns are aligned with FastDTW; a similarity percentage is appended
    to the file ``self.dtw_comparison_native_directory`` and the warp path
    is plotted.

    Any exception is reported on stdout and re-raised.
    """

    def path_similarity(path):
        # Score in [0, 100] from the min-max normalised |i - j| offsets of
        # the warp path.
        offsets = [abs(ix - iy) for ix, iy in path]
        lowest = min(offsets)
        span = max(offsets) - lowest
        if span == 0:
            # Every step has the same offset (a perfectly diagonal path):
            # this used to divide by zero; treat it as a perfect match.
            return 100.0
        norm = [float(off - lowest) / float(span) for off in offsets]
        return 100 - (100 * statistics.mean(norm))

    try:
        features_names = ['In', 'F1', 'F2', 'F3']
        names = list(self.DTW_Y_train)
        # Clean every name once instead of once per pair.
        cleaned = {name: clean_filename_numbers(clean_filename_TextGrid(name))
                   for name in names}
        already_used = set()

        for non_native_name in names:
            for native_name in names:
                if native_name == non_native_name:
                    continue
                if cleaned[native_name] != cleaned[non_native_name]:
                    continue
                if native_name in already_used:
                    continue
                already_used.add(native_name)

                non_native = self.DTW_X_train[non_native_name]
                native = self.DTW_X_train[native_name]

                print("Comparing: {} and {}".format(non_native_name, native_name))

                # No DTW between recordings of the same person.
                if np.array_equal(non_native, native):
                    continue

                with open(self.dtw_comparison_native_directory, 'a') as the_file:
                    the_file.write(
                        "Non native: {} - Native: {}\n".format(non_native_name, native_name))
                    for feat in range(4):
                        _dist, path = fastdtw(non_native[:, feat], native[:, feat])
                        similarity = path_similarity(path)
                        the_file.write("Similarity of {0}: {1:.2f}%\n".format(
                            features_names[feat], similarity))

                        self.distance_cost_plot(path)
                        plt.plot([point[0] for point in path],
                                 [point[1] for point in path])
                        plt.show()
    except Exception:
        # Two spaces after the colon reproduce the old print-statement output.
        print("Error:  %s" % (sys.exc_info(),))
        raise
# Record one utterance (sox trims leading/trailing silence) and load it.
os.system("sox -r 16000 -t alsa default recording.wav silence 1 0.1 1% 1 1.5 1%")
(rate2, sig2) = wav.read("recording.wav")
sig2 = pp.maxabs_scale(sig2)
mfcc_feat2 = mfcc(sig2, rate2)

# Best template by raw DTW distance (mk/mind) and by distance normalised
# with the warp-path length (mkrd/minrd).
mind = 1e40
minrd = 1e40
mk = None
mkrd = None
for k in sorted(mfccs.keys()):
    mfcc_feat1 = mfccs[k]
    distance, path = fastdtw(mfcc_feat1, mfcc_feat2, dist=euclidean)
    rd = distance / len(path)
    if distance < mind:
        mind = distance
        mk = k
    if rd < minrd:
        minrd = rd
        mkrd = k

if mk is None:
    # Previously this surfaced as a NameError on the first print below.
    raise RuntimeError("no reference MFCC template matched; 'mfccs' is empty")

print("%s %s" % (mk, mind))
print("%s %s" % (mkrd, minrd))
mfcc_feat1 = mfccs[mk]
def DTW_test(self):
    """Compare each test recording with training recordings of the same sentence.

    The sentences are read from the file ``self.sentences_directory`` (one
    per line; blank lines are ignored).  For every test item the sentence
    name is looked up in ``self.dictionary_testset`` by its phoneme array.
    For each listed sentence contained in that name, every training entry
    in ``self.dictionary_trainset`` whose key contains the sentence and
    whose phonemes equal the current training item is compared once
    (each key is used at most once overall): the first four feature columns
    are aligned with FastDTW and a similarity percentage is appended to the
    file ``self.dtw_comparison_directory``.

    Any exception is reported on stdout and re-raised.
    """

    def path_similarity(path):
        # Score in [0, 100] from the min-max normalised |i - j| offsets of
        # the warp path.
        offsets = [abs(ix - iy) for ix, iy in path]
        lowest = min(offsets)
        span = max(offsets) - lowest
        if span == 0:
            # Every step has the same offset (a perfectly diagonal path):
            # this used to divide by zero; treat it as a perfect match.
            return 100.0
        norm = [float(off - lowest) / float(span) for off in offsets]
        return 100 - (100 * statistics.mean(norm))

    try:
        features_names = ['In', 'F1', 'F2', 'F3']
        already_used = set()

        with open(self.sentences_directory) as sentences_file:
            sentences = [line.replace('\n', '') for line in sentences_file]
        # An empty string is a substring of every name, so a blank line
        # would match every recording; drop such entries.
        sentences = [sen for sen in sentences if sen]

        for test_idx in range(len(self.DTW_X_test)):
            # Retrieve the sentence name of this test item.
            non_native = self.DTW_X_test[test_idx]
            non_native_phonemes = self.DTW_Y_test[test_idx]
            non_native_sentence = ""
            for key, val in self.dictionary_testset.items():
                if np.array_equal(np.array(val), non_native_phonemes):
                    non_native_sentence = key
                    break

            for sen in sentences:
                if sen not in non_native_sentence:
                    continue
                # Retrieve the "same" sentence from the training set.
                for train_idx in range(len(self.DTW_X_train)):
                    native = self.DTW_X_train[train_idx]
                    native_phonemes = self.DTW_Y_train[train_idx]
                    for key, val in self.dictionary_trainset.items():
                        if sen not in key:
                            continue
                        if not np.array_equal(val, native_phonemes):
                            continue
                        if key in already_used:
                            continue
                        already_used.add(key)

                        print("Comparing: {} and {}".format(non_native_sentence, key))
                        with open(self.dtw_comparison_directory, 'a') as the_file:
                            the_file.write("Non native: {} - Native: {}\n".format(
                                non_native_sentence, key))
                            for feat in range(4):
                                _dist, path = fastdtw(non_native[:, feat], native[:, feat])
                                similarity = path_similarity(path)
                                the_file.write(
                                    "Similarity of {0}: {1:.2f}%\n".format(
                                        features_names[feat], similarity))
    except Exception:
        # Two spaces after the colon reproduce the old print-statement output.
        print("Error:  %s" % (sys.exc_info(),))
        raise