def __init__(self, selepisodio, tf=False, cons=False):
    # Represents a sleep episode via its temperature and flow time series
    class Individuo:
        def __init__(self, nombre, tiempo, temperatura=[], flujo=[], consumo=[]):
            self.nombre = nombre
            self.tiempo = tiempo
            self.stt = temperatura
            self.stf = flujo
            self.stc = consumo

    sel = selepisodio
    print "Standardizing", len(sel.epFiltro), "sleep episodes"
    # Normalize each sleep episode by standardization (temperature and flow)
    self.eps_sueno = []
    if tf:
        for i in sel.epFiltro:
            a = preprocessing.scale(i.temp, copy=True)
            b = preprocessing.scale(i.flujo, copy=True)
            self.eps_sueno.append(Individuo(i.nombre, i.tiempo, temperatura=a, flujo=b))
    elif cons:
        for i in sel.epFiltro:
            a = preprocessing.scale(i.consumo, copy=True)
            self.eps_sueno.append(Individuo(i.nombre, i.tiempo, consumo=a))

    """
    # The diagonal of the distance matrix is not 0 with fastdtw: identical series give distances > 0!
    for i in range(s):
        print eps_sueno[i].stt[-1], eps_sueno[i].stt[-1]
    for i in range(s):
        d, p = fastdtw(eps_sueno[i].stt, eps_sueno[i].stt, dist=euclidean)
        dd, p = fastdtw(eps_sueno[i].stf, eps_sueno[i].stf, dist=euclidean)
        dt = mlpy.dtw_std(eps_sueno[i].stt, eps_sueno[i].stt, dist_only=True)
        df = mlpy.dtw_std(eps_sueno[i].stf, eps_sueno[i].stf, dist_only=True)
        print d, dd, dt, df
    """

    # Compute the pairwise DTW distance matrix between individuals
    s = len(self.eps_sueno)
    self.distancias = np.zeros((s, s))
    if tf:
        for i in range(s):
            for j in range(s):
                #distanceTemp, path = fastdtw(eps_sueno[i].stt, eps_sueno[j].stt, dist=euclidean)  # Temperature distance
                #distanceFlujo, path = fastdtw(eps_sueno[i].stf, eps_sueno[j].stf, dist=euclidean)  # Flow distance
                distanceTemp = mlpy.dtw_std(self.eps_sueno[i].stt, self.eps_sueno[j].stt, dist_only=True)  # Euclidean point distance
                distanceFlujo = mlpy.dtw_std(self.eps_sueno[i].stf, self.eps_sueno[j].stf, dist_only=True)
                self.distancias[j][i] = math.sqrt(math.pow(distanceTemp, 2) + math.pow(distanceFlujo, 2))  # Combined Euclidean distance
    elif cons:
        for i in range(s):
            for j in range(s):
                self.distancias[j][i] = mlpy.dtw_std(self.eps_sueno[i].stc, self.eps_sueno[j].stc, dist_only=True)

    #print distancias
    print self.distancias.shape
    # Convert the square matrix to the condensed distance vector required for clustering
    dists = ssd.squareform(self.distancias)
    print dists
    # Hierarchical clustering
    self.Z = linkage(dists, 'average')
def myDTW(self, text_file, arr1, arr2, arr3, arr4, arr5, arr6, arr7, arr8, arr9):
    df = pd.read_csv(text_file, sep=' ', header=None)
    # First 75 rows of the nine sensor axes (x/y/z for three sensors)
    x1 = df.iloc[0:75, 0].values
    y1 = df.iloc[0:75, 1].values
    z1 = df.iloc[0:75, 2].values
    x2 = df.iloc[0:75, 3].values
    y2 = df.iloc[0:75, 4].values
    z2 = df.iloc[0:75, 5].values
    x3 = df.iloc[0:75, 6].values
    y3 = df.iloc[0:75, 7].values
    z3 = df.iloc[0:75, 8].values
    # DTW distance per axis against the reference arrays, summed into one score
    dist1 = mlpy.dtw_std(x1, arr1, dist_only=True)
    dist2 = mlpy.dtw_std(y1, arr2, dist_only=True)
    dist3 = mlpy.dtw_std(z1, arr3, dist_only=True)
    dist4 = mlpy.dtw_std(x2, arr4, dist_only=True)
    dist5 = mlpy.dtw_std(y2, arr5, dist_only=True)
    dist6 = mlpy.dtw_std(z2, arr6, dist_only=True)
    dist7 = mlpy.dtw_std(x3, arr7, dist_only=True)
    dist8 = mlpy.dtw_std(y3, arr8, dist_only=True)
    dist9 = mlpy.dtw_std(z3, arr9, dist_only=True)
    return dist1 + dist2 + dist3 + dist4 + dist5 + dist6 + dist7 + dist8 + dist9
def __init__(self, episodios, tf=False, cons=False):
    # Represents a sleep episode via its temperature and flow time series
    class Individuo:
        def __init__(self, nombre, tiempo, temperatura=[], flujo=[], consumo=[]):
            self.nombre = nombre
            self.tiempo = tiempo
            self.stt = temperatura
            self.stf = flujo
            self.stc = consumo

    if DEBUG:
        print "Standardizing", len(episodios), "sleep episodes"
    # Normalize each sleep episode by standardization (temperature and flow)
    self.eps_sueno = []
    if tf:
        for i in episodios:
            a = preprocessing.scale(i.temp, copy=True)
            b = preprocessing.scale(i.flujo, copy=True)
            self.eps_sueno.append(Individuo(i.nombre, i.tiempo, temperatura=a, flujo=b))
    elif cons:
        for i in episodios:
            a = preprocessing.scale(i.consumo, copy=True)
            self.eps_sueno.append(Individuo(i.nombre, i.tiempo, consumo=a))

    # Compute the pairwise DTW distance matrix between individuals
    s = len(self.eps_sueno)
    self.distancias = np.zeros((s, s))
    if tf:
        for i in range(s):
            for j in range(s):
                distanceTemp = mlpy.dtw_std(self.eps_sueno[i].stt, self.eps_sueno[j].stt, dist_only=True)
                distanceFlujo = mlpy.dtw_std(self.eps_sueno[i].stf, self.eps_sueno[j].stf, dist_only=True)
                self.distancias[j][i] = math.sqrt(math.pow(distanceTemp, 2) + math.pow(distanceFlujo, 2))  # Combined Euclidean distance
    elif cons:
        for i in range(s):
            for j in range(s):
                self.distancias[j][i] = mlpy.dtw_std(self.eps_sueno[i].stc, self.eps_sueno[j].stc, dist_only=True)

    if DEBUG:
        print "Distance matrix", self.distancias.shape, self.distancias
    # Convert the square matrix to the condensed distance vector required for clustering
    dists = ssd.squareform(self.distancias)
    # Hierarchical clustering
    self.Z = linkage(dists, 'average')
    # Label each episode for display in the dendrogram
    self.labels = []
    for i in self.eps_sueno:
        self.labels.append(i.nombre)
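# A minimal usage sketch for the constructor above (hedged: `Agrupamiento` and
# `episodios` are hypothetical names; only self.Z and self.labels come from the
# snippet itself). The linkage matrix feeds scipy's dendrogram directly.
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram

# agr = Agrupamiento(episodios, cons=True)   # the class this __init__ belongs to
# dendrogram(agr.Z, labels=agr.labels)
# plt.show()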
def _dist_matrix(self, x, y):
    """Computes the M x N distance matrix between the training
    dataset (x) and testing dataset (y) using the DTW distance measure.

    Arguments
    ---------
    x : array of shape [n_samples, n_timepoints]
    y : array of shape [n_samples, n_timepoints]

    Returns
    -------
    Distance matrix between each item of x and y with
    shape [training_n_samples, testing_n_samples]
    """
    # Compute the distance matrix
    dm_count = 0
    x_s = np.shape(x)
    y_s = np.shape(y)
    dm = np.zeros((x_s[0], y_s[0]))
    dm_size = x_s[0] * y_s[0]
    total = dm_size
    p = ProgressBar()
    for i in xrange(0, x_s[0]):
        for j in xrange(0, y_s[0]):
            dm[i, j] = mlpy.dtw_std(x[i, :], y[j, :], dist_only=True)
            # Update progress bar
            dm_count += 1
            if self.progress_bar:
                p(float(dm_count) / total * 100)
    return dm
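# Hedged sketch of how such a DTW distance matrix is typically consumed by a
# 1-nearest-neighbour rule (random data; everything below except mlpy.dtw_std
# is invented for illustration):
import numpy as np
import mlpy

train = np.random.randn(5, 100)   # 5 labelled training series
test = np.random.randn(2, 100)    # 2 series to classify
dm = np.array([[mlpy.dtw_std(te, tr, dist_only=True) for tr in train] for te in test])
nearest = dm.argmin(axis=1)       # index of the closest training series per test series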
def classifyImg():
    fileList = [x for x in listdir(r'F:\PY\data\Img') if x.lower().endswith(".jpg")]
    m = len(fileList)
    for fn in range(m):
        img = Image.open(r'F:\PY\data\Img\{0}'.format(fileList[fn]))
        arr = array(img)
        list = []
        if arr.ndim == 2:  # skip grayscale images
            print fileList[fn]
            continue
        for n in arr:
            list.append(n[0][0])
        for n in arr:
            list.append(n[0][1])
        for n in arr:
            list.append(n[0][2])
        data[fileList[fn]] = list
    reference = data['007_0025.jpg']
    result = {}
    for x, y in data.items():
        dist = mlpy.dtw_std(reference, y, dist_only=True)
        result[x] = dist
    sortedRes = OrderedDict(sorted(result.items(), key=lambda x: x[1]))
    i = 0  # fixed: `i` was used below without being initialized
    for a, b in sortedRes.items():
        print("{0} - {1}".format(a, b))
        i = i + 1
        if i == 10:
            break
def dtw_checker(inputFile1, inputFile2):
    '''COMPARES TWO GIVEN FILES'''
    # DEFINING CONSTANTS
    DIST_STEP = 2000
    (inAudio1, fs1) = file_reader(inputFile1)
    (inAudio2, fs2) = file_reader(inputFile2)
    # APPLY DTW ALGORITHM (http://mlpy.sourceforge.net/docs/3.5/dtw.html)
    # chunk-wise: DTW over successive DIST_STEP-sample windows, then average
    dist_array = []
    i = 1
    while i < len(inAudio1):
        (dist, cost, path) = mlpy.dtw_std(inAudio1[i:DIST_STEP + i], inAudio2[i:DIST_STEP + i], dist_only=False)
        dist_array.append(dist)
        i += DIST_STEP
    dist_array = numpy.asarray(dist_array)
    dist_final = numpy.mean(dist_array)
    result = os.path.basename(inputFile1) + " -- " + os.path.basename(inputFile2) + " ==> " + str(dist_final)
    # print result
    return (result, dist_final)
def keywordSpotter(topN):
    f = open(KEYWORDS, 'r')
    keywords = f.readlines()
    f.close()
    featureDict = calculateTestFeatures()
    for k in keywords:
        trainsample = getTrainSample(k)
        # dictionary e.g.: {'302-30-07': 'h-u-n-d-r-e-d\n', '301-16-08': 'h-u-n-d-r-e-d\n'}
        testSamples = getTestSamples(k)
        img = Image.open(IMG_PATH + trainsample + IMG_ENDING)
        x = np.asarray(img)
        x = extractFeatures(x)
        distDict = {}
        totalWords = 0
        for filename in os.listdir(IMG_PATH):
            #if (int(filename[0:3]) >= 300):
            if (int(filename[0:3]) >= 200):  # take all
                totalWords += 1
                y = featureDict[filename]
                dist, cost, path = mlpy.dtw_std(x, y, dist_only=False)
                if len(distDict) < topN:
                    distDict[filename[0:9]] = dist
                else:
                    if distDict[max(distDict, key=distDict.get)] > dist:
                        del distDict[max(distDict, key=distDict.get)]
                        distDict[filename[0:9]] = dist
        # print distDict
        evaluation(distDict, testSamples, totalWords, k)
def mlpy_example():
    x = np.array([0, 0, 0, 0, 1, 1, 2, 2, 3, 2, 1, 1, 0, 0, 0, 0], dtype=np.double)
    y = np.array([0, 0, 1, 1, 2, 2, 3, 3, 3, 3, 2, 2, 1, 1, 0, 0], dtype=np.double)
    test_1, test_2, cost_m = md.dtw_path_single(x, y, 200, 200, 100, 5.0, 5.0, 1)
    dist, cost, path = mlpy.dtw_std(x, y, dist_only=False)
    true_1 = path[0]
    true_2 = path[1]
    # test the program returns the answers in the correct order
    ind_1 = np.sum(np.abs(test_1 - true_1))
    ind_2 = np.sum(np.abs(test_2 - true_2))
    #print(cost_m - cost)
    #fig, ax = plt.subplots(ncols=2, sharex=True, sharey=True)
    #ax[0].imshow(cost_m.T, extent=[0, x.size, 0, y.size], origin='lower')
    #ax[1].imshow(cost.T, extent=[0, x.size, 0, y.size], origin='lower')
    #ax[0].plot(test_1, test_2, '--', color='black')
    #ax[1].plot(true_1, true_2, '.-', color='red')
    #plt.show()
    # use the sum of 0 integers to confirm it is true
    assert ind_1 == 0
    assert ind_2 == 0
    assert np.allclose(cost_m, cost)
def dtw(x, y):
    # DTW-correlate two signals to calculate the distance
    x = np.genfromtxt(x, delimiter=',')
    y = np.genfromtxt(y, delimiter=',')
    x = downsampling(x)
    y = downsampling(y)
    x = x[:, 1]
    y = y[:, 1]
    x = (x - np.mean(x)) / np.std(x)
    y = (y - np.mean(y)) / np.std(y)
    plt.plot(x)
    plt.plot(y)
    plt.show()
    dist, cost, path = mlpy.dtw_std(x, y, dist_only=False)
    print np.array(dist)
    fig = plt.figure(1)
    ax = fig.add_subplot(111)
    plot1 = plt.imshow(cost.T, origin='lower', cmap=cm.gray, interpolation='nearest')
    plot2 = plt.plot(path[0], path[1], 'w')
    xlim = ax.set_xlim((-0.5, cost.shape[0] - 0.5))
    ylim = ax.set_ylim((-0.5, cost.shape[1] - 0.5))
    plt.show()
    return dist
def modified_extract_features(data_male, data_female, windows, labels, timestamp):
    '''
    STEP and LEN in seconds
    :param pha_processed: the phasic np matrix with 4 columns
    :param WINSTEP: window step
    :param WINLEN: window length
    '''
    dtw_values = []
    for i in range(len(windows)):
        t_start, t_end = windows[i]
        win_s1 = data_male[np.where((timestamp >= t_start) & (timestamp < t_end))[0]]
        win_s2 = data_female[np.where((timestamp >= t_start) & (timestamp < t_end))[0]]
        # normalization of the window portions
        #tn_male = np.array(data_male.tonic)
        #tn_female = np.array(data_female.tonic)
        #tn_male = mynorm_maxmin(tn_male)
        #tn_female = mynorm_maxmin(tn_female)
        win_s1 = (win_s1 - np.mean(win_s1)) / np.std(win_s1)
        win_s2 = (win_s2 - np.mean(win_s2)) / np.std(win_s2)
        dtw_curr = mlpy.dtw_std(win_s1, win_s2)
        dtw_curr = dtw_curr / len(win_s1)
        lab = labels[i]
        dtw_values.append([dtw_curr, lab])  # attach the label to each DTW value
    return dtw_values  # (already with labels)
def dtw_interseries(s1, s2, squared=False):
    """
    :param s1: Time series 1 as list
    :param s2: Time series 2 as list
    :param squared: boolean. If True, the point-wise cost is the squared
        difference; if False, the absolute difference.
    :return: unnormalized minimum-distance warp path cost between the sequences
    """
    return mlpy.dtw_std(s1, s2, dist_only=True, squared=squared)
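# Quick illustration of the `squared` flag above (toy data, invented for this
# note): with squared=True, large deviations are penalised more heavily than
# with the default absolute difference.
s1 = [0.0, 1.0, 2.0, 1.0, 0.0]
s2 = [0.0, 0.0, 2.0, 2.0, 0.0]
d_abs = dtw_interseries(s1, s2)
d_sq = dtw_interseries(s1, s2, squared=True)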
def dtwDistance(list1, list2):
    y1 = [li['y'] for li in list1]
    y2 = [li['y'] for li in list2]
    dist = mlpy.dtw_std(y1, y2)
    return dist
def dtw(filepath1, filepath2):
    v1 = get_json_from_file(filepath1)
    v2 = get_json_from_file(filepath2)
    dist, cost, path = mlpy.dtw_std(v1.flatten(), v2.flatten(), dist_only=False)
    print dist, filepath2
def test_dtw_short():
    """
    Calculates DTW using DTW 1.0 by Rouanet (modified to remove normalization)
    and dtw_std from mlpy, and compares them with the DTW in ts_analytics.
    Uses the two ten-element lists from Rouanet's example notebook:
    http://nbviewer.ipython.org/github/pierre-rouanet/dtw/blob/master/simple%20example.ipynb
    """
    x = [0, 0, 1, 1, 2, 4, 2, 1, 2, 0]
    y = [1, 1, 1, 2, 2, 2, 2, 3, 2, 0]
    mlpy_dist = mlpy.dtw_std(x, y)
    tsa_dist = tsa.dtw(x, y)
    assert tsa_dist == mlpy_dist
def dtwDistanceMLPY(times1, times2):
    # Rescale each series to [0, 1] before comparing
    mx = float(max(times1))
    mn = float(min(times1))
    if mx - mn > 0:
        times1 = [(item - mn) / (mx - mn) for item in times1]
    mx = float(max(times2))
    mn = float(min(times2))
    if mx - mn > 0:
        times2 = [(item - mn) / (mx - mn) for item in times2]
    dist = mlpy.dtw_std(times1, times2, dist_only=True)
    return dist
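# Illustration of why the rescaling above matters (toy numbers, invented for
# this note): these two series have the same shape but a large offset, so raw
# DTW would report a huge distance, while after min-max rescaling both become
# [0.0, 0.5, 1.0].
a = [10.0, 20.0, 30.0]
b = [1010.0, 1020.0, 1030.0]
d = dtwDistanceMLPY(a, b)  # ~0.0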
def recurs_in_single_linkage(dist, X, clst, sum_of_dist):
    minim = dist[0][0]
    for i in dist:
        if min(i) < minim:
            minim = min(i)
    for index_col, row in enumerate(dist):
        if minim in row:
            buf = [index_col, row.index(minim)]
            # merge the two closest clusters
            for obj in X[buf[0] + buf[1] + 1]:
                X[buf[0]].append(obj)
            X.remove(X[buf[0] + buf[1] + 1])
            # rebuild the single-linkage distance matrix over the merged clusters
            new_dist = []
            for i in X[:-1]:
                buf_dist = []
                min_buf_dist = []
                for j in i:
                    min_dist_col = []
                    for k in X[X.index(i) + 1:]:
                        min_dist_row = []
                        for m in k:
                            min_dist_row.append(mlpy.dtw_std(j, m))
                        min_dist_col.append(min(min_dist_row))
                    buf_dist.append(min_dist_col)
                if len(buf_dist) > 1:  # check whether it is a matrix
                    for obj in range(len(buf_dist[0])):
                        buf_min_for_buf_dist = []
                        for min_obj in range(len(buf_dist)):
                            buf_min_for_buf_dist.append(buf_dist[min_obj][obj])
                        min_buf_dist.append(min(buf_min_for_buf_dist))
                    new_dist.append(min_buf_dist)
                else:
                    new_dist.append(buf_dist[0])
            if len(X) > clst:
                sum_of_dist = recurs_in_single_linkage(new_dist, X, clst, sum_of_dist)
            else:
                center = [np.ndarray.tolist(np.mean(i, axis=0)) for i in X]
                for i in range(len(X)):
                    for j in range(len(X[i])):
                        sum_of_dist += euclid_dist(center[i], X[i][j])
                return sum_of_dist
            return sum_of_dist
    return sum_of_dist
def getDTWDist(data1, data2):
    '''
    Alternative implementation via R's dtw package (kept for reference):
    R = rpy2.robjects.r
    DTW = importr('dtw')
    d1r, d1c = data1.shape
    d2r, d2c = data2.shape
    data1R = R.matrix(data1, nrow=d1r, ncol=d1c)
    data2R = R.matrix(data2, nrow=d2r, ncol=d2c)
    alignment = R.dtw(data1R, data2R, keep=True,
                      step_pattern=R.rabinerJuangStepPattern(4, "c"),
                      open_begin=True, open_end=True, distance_only=True)
    return alignment.rx('distance')[0][0]
    '''
    # distance, path = fastdtw(data1, data2, dist=euclidean)
    return mlpy.dtw_std(data1, data2, dist_only=True)
def FindNextStartAndEndPointOnPattern(m, eCounter, sCounter):
    sumExpressionAbsolute = -1
    l = 4
    while sumExpressionAbsolute < e1:
        shortList = GetShortList(seriesRepresented, sList[sCounter], l)
        x = GetOnlyOneAxis(shortList, 0)
        y = GetOnlyOneAxis(shortList, 1)
        if len(x) <= 3 or len(y) <= 3:
            m = n
            return m, eCounter, sCounter
        regressionConstants = numpy.polyfit(x, y, 2)
        a = regressionConstants[0]
        b = regressionConstants[1]
        c = regressionConstants[2]
        sumExpression = 0
        for i in range(0, l):
            if i < len(shortList):
                functionSolved = SolveRegresionFunction(a, b, c, shortList[i][0])
                expressionFormula = pow((functionSolved - shortList[i][0]), 2)
                sumExpression += expressionFormula
        sumExpressionAbsolute = GetAbsoluteValue(sumExpression)
        if sumExpressionAbsolute < e1:
            l += 1
        else:
            eList.append(sList[sCounter] + l)
            eCounter += 1
            i = 1
            flag2 = "true"
            while flag2 == "true":
                firstList = GetShortList(seriesRepresented, sList[sCounter], eList[eCounter])
                secondList = GetShortList(seriesRepresented, (sList[sCounter]) + i, (eList[eCounter]) + i)
                firstAxis = GetOnlyOneAxis(firstList, 0)
                secondAxis = GetOnlyOneAxis(secondList, 0)
                dist, cost, path = mlpy.dtw_std(firstAxis, secondAxis, dist_only=False)
                if dist <= e2:
                    i += 1
                if (i >= (eList[eCounter] - sList[sCounter])) or (dist > e2):
                    flag2 = "false"
            sList.append(eList[eCounter] + i)
            sCounter += 1
            m = sList[sCounter]
    return m, eCounter, sCounter
def k_means_clust(data, num_clust, num_iter):
    # centroids = random.sample(list(data), num_clust)
    # b = np.random.randint(0, data.shape[0], num_clust)
    b = np.random.permutation(data.shape[0])[:num_clust]
    print('Initialisation ids: %s' % str(b))
    centroids = data[b]
    conv = []
    meta = []
    for n in range(num_iter):
        print(n)
        assignments = {}
        # assign data points to clusters #
        for ind, i in enumerate(data):
            min_dist = float('inf')
            closest_clust = None
            for c_ind, j in enumerate(centroids):
                cur_dist = mlpy.dtw_std(i, j, dist_only=True)
                if cur_dist < min_dist:
                    min_dist = cur_dist
                    closest_clust = c_ind
            if closest_clust in assignments:
                assignments[closest_clust].append(ind)
            else:
                assignments[closest_clust] = [ind]  # fixed: the first member was dropped when the list started empty
        # recalculate centroids of clusters #
        # key = cluster number #
        for key, init in zip(assignments, data[b]):
            clust_sum = 0
            for k in assignments[key]:
                # k = number of records (rows of matrix) #
                clust_sum = clust_sum + data[k]
            centroids[key] = [m / len(assignments[key]) for m in clust_sum]
            conv.append(float(mlpy.dtw_std(init, centroids[key], dist_only=True)))
        meta.append(conv)
        conv = []
    return [centroids, assignments, meta]
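# Hedged usage sketch for the clustering routine above (random data; the
# shapes below are invented): cluster 20 series of length 50 into 3 groups
# over 10 iterations.
import numpy as np

data = np.random.randn(20, 50)
centroids, assignments, meta = k_means_clust(data, 3, 10)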
def pairwise_dtw(samples, axis):
    """
    Compute the pairwise DTW distances between samples.
    samples : iterable of pandas.DataFrame samples
    axis : the column to compare
    """
    import mlpy
    from scipy.spatial.distance import squareform
    array1 = list(map(lambda data: data[axis], samples))  # list() keeps it indexable under Python 3
    d_array = []
    l_array = len(array1)
    for i in range(l_array - 1):
        for j in range(i + 1, l_array):
            d_array.append(mlpy.dtw_std(array1[i], array1[j], dist_only=True))
    X = squareform(d_array)
    return X
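# Hedged usage sketch for pairwise_dtw (the frames below are invented sample
# data; 'v' is the column name passed as `axis`):
import pandas as pd

df1 = pd.DataFrame({'v': [0.0, 1.0, 2.0, 1.0]})
df2 = pd.DataFrame({'v': [0.0, 0.0, 2.0, 2.0]})
df3 = pd.DataFrame({'v': [2.0, 1.0, 0.0, 0.0]})
X = pairwise_dtw([df1, df2, df3], 'v')   # 3 x 3 symmetric DTW distance matrix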
def compareData(in_file, lib_file):
    vector_index = ['arr_0', 'arr_1', 'arr_2']
    dist = 0
    total_dist = 0
    dists = []
    in_data = np.load(in_file)
    lib_data = np.load(lib_file)
    for j in range(0, 3):
        a = in_data[vector_index[j]]
        b = lib_data[vector_index[j]]
        dist = mlpy.dtw_std(a, b, dist_only=True)
        total_dist += dist
        dists.append(dist)
    return (total_dist, dists)
def adaptive_align_dtw(trace, peaks, avg_height, ladders):
    data = trace
    ## TRY 1 -> use DTW
    from mlpy import dtw_std
    from matplotlib import pylab as plt
    from dtw import dtw

    peak_corr = {}
    for p in peaks:
        peak_corr[p.rtime] = []
    dtw_list = []
    for i in range(0, 3):
        for j in range(1, 3):
            standard_peaks, peak_index = generate_peaks(ladders, avg_height, peaks[i].rtime, peaks[-j].rtime)
            dist, cost, path = dtw_std(standard_peaks, data, dist_only=False, squared=True)
            plot_path(standard_peaks, data, path, [p[0] for p in peak_index])
            #dist, cost, path = dtw(standard_peaks, data)
            # fill peak correlation based on path
            for map_x, map_y in zip(path[0], path[1]):
                if map_y in peak_corr:
                    if standard_peaks[map_x] < avg_height / 2:
                        peak_corr[map_y].append(-1)
                    else:
                        peak_corr[map_y].append(search_peak_index(map_x, peak_index))
    peak_assignment = score_peak_correlation(peak_corr)
    dpscore, rss, z, aligned_peaks = adaptive_peak_alignment(peak_assignment, peaks, ladders)
    return (dpscore, rss, z, aligned_peaks)
def single_linkage(x, clst):
    dist = []
    X = [[list(i)] for i in x]
    # initial pairwise DTW distances between singleton clusters
    for i in X[:-1]:
        buf_dist = []
        for j in i:
            for k in X[X.index(i) + 1:]:
                for m in k:
                    buf_dist.append(mlpy.dtw_std(j, m))
        dist.append(buf_dist)
    print("Dist: ", dist)
    res = recurs_in_single_linkage(dist, X, clst, sum_of_dist=0)
    print(res)
    return res
def dtw_k_means(data, num_clust, metric):
    t0 = time()
    centroids = random.sample(data, num_clust)
    counter = 0
    for n in range(50):
        counter += 1
        #print counter
        assignments = {}
        # assign data points to clusters
        for ind, i in enumerate(data):
            min_dist = float('inf')
            closest_clust = None
            for c_ind, j in enumerate(centroids):
                if LB_Keogh(i, j, 5) < min_dist:  # cheap lower bound before the full DTW
                    cur_dist = mlpy.dtw_std(i, j, dist_only=True)
                    if cur_dist < min_dist:
                        min_dist = cur_dist
                        closest_clust = c_ind
            if closest_clust in assignments:
                assignments[closest_clust].append(ind)
            else:
                assignments[closest_clust] = [ind]  # fixed: the first member was dropped when the list started empty
        # recalculate centroids of clusters
        for key in assignments:
            clust_sum = 0
            for k in assignments[key]:
                clust_sum = clust_sum + data[k]
            centroids[key] = [m / len(assignments[key]) for m in clust_sum]
    labels = [0] * len(data)
    for key in assignments:
        for value in assignments[key]:
            labels[value] = key
    t1 = time()
    labels = np.array(labels)
    return ('Kmeans DTW', len(assignments.keys()), accuracy.getAccuracy(data, labels, len(data), 'euclidean'), t1 - t0)
def _postprocessing(self, result, satellite, querylen, time):
    """
    Conduct post-processing on the retrieved result.
    Return <song_id, hits for majority time offset, value of time offset, overall hits #>
    satellite: the naive hits from the hashing table; type(satellite) is ndarray.
    querylen: the length of time of the query; the unit is 10 ms.
    """
    # Permit +/- 10% tempo difference
    # tolerance = 0.1
    output = np.zeros((len(result), 4), dtype=int)
    # delta = querylen * tolerance
    # Find the most popular time offset
    for i in range(len(result)):
        # Fetch satellites containing the specific track id
        tkR = satellite[satellite[:, 0] == result[i, 0]]
        # Draw the histogram of the offset-time values
        dts, xx = self._unique_first(tkR[:, 2])
        xx.append(len(tkR))
        dtcounts = np.diff(xx)
        xx = dtcounts.argmax(0)
        vv = dtcounts.max(0)
        hitted = np.array([tkR[k] for k in range(len(tkR)) if tkR[k, 2] == dts[xx]])
        stamp, ind = self._unique_first(hitted[:, 1])
        ind.append(len(hitted))
        dtcounts = np.diff(ind)
        hitlen = np.zeros(len(time), dtype=int)
        for j in stamp:
            hitlen[time.index(j - dts[xx])] = dtcounts[stamp.index(j)]
        from mlpy import dtw_std
        dis = dtw_std(querylen, hitlen)
        output[i] = [result[i, 0], len(stamp), len(hitted), dis]
    # Sort the output in accordance with time coverage
    output.view('i8,i8,i8,i8').sort(order=['f1'], axis=0)
    return output[::-1]
import numpy as np
import time
from cdtw.src.pydtw import *

r = np.arange(6000)
q = np.arange(6000)

s = Settings()
s.step.set_type('dp2')
# s.global_constraint.set_type('itakura')
# s.global_constraint.set_param(0.2)
# s.compute_path = False

t1 = time.time()
d = dtw(r, q, s)
t2 = time.time()
print s
print "python total time: " + str(t2 - t1)

import mlpy
t1 = time.time()
d = mlpy.dtw_std(r, q, dist_only=False)
t2 = time.time()
print "mlpy total time: " + str(t2 - t1)
import mlpy
import matplotlib.pyplot as plt
import matplotlib.cm as cm

x = [0, 0, 0, 0, 1, 1, 2, 2, 3, 2, 1, 1, 0, 0, 0, 0]
y = [0, 0, 1, 1, 2, 2, 3, 3, 3, 3, 2, 2, 1, 1, 0, 0]
dist, cost, path = mlpy.dtw_std(x, y, dist_only=False)

fig0 = plt.figure(1)
plt.plot(x, "b")
plt.plot(y, "r")

fig = plt.figure(2)
ax = fig.add_subplot(111)
plot1 = plt.imshow(cost.T, origin='lower', cmap=cm.gray, interpolation='nearest')
plot2 = plt.plot(path[0], path[1], 'w')
xlim = ax.set_xlim((-0.5, cost.shape[0] - 0.5))
ylim = ax.set_ylim((-0.5, cost.shape[1] - 0.5))
plt.show()
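# Usage note (not part of the original snippet): when the warping path and
# cost matrix are not needed, passing dist_only=True returns just the scalar
# cost.
d = mlpy.dtw_std(x, y, dist_only=True)   # same value as `dist` above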
def dtw_distance(x, y):
    return mlpy.dtw_std(x, y, dist_only=True)
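# Sketch: because the wrapper above has the (x, y) -> float shape that scipy's
# pdist accepts as a callable metric, it can build a condensed DTW distance
# matrix directly (random data; the names below are illustrative):
import numpy as np
from scipy.spatial.distance import pdist, squareform

series = np.random.randn(4, 50)               # 4 series of length 50
condensed = pdist(series, metric=dtw_distance)
square = squareform(condensed)                # full 4 x 4 matrix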
def kmeans(X, clusters, num_iter=300, labels=0, metr="euclid"):
    rnd_centr = random.sample(range(len(X)), clusters)
    print("random center:", rnd_centr)
    # num_centr = [X[0], X[2]]
    num_centr = [X[i] for i in rnd_centr]
    print("len num of centr in start:", len(num_centr))
    global sum_of_dist
    for n in range(num_iter):
        dist = []
        for obj in X:
            # for each object, compute the distance to every centroid
            buf = [mlpy.dtw_std(obj, centr) for centr in num_centr]
            dist.append(buf)  # build the distance matrix
        print("len DIST:", len(dist))
        # for each object, the closest centroid and the distance to it
        num_clst = [[d.index(min(d)), min(d)] for d in dist]
        print()
        print("len num_clst", len(num_clst))
        # dictionary recording which cluster each vector belongs to
        dict_clst = {}
        # accumulator for the sum of distances to the centroids
        sum_of_dist = 0
        arr_clst = []
        for i in range(len(num_clst)):
            if num_clst[i][0] not in dict_clst:  # check whether this cluster id already exists
                dict_clst[num_clst[i][0]] = []
                arr_clst.append(num_clst[i][0])
            dict_clst[num_clst[i][0]].append(X[i])  # add the vector to its cluster
            sum_of_dist += num_clst[i][1]
        arr_clst.sort()
        print("ln dictionary:", len(dict_clst))
        last_centr = copy.deepcopy(num_centr)  # remember the previous centers
        num_centr = []
        for key, value in dict_clst.items():
            print("dict items:", key, value)
            # recompute each centroid as the coordinate-wise mean of its members
            num_centr.append(np.ndarray.tolist(np.array(value).mean(axis=0)))
        if len(num_centr) != len(last_centr):
            continue
        try:
            # stop when the new centers coincide with the old ones
            if (np.array(last_centr) == np.array(num_centr)).all():
                if labels:
                    return dict_clst
                else:
                    return sum_of_dist
        except AttributeError:
            print(len(last_centr))
            print(len(num_centr))
    return sum_of_dist
def process3(self):
    #s1 = web.get_data_yahoo('AAL', '2014-12-15', '2015-05-8')['Adj Close']
    s1 = web.get_data_yahoo('INTC', '2014-10-15', '2015-02-03')['Adj Close']
    s2 = web.get_data_yahoo('ibm', '2015-02-23', '2015-04-06')['Adj Close']
    n1 = np.array(s1.tolist())
    n2 = np.array(s2.tolist())
    print len(n1), len(n2)
    if len(n2) < len(n1):
        print "interpolate"
        steps = (len(n2) * 1.0 - 1.0) / (len(n1) - len(n2))
        x1 = np.arange(1, len(n2) + 1)
        f = interp1d(x1, n2)
        print n2
        x_fake = np.arange(1.1, len(n2), steps)
        print len(x_fake)
        print x_fake
        c = np.sort(np.concatenate((x1, x_fake)))
        print c
        y1 = np.array([f(i) for i in c])
        print y1
    #s1 = s1.reindex(index=np.arange(len(s1)))
    #print s1
    '''
    if (len(s2) < len(s1)):
        x2 = pd.date_range(s1.index[0], s1.index[-1], freq='D')
        s2 = s2.reindex(x2)
        print s2
    '''
    a = pd.Series(n1)
    b = pd.Series(y1)
    rets1 = a.pct_change()
    rets2 = b.pct_change()
    rets1[0] = 0
    rets2[0] = 0
    '''
    print type(rets1)
    corr = pd.rolling_corr(rets1, rets2, 10)
    print type(corr), corr
    '''
    cor, pval = pearsonr(rets1, rets2)
    print "pearsonr", str(cor), pval
    print "def2", pearson_def(rets1, rets2)
    '''
    rets3 = rets1.shift(5)
    rets3.fillna(0, inplace=True)  # method='ffill')
    print rets3
    '''
    dist, cost, path = mlpy.dtw_std(rets1, rets2, dist_only=False)
    print "dist", dist
# don't compare if one series is less than 65% of the other's length
if len(AllHandFeats[i]) > len(S) and len(S) / float(len(AllHandFeats[i])) < 0.65:
    # print 'skip'
    dist = float("inf")
elif len(S) > len(AllHandFeats[i]) and len(AllHandFeats[i]) / float(len(S)) < 0.65:
    # print 'skip'
    dist = float("inf")
else:
    # my DTW
    #EV = EventHorizon()
    #dis = EV.SimpleDynTimeWarp(S, AllHandFeats[i], band_width)
    if 'SC' in ZoneType:
        dist, cost, path = mlpy.dtw_std(S, AllHandFeats[i], dist_only=False, metric='euclidean', constraint='slanted_band', k=band_width)
    else:
        dist, cost, path = mlpy.dtw_std(S, AllHandFeats[i], dist_only=False, metric='euclidean', constraint='itakura', k=band_width)
    dist /= float(len(S) + len(AllHandFeats[i]))

print 'DTW :', os.path.basename(Fake[0]), transcript, '=', dist
if math.isinf(dist) == False:
    RealComps += 1
    # add result to mysql
    try:
        qr = MeinSql.cursor()
        erotima = "Insert INTO Local (SynthFile,HandFile,HandWord,Score,Zoni,Platos) VALUES('" + os.path.basename(Fake[0]) + "','" + os.path.basename(HandFileNames[i][0]) + "','" + transcript + "','" + str(dist) + "','" + ZoneType + "','" + str(band_width) + "')"
thumpVotes = 0
nailVotes = 0
velcroVotes = 0
totalVotes = 0
fftrmsarray = []
peaktopeak = np.max(audioWindow) - np.min(audioWindow)
rms = np.sqrt(np.mean(np.square(audioWindow)))
if rms > 700:
    prevTime = t
    normalized = audioWindow / (peaktopeak / 2.0)
    totalVotes += 1
    for template in templates["t"]:
        thumpDist = mlpy.dtw_std(normalized, template, dist_only=True)
        # if thumpDist < 90: thumpVotes += 1
        thumpVotes += 90 / thumpDist
        print "ThumpDist = " + str(thumpDist)
    for template in templates["n"]:
        nailDist = mlpy.dtw_std(normalized, template, dist_only=True)
        # if nailDist < 200: nailVotes += 1
        nailVotes += 200 / nailDist
        print "NailDist = " + str(nailDist)
    for template in templates["v"]:
        velcroDist = mlpy.dtw_std(normalized, template, dist_only=True)
        # if velcroDist < 120: velcroVotes += 1
        velcroVotes += 120 / velcroDist
        print "VelcroDist = " + str(velcroDist)
f.close()

indx = 0
f = open('D:/FYP-Developments/Dataset-Debs-2013/MovingAverageData/resultDTW5.csv', 'rU')  # open train data
for line in f:
    cells = line.split(",")
    e.append((float)(cells[7]))
    indx = indx + 1
    if indx == dtw_data_limit:
        break
f.close()

dist, cost, path = mlpy.dtw_std(a, d, dist_only=False)
print("Distance between 7th and 28th minutes - Two Goals")
print(dist)
print("############")

dist, cost, path = mlpy.dtw_std(b, c, dist_only=False)
print("Distance between 13th and 22nd minutes - Two Goals")
print(dist)
print("############")

dist, cost, path = mlpy.dtw_std(a, e, dist_only=False)
print("Distance between 7th and 5th minutes")
print(dist)
    path.append([0, 0])
    for [fadi2, fadi1] in path:
        cost = cost + distances[fadi1, fadi2]
    return path, cost

path, cost = path_cost(fadi1, fadi2, accumulated_cost, distances)
print(path)
print(cost)

# This is an implementation we created for this problem, but we can also try
# it using a library that Python has and see the difference.
# Attempt using the mlpy library
import mlpy
dist, cost, path = mlpy.dtw_std(fadi1, fadi2, dist_only=False)

import matplotlib.cm as cm
fig = plt.figure(1)
ax = fig.add_subplot(111)  # fixed: was add_subplor(11)
plot1 = plt.imshow(cost.T, origin='lower', cmap=cm.gray, interpolation='nearest')
plot2 = plt.plot(path[0], path[1], 'w')
xlim = ax.set_xlim((-0.5, cost.shape[0] - 0.5))
ylim = ax.set_ylim((-0.5, cost.shape[1] - 0.5))
dist

plt.plot(fadi1, 'bo-', label='Fadi 1')
plt.plot(fadi2, 'g^-', label='Fadi 4')
plt.legend()
paths = path_cost(fadi1, fadi2, accumulated_cost, distances)[0]
def cluster(data, sum_t, num):
    ############## Clustering Initial Timeseries ################
    plots = []
    D = len(data)
    # Calculate distance matrix using DTW
    dist_mat = np.empty(shape=(D, D))
    ii = 0
    labels1 = []
    for i in data.keys():
        jj = 0
        for j in data.keys():
            if ii == jj:
                dist_mat[ii][jj] = 0
            else:
                dist_mat[ii][jj] = dtw_std([z for z in data[i]], [z for z in data[j]], dist_only=True)
            jj += 1
        ii += 1
        labels1.append(i)
    # Use average linkage + DTW to remove outliers
    avg = linkage(squareform(dist_mat), method='average', metric='euclidean')
    clusters = fcluster(avg, floor(log(num) / log(2)), 'maxclust')
    #print(clusters)
    freq = {}
    for i in clusters:
        if i not in freq:
            freq[i] = 1
        else:
            freq[i] += 1
    # Stocks which appear in almost empty clusters are considered outliers
    thresh = max(freq.values())
    mfe = [k for k, v in freq.items() if v > floor(num / 10)]
    f = plt.figure(figsize=(6, 6), dpi=100, facecolor='white')
    plt.subplot(211)
    plt.title("With Outliers")
    if num > 50:
        dendrogram(avg, leaf_font_size=10)
    else:
        dendrogram(avg, labels=labels1, leaf_font_size=11)
    locs, ll = plt.xticks()
    plt.setp(ll, rotation=90)
    # Remove outliers and their labels
    outliers = []
    for i in range((len(clusters) - 1), -1, -1):
        if clusters[i] not in mfe:
            print("Outlier stock: " + str(labels1[i]))
            outliers.append(labels1[i])
            dist_mat = np.delete(dist_mat, i, 0)
            dist_mat = np.delete(dist_mat, i, 1)
            del labels1[i]
    print("Total Outliers: " + str(len(outliers)))
    # Finally do the clustering !!!
    ward = linkage(dist_mat, method='ward', metric='euclidean')
    clusters1 = fcluster(ward, 3, 'maxclust')
    #print(clusters1)
    #print(labels1)
    plt.subplot(212)
    plt.title("After removing outliers")
    if num > 50:
        dendrogram(ward, no_labels=True, leaf_font_size=10)
    else:
        dendrogram(ward, labels=labels1, leaf_font_size=11)
    plt.subplots_adjust(hspace=.5)
    locs, ll = plt.xticks()
    plt.setp(ll, rotation=90)
    plots.append(f)
    ########### Plot Initials & Summaries ############
    f2 = plt.figure(figsize=(6, 6), dpi=100, facecolor='white')
    plt.subplot(211)
    colormap = plt.cm.gist_ncar
    f2.gca().set_color_cycle([colormap(i) for i in np.linspace(0, 0.9, num)])
    for k in data.keys():
        data[k].plot()
    plt.subplot(212)
    f2.gca().set_color_cycle([colormap(i) for i in np.linspace(0, 0.9, num)])
    for k in sum_t.keys():
        (sum_t[k] / 10).plot()
    plt.xlim(0, len(sum_t[max(sum_t, key=len)]) - 1)
    plt.subplots_adjust(hspace=.32)
    plots.append(f2)
    ############# Clustering Summarized Timeseries ############
    S = len(sum_t)
    dist_mat = np.empty(shape=(S, S))
    ii = 0
    labels2 = []
    # Calculate distance matrix using DTW
    for i in sum_t.keys():
        jj = 0
        for j in sum_t.keys():
            if ii == jj:
                dist_mat[ii][jj] = 0
            else:
                dist_mat[ii][jj] = dtw_std([z for z in sum_t[i]], [z for z in sum_t[j]], dist_only=True)
            jj += 1
        ii += 1
        labels2.append(i)
    # Use average linkage + DTW to remove outliers
    avg = linkage(squareform(dist_mat), method='average', metric='euclidean')
    clusters = fcluster(avg, floor(log(num, 2)), 'maxclust')
    #print(clusters)
    freq = {}
    for i in clusters:
        if i not in freq:
            freq[i] = 1
        else:
            freq[i] += 1
    # Stocks which appear in almost empty clusters are considered outliers
    thresh = max(freq.values())
    mfe = [k for k, v in freq.items() if v > floor(num / 10)]
    f3 = plt.figure(figsize=(6, 6), dpi=100, facecolor='white')
    plt.subplot(211)
    plt.title("With Outliers")
    if num > 50:
        dendrogram(avg, no_labels=True, leaf_font_size=10)
    else:
        dendrogram(avg, labels=labels2, leaf_font_size=11)
    locs, ll = plt.xticks()
    plt.setp(ll, rotation=90)
    outliers = []
    # Remove outliers
    for i in range((len(clusters) - 1), -1, -1):
        if clusters[i] not in mfe:
            print("Outlier stock: " + str(labels2[i]))
            outliers.append(labels2[i])
            dist_mat = np.delete(dist_mat, i, 0)
            dist_mat = np.delete(dist_mat, i, 1)
            del labels2[i]
    print("Total Outliers: " + str(len(outliers)))
    # Finally do the clustering !!!
    ward = linkage(dist_mat, method='ward', metric='euclidean')
    clusters2 = fcluster(ward, 3, 'maxclust')
    #print(clusters2)
    #print(labels2)
    plt.subplot(212)
    plt.title("After removing outliers")
    if num > 50:
        dendrogram(ward, leaf_font_size=10)
    else:
        dendrogram(ward, labels=labels2, leaf_font_size=11)
    plt.subplots_adjust(hspace=.5)
    locs, ll = plt.xticks()
    plt.setp(ll, rotation=90)
    plots.append(f3)
    colormap = plt.cm.gist_ncar
    f4 = plt.figure(figsize=(6, 6), dpi=100, facecolor='white')
    plt.subplot(311)
    plt.title("Clusters with Initial Timeseries")
    for i in range(1, 4):
        plt.subplot(int(310 + i))
        f4.gca().set_color_cycle([colormap(ii) for ii in np.linspace(0, 0.9, num)])
        for j in range(len(clusters1)):
            if clusters1[j] == i:
                data[labels1[j]].plot(label=labels1[j])
        #plt.xlim(0, len(sum_t[max(sum_t, key=len)])-1)
        plt.legend(loc=4, prop={'size': 10})
    plt.subplots_adjust(hspace=.55)
    plots.append(f4)
    f5 = plt.figure(figsize=(6, 6), dpi=100, facecolor='white')
    plt.subplot(311)
    plt.title("Clusters based on Summarization Timeseries")
    for i in range(1, 4):
        plt.subplot(int(310 + i))
        f4.gca().set_color_cycle([colormap(ii) for ii in np.linspace(0, 0.9, num)])
        for j in range(len(clusters2)):
            if clusters2[j] == i:
                data[labels2[j]].plot(label=labels2[j])
        #plt.xlim(0, len(sum_t[max(sum_t, key=len)])-1)
        plt.legend(loc=4, prop={'size': 10})
    plt.subplots_adjust(hspace=.55)
    plots.append(f5)
    f6 = plt.figure(figsize=(6, 6), dpi=100, facecolor='white')
    plt.subplot(311)
    plt.title("Clusters with Summarization Timeseries")
    for i in range(1, 4):
        plt.subplot(int(310 + i))
        colormap = plt.cm.gist_ncar
        f4.gca().set_color_cycle([colormap(ii) for ii in np.linspace(0, 0.9, num)])
        for j in range(len(clusters2)):
            if clusters2[j] == i:
                (sum_t[labels2[j]] / 10).plot(label=labels2[j])
        #plt.xlim(0, len(sum_t[max(sum_t, key=len)])-1)
        plt.legend(loc=4, prop={'size': 10})
    plt.subplots_adjust(hspace=.55)
    plots.append(f6)
    return (plots)
    mGamma[i][0] = inf
for i in range(alen + 1):
    mGamma[0][i] = inf
mGamma[0][0] = 0
for i in range(blen):
    for j in range(alen):
        cost = 1 - numpy.corrcoef(a[j], b[i])[0, 1]
        mGamma[i + 1][j + 1] = cost + min(mGamma[i][j], mGamma[i + 1][j], mGamma[i][j + 1])
return mGamma[blen][alen]

dist = [[] for index in range(len(chromaset))]
for i in range(len(chromaset)):
    tempdist = []
    for j in range(12):
        # try all 12 chroma rotations of the hummed query
        tem, cost, p = mlpy.dtw_std(chromaset[i], hum_chroma, dist_only=False)
        tempdist.append(tem)
        hum_chroma = mod(hum_chroma + ones(len(hum_chroma)), 12)
    dist[i] = min(tempdist)
print song_list
print dist
match = song_list[dist.index(min(dist))]
for i in range(3):
    out1 = min(dist)
    print song_list[dist.index(out1)]
    song_list.remove(song_list[dist.index(out1)])
    dist.remove(out1)
execfile('all.py')
tn_female = np.array(data_female.tonic)
tn_male = mynorm_maxmin(tn_male)
tn_female = mynorm_maxmin(tn_female)

# keep one sample every N_SAMP
indexes = np.arange(len(tn_male))
keep = (indexes % N_SAMP == 0)
tn_male = np.array(tn_male[keep])
tn_female = np.array(tn_female[keep])

# SAMP_F = 1.0 / (data_male.iloc[1, 0] - data_male.iloc[0, 0])
# timestamp = np.arange(0, l / SAMP_F, 1.0 / SAMP_F)
# labs = np.array(data_female.iloc[:, -1])
# windows, labels = wnd.get_windows_no_mix(timestamp, labs, WINLEN, WINSTEP)
# extract the window portion from both files
# n_col = data_male.shape[1]
# for i in range(1, n_col - 1):

dtw_curr = mlpy.dtw_std(tn_male, tn_female)
dtw_curr = dtw_curr / len(tn_male)
# dtw_curr = modified_extract_features(tn_male, tn_female, windows, labels, timestamp)
dtw_measures.append(dtw_curr)

res_exp = np.vstack([experiments, dtw_measures])
np.savetxt('F_dtw_all_signal.csv', res_exp, delimiter=',')
#print dtw_measures
# save dtw_measures to a text file
def dtwDistance(x, y):
    dis, cost, path = mlpy.dtw_std(x, y, dist_only=False)
    return dis, path
    data[i] = (data[i] - minvals[index]) / ranges[index]

# plt.figure(1)
# plt.plot(data[0], label='Prox1')
# plt.plot(data[1], label='Prox2')
# plt.plot(data[2], label='Prox4')
# plt.legend()

# transpose for PCA, then transpose back
pcaData = PCA(data.T).Y.T

# plt.figure(2)
# plt.plot(pcaData[0], label='PCA0')
# plt.plot(pcaData[1], label='PCA1')
# plt.plot(pcaData[2], label='PCA2')
# plt.legend()

dist = 0
if prevPCA is not None:
    dist = mlpy.dtw_std(pcaData[0], prevPCA, dist_only=True)
    print dist
prevPCA = pcaData[0]
result = 'Dist: ' + str(dist)
start = False
finish = False
resultReady = True
# plt.show()
def sDTW(query, subject):
    # Subsequence DTW: slide the query across every length-M window of the
    # subject. The original discarded every distance; collecting the minimum
    # (an assumed intent) makes the scan return a usable result.
    M, N = len(query), len(subject)
    best = float('inf')
    for i in range(N - M + 1):
        d = mlpy.dtw_std(query, subject[i:i + M], dist_only=True)
        best = min(best, d)
    return best
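# Usage sketch for the subsequence scan above (the embedded-pattern data is
# invented): the minimum should be ~0 where the query aligns with its copy.
import numpy as np

q = np.sin(np.linspace(0, 3, 30))
s = np.concatenate([np.zeros(50), q, np.zeros(50)])
best = sDTW(q, s)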
f.close()

indx = 0
f = open('', 'rU')  # Provide dataset 2 file path for comparison
for line in f:
    cells = line.split(",")
    # X = provide the index of the dataset column that needs to be analysed
    b.append((float)(cells[X]))
    indx = indx + 1
    if indx == dtw_data_limit:
        break
f.close()

dist, cost, path = mlpy.dtw_std(a, b, dist_only=False)
print("Distance between a and b temporal sequences")
print(dist)
print("############")
# End - Distance Calculation

# Start - Plot
plt.figure("Two temporal sequences")
plt.plot(a)
plt.plot(b)
fig = plt.figure("Accumulated Cost Matrix & warping path")
ax = fig.add_subplot(111)  # fixed: add_subplot takes a subplot spec, not a title
for i in range(s):
    d, p = fastdtw(eps_sueno[i].stt, eps_sueno[i].stt, dist=euclidean)
    dd, p = fastdtw(eps_sueno[i].stf, eps_sueno[i].stf, dist=euclidean)
    dt = mlpy.dtw_std(eps_sueno[i].stt, eps_sueno[i].stt, dist_only=True)
    df = mlpy.dtw_std(eps_sueno[i].stf, eps_sueno[i].stf, dist_only=True)
    print d, dd, dt, df
"""

# Compute the DTW distance matrix between each pair of individuals
s = len(eps_sueno)
distancias = np.zeros((s, s))
for i in range(s):
    for j in range(s):
        #distanceTemp, path = fastdtw(eps_sueno[i].stt, eps_sueno[j].stt, dist=euclidean)  # Temperature distance
        #distanceFlujo, path = fastdtw(eps_sueno[i].stf, eps_sueno[j].stf, dist=euclidean)  # Flow distance
        distanceTemp = mlpy.dtw_std(eps_sueno[i].stt, eps_sueno[j].stt, dist_only=True)  # Euclidean point distance
        distanceFlujo = mlpy.dtw_std(eps_sueno[i].stf, eps_sueno[j].stf, dist_only=True)
        distancias[j][i] = math.sqrt(math.pow(distanceTemp, 2) + math.pow(distanceFlujo, 2))  # Combined Euclidean distance
    print '.'
# Condensed distance vector required for clustering
print distancias
print distancias.shape
"""
Results per linkage method:
centroid: 0.82848866781
single:   0.340428699013
complete: 0.80537453305
average:  0.827708738138
weighted: 0.816403408353
querynames[index] = map(float, querynames[index])
querynames[index] = np.asarray(querynames[index])

# window
count = 0
for window in range(0, (len(querynames[-1]) / 250)):
    y = querynames[-1][count:count + 500]
    print(querynames[-1][count:count + 500])
    # test each amplicon against F reference
    for amp in range(0, 11):
        # amplicons 1F
        x = querynames[amp]
        # mlpy
        timeb = datetime.now()
        mlpystddist, mlpystdcost, mlpystdpath = mlpy.dtw_std(x, y, dist_only=False)
        timet = datetime.now() - timeb
        print("mlpy complete on amp " + str(amp + 1))
        with open("bench_log.txt", "a") as text_file:
            text_file.write("\n" + str(amp + 1) + "," + str(window + 1) + ",mlpy," +
                            str(mlpystddist) + "," + str(timet.microseconds) + ',' +
                            str(mlpystdpath[1][0] + count) + ',' + str(mlpystdpath[1][-1] + count))
        path1 = np.savetxt('paths/' + "amp_" + str(amp + 1) + "_window_" + str(window + 1) + '_query_mlpy.txt',
                           mlpystdpath[0], delimiter=',')
        path2 = np.savetxt('paths/' + "amp_" + str(amp + 1) + "_window_" + str(window + 1) + '_ref_mlpy.txt',
def test_dtw_example():
    """Test using code"""
    # Window to get DTW solution
    window = pd.to_timedelta(1. * 3600., unit='s')
    # Setup format for datetime string to pass to my_dtw later
    dfmt = '{0:%Y/%m/%d %H:%M:%S}'
    #twind4 = pd.to_datetime('2016/12/09 04:45:29')
    #start_t4 = dfmt.format(twind4 - 2.5 * window)
    #end_t4 = dfmt.format(twind4 + 3.5 * window)
    twind4 = pd.to_datetime('2016/12/21 08:43:12')
    start_t4 = dfmt.format(twind4 - 2.5 * window)
    end_t4 = dfmt.format(twind4 + 3.5 * window)
    #my_dtw4 = mtr.dtw_plane(start_t4, end_t4, nproc=4, penalty=True, events=7, earth_craft=['THEMIS_B'], par=['Bt'], speed_pen=500, mag_pen=100.2)
    my_dtw4 = mtr.dtw_plane(start_t4, end_t4, nproc=4, penalty=True, events=7,
                            par=['Bt'], earth_craft=['THEMIS_B'],
                            speed_pen=500, mag_pen=100.2)
    my_dtw4.init_read()
    my_dtw4.iterate_dtw()
    #my_dtw4.pred_earth()
    #mtr.omni_plot(my_dtw4)

    sc1 = 'Wind'
    sc2 = 'SOHO'
    x1 = np.array(my_dtw4.plsm[sc1].SPEED.ffill().bfill().values, dtype=np.double)
    x2 = np.array(my_dtw4.plsm[sc2].SPEED.ffill().bfill().values, dtype=np.double)

    # Example DTW plot
    p1, p2, cost1 = md.dtw_path_single(x1, x2, 300, 30, 500.0, 0.0, 0.5, 1)
    #p1, p2, cost = md.dtw_path_single(x2, x2, 2700, 2700/2, 0.0, 0.01, 1)
    # mlpy example path
    dist, costa, path = mlpy.dtw_std(x1, x2, dist_only=False)
    pa, pb = path[0], path[1]

    # create multi panel diagnostic plot 2018/11/26 J. Prchlik
    fig, ax = plt.subplots(nrows=2, ncols=2,
                           gridspec_kw={'height_ratios': [2, 1], 'width_ratios': [1, 2]},
                           figsize=(8, 8))
    fig.subplots_adjust(hspace=0.05, wspace=0.05)
    # turn off bottom left axis
    ax[1, 0].axis('off')
    lims = mdates.date2num([my_dtw4.plsm[sc1].index.min(), my_dtw4.plsm[sc1].index.max(),
                            my_dtw4.plsm[sc2].index.min(), my_dtw4.plsm[sc2].index.max()])
    v_max, v_min = np.percentile(costa, [95, 15])
    ax[0, 1].imshow(costa, extent=lims, origin='lower', cmap=plt.cm.gray.reversed(),
                    vmin=v_min, vmax=v_max, aspect='auto')
    #ax.imshow(cost, extent=[0, x2.size, 0, x2[::2].size], origin='lower')
    ax[0, 1].plot(my_dtw4.plsm[sc1].iloc[p1, :].index, my_dtw4.plsm[sc2].iloc[p2, :].index, '--', color='black')
    ax[0, 1].plot(my_dtw4.plsm[sc1].iloc[pa, :].index, my_dtw4.plsm[sc2].iloc[pb, :].index, '-', color='red')
    ax[0, 1].xaxis_date()
    ax[0, 1].yaxis_date()
    date_format = mdates.DateFormatter('%H:%M')

    # plot the plasma values on the off axes
    ax[1, 1].plot(my_dtw4.plsm[sc1].index, x1, color='blue')
    ax[0, 0].plot(x2, my_dtw4.plsm[sc2].index, color='teal')

    # set up axis formats
    ax[1, 1].xaxis_date()
    ax[0, 0].yaxis_date()
    # force limits to be the same as the cost matrix
    ax[1, 1].set_xlim(lims[:2])
    ax[0, 0].set_ylim(lims[2:])
    # Format the printed dates
    ax[1, 1].xaxis.set_major_formatter(date_format)
    ax[0, 0].yaxis.set_major_formatter(date_format)

    # Add time labels
    ax[1, 1].set_xlabel(sc1 + ' Time [UTC]')
    ax[0, 0].set_ylabel(sc2 + ' Time [UTC]')
    # Add labels for speeds
    ax[1, 1].set_ylabel('Flow Speed [km/s]')
    ax[0, 0].set_xlabel('Flow Speed [km/s]')

    # turn off tick labels in the center plot
    ax[0, 1].set_xticklabels([])
    ax[0, 1].set_yticklabels([])

    # set Wind and SOHO to have the same plasma parameter limits
    pls_lim = [420., 675.]
    ax[0, 0].set_xlim(pls_lim)
    ax[1, 1].set_ylim(pls_lim)
    # copy y-axis labels from the Wind plot to the SOHO plot
    ax[0, 0].set_xticks(ax[1, 1].get_yticks())
    ax[0, 0].set_xlim(pls_lim)
    ax[1, 1].set_ylim(pls_lim)
    ##ax[0, 0].set_xlabel('Flow Speed [km/s]')

    # clean up the axes with plasma data
    fancy_plot(ax[0, 0])
    fancy_plot(ax[1, 1])

    # This simply sets the x-axis dates diagonally so they fit better.
    #fig.autofmt_xdate()
    fig.savefig('../plots/example_dtw_path.png', bbox_pad=.1, bbox_inches='tight')
    fig.savefig('../plots/example_dtw_path.eps', bbox_pad=.1, bbox_inches='tight')
    return x1, x2, my_dtw4
from PIL import Image
from numpy import array
import os
import pprint
import mlpy
from collections import OrderedDict

data = {}
l = len(os.listdir("image"))
for fn in range(0, l - 1):
    img = Image.open("image\\{0}.jpg".format(fn))
    arr = array(img)
    list = []
    for n in arr:
        list.append(n[0][0])  # R
    for n in arr:
        list.append(n[0][1])  # G
    for n in arr:
        list.append(n[0][2])  # B
    data[fn] = list

reference = data[31]
result = {}
for x, y in data.items():
    #print("{0} ----------------- {1}".format(x, y))
    dist = mlpy.dtw_std(reference, y, dist_only=True)
    result[x] = dist

sortedRes = OrderedDict(sorted(result.items(), key=lambda x: x[1]))
for a, b in sortedRes.items():
    print("{0} - {1}".format(a, b))
    if counts[1][1] / float(counts[0][1]) > 0.8:
        print 'Ambiguous label for sampleid = %d : counts : %s' % (sampleid, counts)
    #print counts
    return counts[0][0]

##
newlabels = np.array([knn_DM(DM, sampleid) for sampleid in xrange(len(labels))])
modified = np.flatnonzero(newlabels - labels)

## Write a new file
with open('out.txt', 'w') as f:
    utils.write_data(f, accel, gyro, labels)

##
sample1 = 18
sample2 = 151
dist, cost, path = mlpy.dtw_std(accel[sample1, 0, :], accel[sample2, 0, :], dist_only=False)

pl.figure()
pl.suptitle('dist = %f' % dist)
pl.subplot(211)
pl.title('%d' % sample1)
pl.plot(accel[sample1, 0, :])
pl.ylim(0, 5000)
pl.subplot(212)
pl.title('%d' % sample2)
pl.plot(accel[sample2, 0, :])
pl.ylim(0, 5000)

pl.figure()
pl.title('%d - %d' % (sample1, sample2))
pl.imshow(cost.T, origin='lower', cmap=cm.gray, interpolation='nearest')
pl.plot(path[0], path[1], 'w')
pl.xlim((-0.5, cost.shape[0] - 0.5))
pl.ylim((-0.5, cost.shape[1] - 0.5))
##
Comparisons -= 1
# parameter descriptions:
# https://github.com/sauliusl/mlpy/blob/master/mlpy/dtw/dtw.pyx
# don't compare if one series is less than 65% of the other's length
if len(Hand_Series[real]) > len(SynthSeries[pseudo]) and len(SynthSeries[pseudo]) / float(len(Hand_Series[real])) < 0.65:
    # print 'skip'
    dist = float("inf")
elif len(SynthSeries[pseudo]) > len(Hand_Series[real]) and len(Hand_Series[real]) / float(len(SynthSeries[pseudo])) < 0.65:
    # print 'skip'
    dist = float("inf")
else:
    try:
        if 'SC' in Zoni:
            dist, cost, path = mlpy.dtw_std(SynthSeries[pseudo], Hand_Series[real], dist_only=False, metric='sqeuclidean', constraint='sakoe_chiba', k=PlatosZonis)
        else:
            dist, cost, path = mlpy.dtw_std(SynthSeries[pseudo], Hand_Series[real], dist_only=False, metric='euclidean', constraint='slanted-band', k=PlatosZonis)
    except Exception as ex:
        sys.exc_clear()
        with open('/tmp/col_lathos.txt', 'w') as arxeio:
            arxeio.write("dtw fak")
    finally:
        dist = dist / float(len(SynthSeries[pseudo]) + len(Hand_Series[real]))
        print 'Normalized cost', dist
        print 'DTW cost', SynthNames[pseudo], 'and', Hand_Names[real], transcipt, dist
for w in xrange(0, len(SetTwoNames)):
    Comps -= 1
    dist = 1000
    # don't compare if one series is less than 65% of the other's length
    if len(SetTwoFeats[w]) > len(SetOneFeats[q]) and len(SetOneFeats[q]) / float(len(SetTwoFeats[w])) < 0.65:
        # print 'skip'
        dist = float("inf")
    elif len(SetOneFeats[q]) > len(SetTwoFeats[w]) and len(SetTwoFeats[w]) / float(len(SetOneFeats[q])) < 0.65:
        # print 'skip'
        dist = float("inf")
    else:
        dist, cost, path = mlpy.dtw_std(SetTwoFeats[w], SetOneFeats[q], dist_only=False, metric='euclidean', constraint='slanted-band', k=50)
        dist = dist / float(len(SetOneFeats[q]) + len(SetTwoFeats[w]))
    synth = SetTwoNames[w]
    res_transcript = ''
    keyword = ''
    pos = 0
    for c in xrange(0, len(synth)):
        if '_' == synth[c]:
            pos = c + 1
    keyword = synth[pos:].replace('.png', '')
import json
import numpy
from scipy import interpolate
import mlpy

from main.util.common import readFromJson

lines = readFromJson("normalized_points.json")
timelines = map(lambda line: map(lambda x: x[1], json.loads(line)["twitter-data"]), lines)
numTimelines = len(timelines)

distances = []
for i in range(0, numTimelines):
    for j in range(i + 1, numTimelines):
        distances.append(mlpy.dtw_std(timelines[i], timelines[j]))

print json.dumps(distances)
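# Note (an assumption about intent, not in the original): the loop above fills
# `distances` in the condensed upper-triangle order scipy expects, so it can
# be turned back into a square matrix directly:
from scipy.spatial.distance import squareform
mat = squareform(numpy.asarray(distances))  # numTimelines x numTimelines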
    # (e.g. 0, 0, 1, 1, 1, 2, 3, 3, 3)
    for i in range(rr(0, steps)):
        seq.append(n)
    return seq


if DEBUG:
    with Section('Dynamic Time Warping algorithm - MLPY'):
        # Using MLPY:
        # First, make sure deps are setup.
        # `brew install gsl`
        # Download from SF: http://mlpy.sourceforge.net/
        # Then install using setup.py:
        # `cd MLPY_PATH/setup.py install`
        # Now this makes it fun.
        x, y = random_timesequence(0, 10), random_timesequence(0, 10)
        # Taken from examples: http://mlpy.sourceforge.net/docs/3.5/dtw.html#id3
        distance, cost, path = mlpy.dtw_std(x, y, dist_only=False)
        fig = plot.figure(1)
        axes = fig.add_subplot(111)
        plot1 = plot.imshow(cost.T, origin='lower', cmap=cm.gray, interpolation='nearest')
        plot2 = plot.plot(path[0], path[1], 'w')
        bound = 0.5
        xlim = axes.set_xlim((-bound, cost.shape[0] - bound))
        ylim = axes.set_ylim((-bound, cost.shape[1] - bound))
        plot.show()