def __init__(self, selepisodio, tf=False, cons=False):
        #Represents a sleep episode via its flow and temperature time series
        class Individuo:
            def __init__(self, nombre, tiempo, temperatura=[], flujo=[], consumo=[]):
                self.nombre = nombre
                self.tiempo = tiempo
                self.stt = temperatura
                self.stf = flujo
                self.stc = consumo
        
        sel = selepisodio
        
        print "Normalizar", len(sel.epFiltro), "episodios de sueño"
        # Normalizar por estandarización cada episodio de sueño (temperatura y flujo)
        self.eps_sueno = []
        if(tf):
            for i in sel.epFiltro:
                a = preprocessing.scale(i.temp, copy=True)
                b = preprocessing.scale(i.flujo, copy=True)
                self.eps_sueno.append(Individuo(i.nombre, i.tiempo, temperatura=a, flujo=b))
        elif(cons):
            for i in sel.epFiltro:
                a = preprocessing.scale(i.consumo, copy=True)
                self.eps_sueno.append(Individuo(i.nombre, i.tiempo, consumo=a))
        """
        #The diagonal of the distance matrix is not 0 with fastdtw: identical time series give distances > 0 !!!
        for i in range(s):
            print eps_sueno[i].stt[-1], eps_sueno[i].stf[-1]
        for i in range(s):
            d, p = fastdtw(eps_sueno[i].stt, eps_sueno[i].stt, dist=euclidean)
            dd, p = fastdtw(eps_sueno[i].stf, eps_sueno[i].stf, dist=euclidean)
            dt = mlpy.dtw_std(eps_sueno[i].stt, eps_sueno[i].stt, dist_only=True)
            df = mlpy.dtw_std(eps_sueno[i].stf, eps_sueno[i].stf, dist_only=True)
            print d, dd, dt, df
        """
        #Compute the pairwise distance matrix between individuals via DTW
        s = len(self.eps_sueno)
        self.distancias = np.zeros((s, s))
        if(tf):
            for i in range(s):
                for j in range(s):
                    #distanceTemp , path = fastdtw(eps_sueno[i].stt, eps_sueno[j].stt, dist=euclidean) #Distance in temperature
                    #distanceFlujo , path = fastdtw(eps_sueno[i].stf, eps_sueno[j].stf, dist=euclidean) #Distance in flow
                    distanceTemp = mlpy.dtw_std(self.eps_sueno[i].stt, self.eps_sueno[j].stt, dist_only=True) #Euclidean dist.
                    distanceFlujo = mlpy.dtw_std(self.eps_sueno[i].stf, self.eps_sueno[j].stf, dist_only=True)
                    self.distancias[j][i] = math.sqrt(math.pow(distanceTemp, 2) + math.pow(distanceFlujo, 2)) #Combined Euclidean distance
        elif(cons):
            for i in range(s):
                for j in range(s):
                    self.distancias[j][i] = mlpy.dtw_std(self.eps_sueno[i].stc, self.eps_sueno[j].stc, dist_only=True) #Dist. euclidea
        #Vector with the distances required for clustering
        #print distancias
        print self.distancias.shape

        #Convert the square distance matrix to condensed (vector) form
        dists = ssd.squareform(self.distancias)
        print dists
        #Compute hierarchical clustering
        self.Z = linkage(dists, 'average')
    def myDTW(self, text_file, arr1, arr2, arr3, arr4, arr5, arr6, arr7, arr8,
              arr9):
        df = pd.read_csv(text_file, sep=' ', header=None)
        x1 = df.iloc[0:75, 0].values
        y1 = df.iloc[0:75, 1].values
        z1 = df.iloc[0:75, 2].values
        x2 = df.iloc[0:75, 3].values
        y2 = df.iloc[0:75, 4].values
        z2 = df.iloc[0:75, 5].values
        x3 = df.iloc[0:75, 6].values
        y3 = df.iloc[0:75, 7].values
        z3 = df.iloc[0:75, 8].values

        dist1 = mlpy.dtw_std(x1, arr1, dist_only=True)
        #print(text_file + " distX: ", dist1)
        dist2 = mlpy.dtw_std(y1, arr2, dist_only=True)
        #print(text_file + " distY: ", dist2)
        dist3 = mlpy.dtw_std(z1, arr3, dist_only=True)
        #print(text_file + " distZ: ", dist3)

        dist4 = mlpy.dtw_std(x2, arr4, dist_only=True)
        #print(text_file + " distX: ", dist4)
        dist5 = mlpy.dtw_std(y2, arr5, dist_only=True)
        #print(text_file + " distY: ", dist5)
        dist6 = mlpy.dtw_std(z2, arr6, dist_only=True)
        #print(text_file + " distZ: ", dist6)

        dist7 = mlpy.dtw_std(x3, arr7, dist_only=True)
        #print(text_file + " distX: ", dist7)
        dist8 = mlpy.dtw_std(y3, arr8, dist_only=True)
        #print(text_file + " distY: ", dist8)
        dist9 = mlpy.dtw_std(z3, arr9, dist_only=True)
        #print(text_file + " distZ: ", dist9)

        return dist1 + dist2 + dist3 + dist4 + dist5 + dist6 + dist7 + dist8 + dist9
    def __init__(self, episodios, tf=False, cons=False):
        #Represents a sleep episode via its flow and temperature time series
        class Individuo:
            def __init__(self, nombre, tiempo, temperatura=[], flujo=[], consumo=[]):
                self.nombre = nombre
                self.tiempo = tiempo
                self.stt = temperatura
                self.stf = flujo
                self.stc = consumo
        
        if(DEBUG): print "Normalizar", len(episodios), "episodios de sueño"
        # Normalizar por estandarización cada episodio de sueño (temperatura y flujo)
        self.eps_sueno = []
        if(tf):
            for i in episodios:
                a = preprocessing.scale(i.temp, copy=True)
                b = preprocessing.scale(i.flujo, copy=True)
                self.eps_sueno.append(Individuo(i.nombre, i.tiempo, temperatura=a, flujo=b))
        elif(cons):
            for i in episodios:
                a = preprocessing.scale(i.consumo, copy=True)
                self.eps_sueno.append(Individuo(i.nombre, i.tiempo, consumo=a))
        
        #Compute the pairwise distance matrix between individuals via DTW
        s = len(self.eps_sueno)
        self.distancias = np.zeros((s, s))
        if(tf):
            for i in range(s):
                for j in range(s):
                    distanceTemp = mlpy.dtw_std(self.eps_sueno[i].stt, self.eps_sueno[j].stt, dist_only=True)
                    distanceFlujo = mlpy.dtw_std(self.eps_sueno[i].stf, self.eps_sueno[j].stf, dist_only=True)
                    self.distancias[j][i] = math.sqrt(math.pow(distanceTemp, 2) + math.pow(distanceFlujo, 2)) #Combined Euclidean distance
        elif(cons):
            for i in range(s):
                for j in range(s):
                    self.distancias[j][i] = mlpy.dtw_std(self.eps_sueno[i].stc, self.eps_sueno[j].stc, dist_only=True) #Euclidean dist.
                    
        #Vector with the distances required for clustering
        if(DEBUG): print "Distance matrix", self.distancias.shape, self.distancias

        #Convert the square distance matrix to condensed (vector) form
        dists = ssd.squareform(self.distancias)
        #Compute hierarchical clustering
        self.Z = linkage(dists, 'average')

        #Labels for each episode, shown in the dendrogram
        self.labels=[]
        for i in self.eps_sueno:
            self.labels.append(i.nombre)
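
The linkage matrix self.Z and self.labels plug straight into scipy's dendrogram. A minimal sketch of the same pipeline on toy data (the series, variable names, and labels here are illustrative, assuming mlpy and scipy are installed):

import numpy as np
import scipy.spatial.distance as ssd
from scipy.cluster.hierarchy import linkage, dendrogram
import mlpy

# Three toy "episodes": two similar sine waves and one shifted far away
series = [np.sin(np.linspace(0, 6, 50) + shift) for shift in (0.0, 0.1, 3.0)]
s = len(series)
distancias = np.zeros((s, s))
for i in range(s):
    for j in range(i + 1, s):
        d = mlpy.dtw_std(series[i], series[j], dist_only=True)
        distancias[i][j] = distancias[j][i] = d  # keep the matrix exactly symmetric
Z = linkage(ssd.squareform(distancias), 'average')  # condensed vector -> average linkage
dendrogram(Z, labels=['ep1', 'ep2', 'ep3'])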
Example #4
    def _dist_matrix(self, x, y):
        """Computes the M x N distance matrix between the training
        dataset and testing dataset (y) using the DTW distance measure

        Arguments
        ---------
        x : array of shape [n_samples, n_timepoints]

        y : array of shape [n_samples, n_timepoints]

        Returns
        -------
        Distance matrix between each item of x and y with
            shape [training_n_samples, testing_n_samples]
        """

        # Compute the distance matrix
        dm_count = 0
        x_s = np.shape(x)
        y_s = np.shape(y)
        dm = np.zeros((x_s[0], y_s[0]))
        dm_size = x_s[0] * y_s[0]

        total = dm_size
        p = ProgressBar()

        for i in xrange(0, x_s[0]):
            for j in xrange(0, y_s[0]):
                dm[i, j] = mlpy.dtw_std(x[i, :], y[j, :], dist_only=True)
                dm_count += 1
            # Update progress bar once per row of x
            if self.progress_bar:
                p(float(dm_count) / total * 100)

        return dm
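
For context, a cross-distance matrix like dm is usually consumed by a 1-nearest-neighbour DTW classifier. A hedged sketch of that step (knn1_dtw_predict and its arguments are hypothetical, not part of the original class):

import numpy as np
import mlpy

def knn1_dtw_predict(train_x, train_labels, test_x):
    # train_x, test_x: arrays of shape [n_samples, n_timepoints]
    preds = []
    for ts in test_x:
        dists = [mlpy.dtw_std(tr, ts, dist_only=True) for tr in train_x]
        preds.append(train_labels[int(np.argmin(dists))])  # nearest training series wins
    return np.array(preds)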
Example #5
def classifyImg():
    fileList = [
        x for x in listdir(r'F:\PY\data\Img') if x.lower().endswith(".jpg")
    ]
    m = len(fileList)

    for fn in range(m):
        img = Image.open(r'F:\PY\data\Img\{0}'.format(fileList[fn]))
        arr = array(img)
        pixels = []
        if arr.ndim == 2:
            print fileList[fn]
            continue
        for n in arr:
            pixels.append(n[0][0])
        for n in arr:
            pixels.append(n[0][1])
        for n in arr:
            pixels.append(n[0][2])

        data[fileList[fn]] = pixels
    reference = data['007_0025.jpg']
    result = {}

    for x, y in data.items():
        dist = mlpy.dtw_std(reference, y, dist_only=True)
        result[x] = dist

    sortedRes = OrderedDict(sorted(result.items(), key=lambda x: x[1]))

    i = 0
    for a, b in sortedRes.items():
        print("{0} - {1}".format(a, b))
        i = i + 1
        if i == 10:
            break
def dtw_checker(inputFile1, inputFile2):
    ''' COMPARES TWO GIVEN FILES '''

    # DEFINING CONSTANTS
    DIST_STEP = 2000

    (inAudio1, fs1) = file_reader(inputFile1)
    (inAudio2, fs2) = file_reader(inputFile2)

    # APPLY DTW ALGORITHM (http://mlpy.sourceforge.net/docs/3.5/dtw.html)   
    dist_array = []
    i = 0

    while (i < len(inAudio1)):
        (dist, cost, path) = mlpy.dtw_std(inAudio1[i:DIST_STEP+i], inAudio2[i:DIST_STEP+i], dist_only=False)
        dist_array.append(dist)
        i+=DIST_STEP

    dist_array = numpy.asarray(dist_array)
    dist_final = numpy.mean(dist_array)

    result = os.path.basename(inputFile1) + " -- " + os.path.basename(inputFile2) + " ==> " + str(dist_final)
#    print result

    return (result, dist_final)
Example #7
def keywordSpotter(topN):
    f = open(KEYWORDS, 'r')
    keywords = f.readlines()
    f.close()
    featureDict = calculateTestFeatures()
    for k in keywords:
        trainsample = getTrainSample(k)
        # dictionary e.g.: {'302-30-07': 'h-u-n-d-r-e-d\n', '301-16-08': 'h-u-n-d-r-e-d\n'}
        testSamples = getTestSamples(k)
        img = Image.open(IMG_PATH + trainsample + IMG_ENDING)
        x = np.asarray(img)
        x = extractFeatures(x)

        distDict = {}
        totalWords = 0
        for filename in os.listdir(IMG_PATH):
            #if (int(filename[0:3]) >= 300):
            if (int(filename[0:3]) >= 200):  #take all
                totalWords += 1
                y = featureDict[filename]
                dist, cost, path = mlpy.dtw_std(x, y, dist_only=False)

                if len(distDict) < topN:
                    distDict[filename[0:9]] = dist

                else:
                    if distDict[max(distDict, key=distDict.get)] > dist:
                        del distDict[max(distDict, key=distDict.get)]
                        distDict[filename[0:9]] = dist

        # print distDict
        evaluation(distDict, testSamples, totalWords, k)
Example #8
def mlpy_example():

    x = np.array([0, 0, 0, 0, 1, 1, 2, 2, 3, 2, 1, 1, 0, 0, 0, 0],
                 dtype=np.double)
    y = np.array([0, 0, 1, 1, 2, 2, 3, 3, 3, 3, 2, 2, 1, 1, 0, 0],
                 dtype=np.double)
    test_1, test_2, cost_m = md.dtw_path_single(x, y, 200, 200, 100, 5.0, 5.0,
                                                1)

    dist, cost, path = mlpy.dtw_std(x, y, dist_only=False)
    true_1 = path[0]
    true_2 = path[1]
    #test the program returns the answers in the correct order
    ind_1 = np.sum(np.abs(test_1 - true_1))
    ind_2 = np.sum(np.abs(test_2 - true_2))

    #print(cost_m-cost)
    #fig, ax =plt.subplots(ncols=2,sharex=True,sharey=True)
    #ax[0].imshow(cost_m.T,extent=[0,x.size,0,y.size],origin='lower')
    #ax[1].imshow(cost.T  ,extent=[0,x.size,0,y.size],origin='lower')
    ##ax.imshow(cost,extent=[0,x2.size,0,x2[::2].size],origin='lower')
    #ax[0].plot(test_1,test_2,'--',color='black')
    #ax[1].plot(true_1,true_2,'.-',color='red')
    #plt.show()

    #use the sum of 0 integers to confirm it is true
    assert ind_1 == 0
    assert ind_2 == 0
    assert np.allclose(cost_m, cost)
Example #9
def dtw(x, y):
    x = np.genfromtxt(
        x, delimiter=','
    )  #DTW correlate between two signals to calculate the distance
    y = np.genfromtxt(y, delimiter=',')
    x = downsampling(x)
    y = downsampling(y)
    x = x[:, 1]
    y = y[:, 1]
    x = (x - np.mean(x)) / np.std(x)
    y = (y - np.mean(y)) / np.std(y)
    plt.plot(x)
    plt.plot(y)
    plt.show()
    dist, cost, path = mlpy.dtw_std(x, y, dist_only=False)
    print np.array(dist)
    fig = plt.figure(1)
    ax = fig.add_subplot(111)
    plot1 = plt.imshow(cost.T,
                       origin='lower',
                       cmap=cm.gray,
                       interpolation='nearest')
    plot2 = plt.plot(path[0], path[1], 'w')
    xlim = ax.set_xlim((-0.5, cost.shape[0] - 0.5))
    ylim = ax.set_ylim((-0.5, cost.shape[1] - 0.5))
    plt.show()
    return dist
Example #10
def modified_extract_features(data_male, data_female, windows, labels, timestamp):
    '''
    Windowed DTW between two signals, one value per window.
    :param data_male: signal of the first subject (np array)
    :param data_female: signal of the second subject (np array)
    :param windows: list of (t_start, t_end) pairs, in seconds
    :param labels: one label per window
    :param timestamp: timestamps of the samples
    '''
    dtw_values = []
    for i in range(len(windows)):
        t_start, t_end = windows[i]
        win_s1= data_male[np.where((timestamp >= t_start) & (timestamp < t_end))[0]]
        win_s2= data_female[np.where((timestamp >= t_start) &  (timestamp < t_end))[0]]
        # normalize the windows
        #tn_male = np.array(data_male.tonic)
        #tn_female = np.array(data_female.tonic)
    
        #tn_male = mynorm_maxmin(tn_male)
        #tn_female = mynorm_maxmin(tn_female)
        win_s1= (win_s1- np.mean(win_s1)) / np.std(win_s1)
        win_s2= (win_s2- np.mean(win_s2)) / np.std(win_s2)
            
        dtw_curr= mlpy.dtw_std(win_s1, win_s2)
        dtw_curr = dtw_curr/ len(win_s1)
        lab = labels[i]
        dtw_values.append([dtw_curr, lab])
        # attach the labels in some way

    return dtw_values  # (already with the labels)
def dtw_interseries(s1, s2, squared=False):
    """
    :param s1: Time series 1 as list
    :param s2: Time series 2 as list
    :param squared: boolean. If True, the local distance is the l2-norm; if False, the l1-norm
    :return: unnormalized minimum-distance warp path between sequences
    """
    return mlpy.dtw_std(s1, s2, dist_only=True, squared=squared)
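
A quick, illustrative check of the squared flag (the values are arbitrary; squared=True uses squared differences as the local cost):

import mlpy

a = [0.0, 1.0, 2.0]
b = [0.0, 2.0, 4.0]
print(mlpy.dtw_std(a, b, dist_only=True, squared=False))  # l1-style local cost
print(mlpy.dtw_std(a, b, dist_only=True, squared=True))   # squared (l2-style) local cost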
Example #14
def dtwDistance(list1, list2):

	y1 = [li['y'] for li in list1]
	y2 = [li['y'] for li in list2]
	
	dist= mlpy.dtw_std(y1, y2)
	
	return dist
Example #15
def dtw(filepath1,filepath2):


	v1 = get_json_from_file(filepath1)
	v2 = get_json_from_file(filepath2)

	dist, cost, path = mlpy.dtw_std(v1.flatten(), v2.flatten(), dist_only=False)	

	print dist, filepath2
Example #16
def test_dtw_short():
    """
    This test calculates DTW using DTW 1.0 by Rouanet (modified to remove
    normalization) and dtw_std from mlpy, and compares them with the DTW in
    ts_analytics. It uses the two ten-element lists used by Rouanet in
    http://nbviewer.ipython.org/github/pierre-rouanet/dtw/blob/master/simple%20example.ipynb.
    """
    x = [0, 0, 1, 1, 2, 4, 2, 1, 2, 0]
    y = [1, 1, 1, 2, 2, 2, 2, 3, 2, 0]

    mlpy_dist = mlpy.dtw_std(x, y)
    tsa_dist = tsa.dtw(x, y)

    assert tsa_dist == mlpy_dist
Example #17
def dtwDistanceMLPY(times1, times2):
    mx = float(max(times1))
    mn = float(min(times1))
    if mx-mn > 0:
        times1 = [(item-mn)/(mx-mn) for item in times1]
    mx = float(max(times2))
    mn = float(min(times2))
    if mx-mn > 0:
        times2 = [(item-mn)/(mx-mn) for item in times2]
    dist = mlpy.dtw_std(times1, times2, dist_only=True)
    return dist
def recurs_in_single_linkage(dist, X, clst, sum_of_dist):
    minim = dist[0][0]
    # print(minim)
    for i in dist:
        if min(i) < minim:
            minim = min(i)
    # flag_one_from_couple = 0
    for index_col, row in enumerate(dist):
        if minim in row:
            buf = [index_col, row.index(minim)]
            # print(buf, dist[buf[0]][buf[1]])
    # print(X)
    for obj in X[buf[0] + buf[1] + 1]:
        X[buf[0]].append(obj)
    X.remove(X[buf[0] + buf[1] + 1])
    # print(X)
    new_dist = []
    for i in X[:-1]:
        buf_dist = []
        min_buf_dist = []
        for j in i:
            min_dist_col = []
            for k in X[X.index(i) + 1:]:
                min_dist_row = []
                for m in k:
                    min_dist_row.append(mlpy.dtw_std(j, m))
                min_dist_col.append(min(min_dist_row))
            buf_dist.append(min_dist_col)
        # print(buf_dist)
        # print(len(buf_dist))
        if len(buf_dist) > 1:  # check whether buf_dist is a matrix
            for obj in range(len(buf_dist[0])):
                buf_min_for_buf_dist = []
                for min_obj in range(len(buf_dist)):
                    buf_min_for_buf_dist.append(buf_dist[min_obj][obj])
                min_buf_dist.append(min(buf_min_for_buf_dist))
            new_dist.append(min_buf_dist)
        else:
            new_dist.append(buf_dist[0])
    # print(new_dist)
    # print()
    # print()
    if len(X) > clst:
        # print("New recursion")
        sum_of_dist = recurs_in_single_linkage(new_dist, X, clst, sum_of_dist)
    else:
        center = [np.ndarray.tolist(np.mean(i, axis=0)) for i in X]
        # sum_of_dist = 0
        for i in range(len(X)):
            for j in range(len(X[i])):
                sum_of_dist += euclid_dist(center[i], X[i][j])
        return sum_of_dist
    return sum_of_dist
Example #19
def getDTWDist(data1, data2):
    '''
	R=rpy2.robjects.r
	DTW=importr('dtw')
	d1r,d1c=data1.shape
	d2r,d2c=data2.shape
	data1R=R.matrix(data1,nrow=d1r,ncol=d1c)
	data2R=R.matrix(data2,nrow=d2r,ncol=d2c)
	alignment = R.dtw(data1R,data2R,keep=True, step_pattern=R.rabinerJuangStepPattern(4,"c"),open_begin=True,open_end=True,distance_only=True)
	return  alignment.rx('distance')[0][0]
	'''
    #	distance,path= fastdtw(data1,data2,dist=euclidean)
    return mlpy.dtw_std(data1, data2, dist_only=True)
Example #20
def FindNextStartAndEndPointOnPattern(m, eCounter, sCounter):
    sumExpressionAbsolute = -1
    l = 4
    while (sumExpressionAbsolute < e1):
        shortList = GetShortList(seriesRepresented, sList[sCounter], l)
        x = GetOnlyOneAxis(shortList, 0)
        y = GetOnlyOneAxis(shortList, 1)
        if len(x) <= 3 or len(y) <= 3:
            m = n
            return m, eCounter, sCounter
        regressionConstants = numpy.polyfit(x, y, 2)
        a = regressionConstants[0]
        b = regressionConstants[1]
        c = regressionConstants[2]

        sumExpression = 0
        for i in range(0, l):
            if i < len(shortList):
                functionSolved = SolveRegresionFunction(
                    a, b, c, shortList[i][0])
                expressionFormula = pow((functionSolved - shortList[i][0]), 2)
                sumExpression += expressionFormula
        sumExpressionAbsolute = GetAbsoluteValue(sumExpression)

        if (sumExpressionAbsolute < e1):
            l += 1
        else:
            eList.append(sList[sCounter] + l)
            eCounter += 1
            i = 1
            flag2 = "true"
            while flag2 == "true":
                firstList = GetShortList(seriesRepresented, sList[sCounter],
                                         eList[eCounter])
                secondList = GetShortList(seriesRepresented,
                                          (sList[sCounter]) + i,
                                          (eList[eCounter]) + i)
                firstAxis = GetOnlyOneAxis(firstList, 0)
                secondAxis = GetOnlyOneAxis(secondList, 0)
                dist, cost, path = mlpy.dtw_std(firstAxis,
                                                secondAxis,
                                                dist_only=False)
                if (dist <= e2):
                    i += 1
                if (i >= (eList[eCounter] - sList[sCounter])) or (dist > e2):
                    flag2 = "false"
            sList.append(eList[eCounter] + i)
            sCounter += 1
            m = sList[sCounter]
            return m, eCounter, sCounter
Example #21
def k_means_clust(data, num_clust, num_iter):
    # centroids = random.sample(list(data), num_clust)
    # b = np.random.randint(0, data.shape[0], num_clust)
    b = np.random.permutation(data.shape[0])[:num_clust]
    print('Initialisation ids: %s' % str(b))
    centroids = data[b]
    conv = []
    meta = []
    for n in range(num_iter):
        print(n)
        assignments = {}
        # assign data points to clusters #
        for ind, i in enumerate(data):
            min_dist = float('inf')
            closest_clust = None
            for c_ind, j in enumerate(centroids):
                cur_dist = mlpy.dtw_std(i, j, dist_only=True)
                if cur_dist < min_dist:
                    min_dist = cur_dist
                    closest_clust = c_ind
            if closest_clust in assignments:
                assignments[closest_clust].append(ind)
            else:
                assignments[closest_clust] = [ind]  # include the first point assigned to a new cluster
        # recalculate centroids of clusters #
        # key = cluster number #
        for key, init in zip(assignments, data[b]):
            clust_sum = 0
            for k in assignments[key]:
                # k = number of records (rows of matrix) #
                clust_sum = clust_sum + data[k]
            centroids[key] = [m / len(assignments[key]) for m in clust_sum]
            conv.append(
                float(mlpy.dtw_std(init, centroids[key], dist_only=True)))
        meta.append(conv)
        conv = []
    return [centroids, assignments, meta]
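
A hypothetical call to k_means_clust on toy data, just to show the expected shapes (the base shapes and noise level are made up):

import numpy as np

rng = np.random.RandomState(0)
t = np.linspace(0, 6, 40)
base = np.vstack([np.sin(t), np.cos(t)])  # two prototype shapes
data = np.vstack([base[i % 2] + 0.1 * rng.randn(40) for i in range(20)])
centroids, assignments, meta = k_means_clust(data, num_clust=2, num_iter=5)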
Example #22
def pairwise_dtw(samples, axis):
    """
    Compute the pairwise DTW distances between samples.

    samples : samples from a pandas.DataFrame
    axis : specifies the column axis to use
    """
    import mlpy
    from scipy.spatial.distance import squareform
    array1 = map(lambda data: data[axis], samples)
    d_array = []
    l_array = len(array1)
    for i in range(l_array-1):
        for j in range(i+1, l_array):
            d_array.append(mlpy.dtw_std(array1[i], array1[j], dist_only=True))
    X = squareform(d_array)
    return X
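
An illustrative call under Python 2 (where map returns a list, as this function assumes); the record layout here is an assumption, any mapping from the axis key to a series works:

samples = [{'v': [0.0, 1.0, 2.0, 1.0]},
           {'v': [0.0, 2.0, 4.0, 2.0]},
           {'v': [5.0, 5.0, 5.0, 5.0]}]
X = pairwise_dtw(samples, 'v')
print(X.shape)  # (3, 3) square DTW distance matrix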
Example #23
def compareData(in_file,lib_file):
	vector_index = ['arr_0','arr_1','arr_2']
	dist = 0
	total_dist = 0
	dists = []


	in_data = np.load(in_file)
	lib_data = np.load(lib_file)
	for j in range(0,3):
		a = in_data[vector_index[j]] 
		b = lib_data[vector_index[j]] 
		dist = 	mlpy.dtw_std(a,b,dist_only=True)
		total_dist += dist
		dists.append(dist)

	return (total_dist,dists)
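
An illustrative setup for compareData (the file names are hypothetical): np.savez with positional arguments stores the arrays under exactly the keys arr_0, arr_1, arr_2 read above:

import numpy as np

np.savez('in.npz', np.array([0., 1., 2.]), np.array([0., 1., 0.]), np.array([1., 1., 1.]))
np.savez('lib.npz', np.array([0., 2., 4.]), np.array([0., 1., 0.]), np.array([1., 2., 1.]))
total, per_axis = compareData('in.npz', 'lib.npz')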
Example #24
def adaptive_align_dtw(trace, peaks, avg_height, ladders):

    data = trace

    ## TRY 1 -> use DTW

    from mlpy import dtw_std
    from matplotlib import pylab as plt
    from dtw import dtw

    peak_corr = {}
    for p in peaks:
        peak_corr[p.rtime] = []

    dtw_list = []
    for i in range(0, 3):
        for j in range(1, 3):

            standard_peaks, peak_index = generate_peaks(
                ladders, avg_height, peaks[i].rtime, peaks[-j].rtime)

            dist, cost, path = dtw_std(standard_peaks,
                                       data,
                                       dist_only=False,
                                       squared=True)
            plot_path(standard_peaks, data, path, [p[0] for p in peak_index])
            #dist, cost, path = dtw( standard_peaks, data )

            # fill peak correlation based on path

            for map_x, map_y in zip(path[0], path[1]):
                if map_y in peak_corr:
                    if standard_peaks[map_x] < avg_height / 2:
                        peak_corr[map_y].append(-1)
                    else:
                        peak_corr[map_y].append(
                            search_peak_index(map_x, peak_index))

    peak_assignment = score_peak_correlation(peak_corr)
    dpscore, rss, z, aligned_peaks = adaptive_peak_alignment(
        peak_assignment, peaks, ladders)

    return (dpscore, rss, z, aligned_peaks)
def single_linkage(x, clst):
    # dist = [[mlpy.dtw_std(cur_obj, other_obj) for other_obj in range(cur_obj + 1, len(X))] for cur_obj in range(len(X))]
    dist = []
    X = [[list(i)] for i in x]
    # print()
    # print(len(X))
    # print(len(X[0]))
    # print(len(X[0][0]))
    # print("HELLO")
    for i in X[:-1]:
        buf_dist = []
        for j in i:
            for k in X[X.index(i) + 1:]:
                for m in k:
                    buf_dist.append(mlpy.dtw_std(j, m))
        dist.append(buf_dist)
    print("Dist: ", dist)
    res = recurs_in_single_linkage(dist, X, clst, sum_of_dist=0)
    print(res)
    return res
Example #26
def dtw_k_means(data,num_clust,metric):
    t0 = time()
    centroids=random.sample(data,num_clust)
    counter=0
    for n in range(50):
        counter+=1
        #print counter
        assignments={}
        #assign data points to clusters
        for ind,i in enumerate(data):
            min_dist=float('inf')
            closest_clust=None
            for c_ind,j in enumerate(centroids):
                if LB_Keogh(i,j,5)<min_dist:
                    cur_dist=mlpy.dtw_std(i,j,dist_only=True)
                    if cur_dist<min_dist:
                        min_dist=cur_dist
                        closest_clust=c_ind
            if closest_clust in assignments:
                assignments[closest_clust].append(ind)
            else:
                assignments[closest_clust]=[ind]  # include the first assigned point
 
        #recalculate centroids of clusters
        for key in assignments:
            clust_sum=0
            for k in assignments[key]:
                clust_sum=clust_sum+data[k]
            centroids[key]=[m/len(assignments[key]) for m in clust_sum]

    labels = [0] * len(data)
    for key in assignments:
        for value in assignments[key]:
            labels[value] = key

    t1 = time()

    labels = np.array(labels)
    return ('Kmeans DTW', len(assignments.keys()), accuracy.getAccuracy(data,labels,len(data),'euclidean'),t1-t0)
Example #29
    def _postprocessing(self, result, satellite, querylen, time):
        """
        Conduct post-processing on the retrieved result.
        Return <song_id, # distinct timestamps at the majority offset, hits at that offset, DTW distance>.
        satellite: the naive hits from the hashing table. type(satellite) is ndarray.
        querylen: the length of time of query, the unit is 10ms.
        """
        # Permit +/- 10% tempo difference
#        tolerance = 0.1
        output = np.zeros((len(result), 4), dtype=int)
#        delta = querylen * tolerance
        # Find the most popular time offset
        for i in range(len(result)):
            # Fetch satellites contain specific track id
            tkR = satellite[satellite[:,0]==result[i,0]]
            # Drawing the histogram of the value of offset time
            dts, xx = self._unique_first(tkR[:,2])
            xx.append(len(tkR))
            dtcounts = np.diff(xx)
            xx = dtcounts.argmax(0)
            vv = dtcounts.max(0)

            hitted = np.array([tkR[k] for k in range(len(tkR)) if tkR[k,2]==dts[xx]])
            stamp, ind = self._unique_first(hitted[:,1])
            ind.append(len(hitted))
            dtcounts = np.diff(ind)

            hitlen = np.zeros(len(time), dtype=int)
            for j in stamp:
                hitlen[time.index(j-dts[xx])] = dtcounts[stamp.index(j)]
            from mlpy import dtw_std
            dis = dtw_std(querylen, hitlen)
            output[i]=[result[i,0],len(stamp),len(hitted),dis]

        # Sort the R in accordance with time coverage
        output.view('i8,i8,i8,i8').sort(order=['f1'], axis=0)
        return output[::-1]
Example #30
File: speedtest.py  Project: xuq/cdtw
import numpy as np
import time

from cdtw.src.pydtw import *

r = np.arange(6000)
q = np.arange(6000)

s = Settings()
s.step.set_type('dp2')

# s.global_constraint.set_type('itakura')
# s.global_constraint.set_param(0.2)
# 

s.compute_path = False


t1 = time.time()
d = dtw(r, q, s)
t2 = time.time()

print s
print "python total time: " + str(t2 - t1)


import mlpy
t1 = time.time()
d = mlpy.dtw_std(r, q, dist_only = False)
t2 = time.time()
print "mlpy total time: " + str(t2 - t1)
Example #31
File: dtw.py  Project: nejcrihter/PRvaje
import mlpy
import matplotlib.pyplot as plt
import matplotlib.cm as cm

x = [0, 0, 0, 0, 1, 1, 2, 2, 3, 2, 1, 1, 0, 0, 0, 0]
y = [0, 0, 1, 1, 2, 2, 3, 3, 3, 3, 2, 2, 1, 1, 0, 0]

dist, cost, path = mlpy.dtw_std(x, y, dist_only=False)

fig0 = plt.figure(1)
plt.plot(x, "b")
plt.plot(y, "r")

fig = plt.figure(2)
ax = fig.add_subplot(111)
plot1 = plt.imshow(cost.T,
                   origin='lower',
                   cmap=cm.gray,
                   interpolation='nearest')
plot2 = plt.plot(path[0], path[1], 'w')
xlim = ax.set_xlim((-0.5, cost.shape[0] - 0.5))
ylim = ax.set_ylim((-0.5, cost.shape[1] - 0.5))
plt.show()
Example #32
def dtw_distance(x, y):
    return mlpy.dtw_std(x, y, dist_only=True)
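
One way to reuse this metric, an assumption rather than something from the source: scipy's pdist accepts a callable, so the full pairwise matrix can be built as:

import numpy as np
from scipy.spatial.distance import pdist, squareform

X = np.array([[0, 1, 2, 1], [0, 2, 4, 2], [3, 3, 3, 3]], dtype=float)
D = squareform(pdist(X, dtw_distance))  # square pairwise DTW matrix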
Example #33
def kmeans(X, clusters, num_iter=300, labels=0, metr="euclid"):
    rnd_centr = random.sample(range(len(X)), clusters)
    print("random center:", rnd_centr)
    # print("random number centroid:", rnd_centr)
    # num_centr = [X[0], X[2]]
    num_centr = [X[i] for i in rnd_centr]
    print("len num of centr in start:", len(num_centr))
    global sum_of_dist
    for n in range(num_iter):
        dist = []
        for obj in X:  # for each object of the input matrix, find the distances to the centroids
            buf = [mlpy.dtw_std(obj, centr)
                   for centr in num_centr]  # distance to each centroid
            # print("len of buf:", len(buf))
            dist.append(buf)  # build the distance matrix
        print("len DIST:", len(dist))
        num_clst = [
            [d.index(min(d)), min(d)] for d in dist
        ]  # for each object, the minimum distance to any centroid (and that centroid's index)
        print()
        print("len num_clst", len(num_clst))
        dict_clst = {
        }  # dictionary assigning each vector to a concrete cluster
        sum_of_dist = 0  # accumulator for the sum of distances to the centroids
        arr_clst = []
        for i in range(len(num_clst)):
            if num_clst[i][
                    0] not in dict_clst:  # check whether this cluster id already exists in the dictionary
                dict_clst[num_clst[i][0]] = []
                arr_clst.append(num_clst[i][0])
            dict_clst[num_clst[i][0]].append(
                X[i])  # append this vector to its cluster
            sum_of_dist += num_clst[i][1]
        arr_clst.sort()
        print("ln dictionary:", len(dict_clst))
        # print("Sum of dist:", sum_of_dist, " for centroids:", num_centr)
        last_centr = copy.deepcopy(num_centr)  # remember the previous centers
        num_centr = []
        for key, value in dict_clst.items():
            print("dict items:", key, value)
            num_centr.append(
                np.ndarray.tolist(np.array(value).mean(axis=0))
            )  # coordinate-wise mean to recompute the centroid
        # print("New centroid:", num_centr)
        # print("last_centr:", last_centr, len(last_centr), len(last_centr[0]))
        # print("new center:", num_centr, len(num_centr), len(num_centr[0]))
        if len(num_centr) != len(last_centr):
            continue
        try:
            if (np.array(last_centr) == np.array(num_centr)).all(
            ):  # stop when the new centers coincide with the old ones
                if labels:
                    return dict_clst
                else:
                    return sum_of_dist
        except AttributeError:
            print(len(last_centr))
            print(len(num_centr))
        # print(num_centr, sum_of_dist)
        # print()
    return sum_of_dist
Example #34
    def process3(self):
        #s1 = web.get_data_yahoo('AAL', '2014-12-15','2015-05-8')['Adj Close']
        s1 = web.get_data_yahoo('INTC', '2014-10-15',
                                '2015-02-03')['Adj Close']
        s2 = web.get_data_yahoo('ibm', '2015-02-23', '2015-04-06')['Adj Close']
        n1 = np.array(s1.tolist())
        n2 = np.array(s2.tolist())

        print len(n1), len(n2)
        if (len(n2) < len(n1)):
            print "interpolate"
            steps = (len(n2) * 1.0 - 1.0) / (len(n1) - len(n2))
            x1 = np.arange(1, len(n2) + 1)
            f = interp1d(x1, n2)
            print n2
            x_fake = np.arange(1.1, len(n2), steps)
            print len(x_fake)
            print x_fake
            c = np.sort(np.concatenate((x1, x_fake)))
            print c
            y1 = np.array([f(i) for i in c])
            print y1

        #s1=s1.reindex(index=np.arange(len(s1)))
        #print s1
        '''
        if (len(s2)<len(s1)):
            x2= pd.date_range(s1.index[0],s1.index[-1],freq='D')
            s2=s2.reindex(x2) 
        print s2
        '''
        a = pd.Series(n1)
        b = pd.Series(y1)
        rets1 = a.pct_change()
        rets2 = b.pct_change()

        rets1[0] = 0
        rets2[0] = 0
        '''
        print rets1
        print rets2 
        '''
        '''
        print type(rets1)
        corr = pd.rolling_corr(rets1, rets2, 10)
        print type(corr),corr
        cor,pval = pearsonr(rets1,rets2)
        print "pearsonr",str(cor),pval
        '''
        cor, pval = pearsonr(rets1, rets2)
        print "pearsonr", str(cor), pval
        print "def2", pearson_def(rets1, rets2)

        ##
        '''
        rets3 = rets1.shift(5)
        rets3.fillna(0,inplace=True)#method='ffill')
        print rets3
        '''
        dist, cost, path = mlpy.dtw_std(rets1, rets2, dist_only=False)
        print "dist", dist
        pass
Example #35
File: lghdtw.py  Project: Rventric/Bilder
     # skip the comparison if the shorter sequence is less than 0.65 of the longer one
     if len(AllHandFeats[i])>len(S) and len(S)/float(len(AllHandFeats[i]))<0.65: 
         # print 'skip'
         dist=float("inf")
     elif len(S)>len(AllHandFeats[i]) and len(AllHandFeats[i])/float(len(S))<0.65:
         # print 'skip'
         dist=float("inf")
     else:
         
         #my DTW 
         #EV=EventHorizon()
         #dis=EV.SimpleDynTimeWarp(S, AllHandFeats[i], band_width)
         
         
         if 'SC' in ZoneType:
             dist,cost,path=mlpy.dtw_std(S,AllHandFeats[i], dist_only=False, metric='euclidean', constraint='slanted_band', k=band_width)
         else:
             dist,cost,path=mlpy.dtw_std(S,AllHandFeats[i], dist_only=False, metric='euclidean', constraint='itakura', k=band_width)
             
         dist/=float(len(S)+len(AllHandFeats[i]))
             
         print 'DTW :',os.path.basename(Fake[0]),transcript,'=',dist
 
         if math.isinf(dist)==False:
             
             RealComps+=1  
             #add result to mysql
             try:
                 qr=MeinSql.cursor()
 
                 erotima="Insert INTO Local (SynthFile,HandFile,HandWord,Score,Zoni,Platos) VALUES('"+ os.path.basename(Fake[0])+"','"+os.path.basename(HandFileNames[i][0]) +"','"+ transcript+"','" + str(dist)+"','"+ ZoneType+"','"+ str(band_width) +"')"
                thumpVotes = 0
                nailVotes = 0
                velcroVotes = 0
                totalVotes = 0
                fftrmsarray = []

            peaktopeak = np.max(audioWindow) - np.min(audioWindow)
            rms = np.sqrt(np.mean(np.square(audioWindow)))
            if rms > 700:
                prevTime = t
                normalized = audioWindow / (peaktopeak / 2.0)

                totalVotes += 1

                for template in templates["t"]:
                    thumpDist = mlpy.dtw_std(normalized, template, dist_only=True)
                    # if thumpDist < 90: thumpVotes += 1
                    thumpVotes += 90 / thumpDist
                    print "ThumpDist = " + str(thumpDist)

                for template in templates["n"]:
                    nailDist = mlpy.dtw_std(normalized, template, dist_only=True)
                    # if nailDist < 200: nailVotes += 1
                    nailVotes += 200 / nailDist
                    print "NailDist = " + str(nailDist)

                for template in templates["v"]:
                    velcroDist = mlpy.dtw_std(normalized, template, dist_only=True)
                    # if velcroDist < 120: velcroVotes += 1
                    velcroVotes += 120 / velcroDist
                    print "VelcroDist = " + str(velcroDist)
Example #37
f.close()

indx = 0
f = open( 'D:/FYP-Developments/Dataset-Debs-2013/MovingAverageData/resultDTW5.csv', 'rU' ) #open train data
for line in f:

    cells = line.split(",")
    e.append((float)(cells[7]))
    indx = indx + 1

    if indx == dtw_data_limit:
        break

f.close()

dist, cost, path = mlpy.dtw_std(a, d, dist_only=False)

print("Distance between 7th and 28th minutes - Two Golas")
print(dist)
print("############")

dist, cost, path = mlpy.dtw_std(b, c, dist_only=False)

print("Distance between 13th and 22nd minutes - Two Goals")
print(dist)
print("############")

dist, cost, path = mlpy.dtw_std(a, e, dist_only=False)

print("Distance between 7th and 5th minutes")
print(dist)
    path.append([0,0])
    for [fadi2, fadi1] in path:
        cost = cost +distances[fadi1, fadi2]
    return path, cost 

path, cost = path_cost(fadi1, fadi2, accumulated_cost, distances)
print(path)
print(cost)

#this is an implementation that we have created for this problem, but we can also try it using a library that Python has and see the difference

#attempt using the mlpy library

import mlpy

dist, cost, path = mlpy.dtw_std(fadi1, fadi2, dist_only = False)

import matplotlib.cm as cm
fig = plt.figure(1)
ax = fig.add_subplot(111)
plot1 = plt.imshow(cost.T, origin='lower', cmap=cm.gray, interpolation='nearest')
plot2= plt.plot(path[0], path[1], 'w')
xlim = ax.set_xlim((-0.5, cost.shape[0]-0.5))
ylim = ax.set_ylim((-0.5, cost.shape[1]-0.5))

dist

plt.plot(fadi1, 'bo-' ,label='Fadi 1')
plt.plot(fadi2, 'g^-', label = 'Fadi 4')
plt.legend()
paths = path_cost(fadi1, fadi2, accumulated_cost, distances)[0]
Example #39
def cluster(data, sum_t, num):
    ##############        Clustering Initial Timeseries         ################
    plots = []
    D = len(data)
    # Calculate distance matrix using DTW
    dist_mat = np.empty(shape=(D, D))
    ii = 0
    labels1 = []
    for i in data.keys():
        jj = 0
        for j in data.keys():
            if ii == jj:
                dist_mat[ii][jj] = 0
            else:
                dist_mat[ii][jj] = dtw_std([z for z in data[i]],
                                           [z for z in data[j]],
                                           dist_only=True)
            jj += 1
        ii += 1
        labels1.append(i)

    # Use average linkage + DTW to remove outliers

    avg = linkage(squareform(dist_mat), method='average', metric='euclidean')
    clusters = fcluster(avg, floor(log(num) / log(2)), 'maxclust')
    #print(clusters)
    freq = {}
    for i in clusters:
        if i not in freq:
            freq[i] = 1
        else:
            freq[i] += 1

    # Stocks which appear in almost empty clusters are considered outliers
    thresh = max(freq.values())
    mfe = [k for k, v in freq.items() if v > floor(num / 10)]

    f = plt.figure(figsize=(6, 6), dpi=100, facecolor='white')

    plt.subplot(211)
    plt.title("With Outliers")
    if num > 50:
        dendrogram(avg, leaf_font_size=10)
    else:
        dendrogram(avg, labels=labels1, leaf_font_size=11)
    locs, ll = plt.xticks()
    plt.setp(ll, rotation=90)

    # Remove outliers and their labels
    outliers = []
    for i in range((len(clusters) - 1), -1, -1):
        if clusters[i] not in mfe:
            print("Outlier stock: " + str(labels1[i]))
            outliers.append(labels1[i])
            dist_mat = np.delete(dist_mat, i, 0)
            dist_mat = np.delete(dist_mat, i, 1)
            del labels1[i]

    print("Total Outliers: " + str(len(outliers)))

    # Finally do the clustering !!!
    ward = linkage(dist_mat, method='ward', metric='euclidean')
    clusters1 = fcluster(ward, 3, 'maxclust')
    #print(clusters1)
    #print(labels1)
    plt.subplot(212)
    plt.title("After removing outliers")
    if num > 50:
        dendrogram(ward, no_labels=True, leaf_font_size=10)
    else:
        dendrogram(ward, labels=labels1, leaf_font_size=11)
    plt.subplots_adjust(hspace=.5)
    locs, ll = plt.xticks()
    plt.setp(ll, rotation=90)
    plots.append(f)

    ###########        Plot Initials & Summaries           ############

    f2 = plt.figure(figsize=(6, 6), dpi=100, facecolor='white')
    plt.subplot(211)
    colormap = plt.cm.gist_ncar
    f2.gca().set_color_cycle([colormap(i) for i in np.linspace(0, 0.9, num)])
    for k in data.keys():
        data[k].plot()

    plt.subplot(212)
    f2.gca().set_color_cycle([colormap(i) for i in np.linspace(0, 0.9, num)])
    for k in sum_t.keys():
        (sum_t[k] / 10).plot()
    plt.xlim(0, len(sum_t[max(sum_t, key=len)]) - 1)
    plt.subplots_adjust(hspace=.32)
    plots.append(f2)

    #############       Clustering Summarized Timeseries        ############

    S = len(sum_t)
    dist_mat = np.empty(shape=(S, S))
    ii = 0
    labels2 = []
    # Calculate distance matrix using DTW
    for i in sum_t.keys():
        jj = 0
        for j in sum_t.keys():
            if ii == jj:
                dist_mat[ii][jj] = 0
            else:
                dist_mat[ii][jj] = dtw_std([z for z in sum_t[i]],
                                           [z for z in sum_t[j]],
                                           dist_only=True)
            jj += 1
        ii += 1
        labels2.append(i)

    # Use average linkage + DTW to remove outliers

    avg = linkage(squareform(dist_mat), method='average', metric='euclidean')
    clusters = fcluster(avg, floor(log(num, 2)), 'maxclust')
    #print(clusters)
    freq = {}
    for i in clusters:
        if i not in freq:
            freq[i] = 1
        else:
            freq[i] += 1

    # Stocks which appear in almost empty clusters are considered outliers
    thresh = max(freq.values())
    mfe = [k for k, v in freq.items() if v > floor(num / 10)]
    f3 = plt.figure(figsize=(6, 6), dpi=100, facecolor='white')
    plt.subplot(211)
    plt.title("With Outliers")
    if num > 50:
        dendrogram(avg, no_labels=True, leaf_font_size=10)
    else:
        dendrogram(avg, labels=labels2, leaf_font_size=11)
    locs, ll = plt.xticks()
    plt.setp(ll, rotation=90)

    outliers = []
    # Remove outliers
    for i in range((len(clusters) - 1), -1, -1):
        if clusters[i] not in mfe:
            print("Outlier stock: " + str(labels2[i]))
            outliers.append(labels2[i])
            dist_mat = np.delete(dist_mat, i, 0)
            dist_mat = np.delete(dist_mat, i, 1)
            del labels2[i]

    print("Total Outliers: " + str(len(outliers)))

    # Finally do the clustering !!!
    ward = linkage(dist_mat, method='ward', metric='euclidean')
    clusters2 = fcluster(ward, 3, 'maxclust')
    #print(clusters2)
    #print(labels2)
    plt.subplot(212)
    plt.title("After removing outliers")
    if num > 50:
        dendrogram(ward, leaf_font_size=10)
    else:
        dendrogram(ward, labels=labels2, leaf_font_size=11)
    plt.subplots_adjust(hspace=.5)
    locs, ll = plt.xticks()
    plt.setp(ll, rotation=90)

    plots.append(f3)
    colormap = plt.cm.gist_ncar
    f4 = plt.figure(figsize=(6, 6), dpi=100, facecolor='white')
    plt.subplot(311)
    plt.title("Clusters with Initial Timeseries")
    for i in range(1, 4):
        plt.subplot(int(310 + i))

        f4.gca().set_color_cycle(
            [colormap(ii) for ii in np.linspace(0, 0.9, num)])

        for j in range(len(clusters1)):
            if clusters1[j] == i:
                data[labels1[j]].plot(label=labels1[j])
                #plt.xlim(0, len(sum_t[max(sum_t, key=len)])-1)

        plt.legend(loc=4, prop={'size': 10})
    plt.subplots_adjust(hspace=.55)
    plots.append(f4)

    f5 = plt.figure(figsize=(6, 6), dpi=100, facecolor='white')
    plt.subplot(311)
    plt.title("Clusters based on Summarization Timeseries")
    for i in range(1, 4):
        plt.subplot(int(310 + i))
        f4.gca().set_color_cycle(
            [colormap(ii) for ii in np.linspace(0, 0.9, num)])

        for j in range(len(clusters2)):
            if clusters2[j] == i:
                data[labels2[j]].plot(label=labels2[j])
                #plt.xlim(0, len(sum_t[max(sum_t, key=len)])-1)

        plt.legend(loc=4, prop={'size': 10})
    plt.subplots_adjust(hspace=.55)
    plots.append(f5)

    f6 = plt.figure(figsize=(6, 6), dpi=100, facecolor='white')
    plt.subplot(311)
    plt.title("Clusters with Summarization Timeseries")
    for i in range(1, 4):
        plt.subplot(int(310 + i))
        colormap = plt.cm.gist_ncar
        f4.gca().set_color_cycle(
            [colormap(ii) for ii in np.linspace(0, 0.9, num)])

        for j in range(len(clusters2)):
            if clusters2[j] == i:
                (sum_t[labels2[j]] / 10).plot(label=labels2[j])
                #plt.xlim(0, len(sum_t[max(sum_t, key=len)])-1)

        plt.legend(loc=4, prop={'size': 10})
    plt.subplots_adjust(hspace=.55)
    plots.append(f6)

    return (plots)
Example #40
                mGamma[i][0] = inf
        for i in range(alen+1):
                mGamma[0][i] = inf
        mGamma[0][0] = 0
        for i in range(blen):
                for j in range(alen):
                        cost = 1-numpy.corrcoef(a[j], b[i])[0,1]
                        mGamma[i+1][j+1] = cost + min(mGamma[i][j], mGamma[i+1][j], mGamma[i][j+1])
        return mGamma[blen][alen]


dist = [[] for index in range(len(chromaset))]
for i in range(len(chromaset)):
        tempdist = []
        for j in range(12):
                tem, cost, p = mlpy.dtw_std(chromaset[i], hum_chroma, dist_only=False)
                tempdist.append(tem)
                hum_chroma = mod(hum_chroma+ones(len(hum_chroma)), 12)
        dist[i] = min(tempdist)

print song_list
print dist

match = song_list[dist.index(min(dist))]
for i in range(3):
        out1=min(dist)
        print song_list[dist.index(out1)]
        song_list.remove(song_list[dist.index(out1)])
        dist.remove(out1)

execfile('all.py')
    tn_female = np.array(data_female.tonic)
    
    tn_male = mynorm_maxmin(tn_male)
    tn_female = mynorm_maxmin(tn_female)
    
    indexes = np.arange(len(tn_male))
    keep = (indexes % N_SAMP == 0)
    
    tn_male = np.array(tn_male[keep])
    tn_female = np.array(tn_female[keep])
    #	SAMP_F = 1.0 / (data_male.iloc[1,0] - data_male.iloc[0,0])
    #	timestamp = np.arange(0, l/SAMP_F, 1.0/SAMP_F)
        
    #    labs = np.array(data_female.iloc[:,-1])    
        
    #    windows, labels = wnd.get_windows_no_mix(timestamp, labs, WINLEN, WINSTEP)
        
    	# extract the window from both files
    	
    #    n_col = data_male.shape[1]
    #    for i in range(1, n_col - 1):
    dtw_curr= mlpy.dtw_std(tn_male, tn_female)
    dtw_curr = dtw_curr/ len(tn_male)
    #    dtw_curr = modified_extract_features(tn_male, tn_female, windows, labels, timestamp)
    dtw_measures.append(dtw_curr)
res_exp = np.vstack([experiments, dtw_measures])
np.savetxt('F_dtw_all_signal.csv', res_exp, delimiter=',')

#print dtw_measures
# save dtw_measures to a text file
Example #42
def dtwDistance(x, y):
    dis, cost, path = mlpy.dtw_std(x, y, dist_only=False)
    return dis, path
Example #43
            data[i] = (data[i] - minvals[index])/ranges[index]

        # plt.figure(1)
        # plt.plot(data[0], label='Prox1')
        # plt.plot(data[1], label='Prox2')
        # plt.plot(data[2], label='Prox4')
        # plt.legend()

        #transpose for PCA, then transpose back
        pcaData = PCA(data.T).Y.T

        # plt.figure(2)
        # plt.plot(pcaData[0], label='PCA0')
        # plt.plot(pcaData[1], label='PCA1')
        # plt.plot(pcaData[2], label='PCA2')
        # plt.legend()

        dist = 0
        if prevPCA is not None:
            dist = mlpy.dtw_std(pcaData[0], prevPCA, dist_only=True)
            print dist

        prevPCA = pcaData[0]

        result = 'Dist: ' + str(dist)
        start = False
        finish = False
        resultReady = True

        #plt.show()
Example #44
def sDTW(query, subject):

    M, N = len(query), len(subject)

    for i in range(N - M + 1):
        mlpy.dtw_std(query, subject[i:i + M], dist_only=True)
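
Note that sDTW computes each windowed distance and then discards it; a variant that keeps the distance profile (illustrative, not from the source) could be:

import numpy as np
import mlpy

def sdtw_profile(query, subject):
    M = len(query)
    return np.array([mlpy.dtw_std(query, subject[i:i + M], dist_only=True)
                     for i in range(len(subject) - M + 1)])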
Example #45
f.close()

indx = 0
f = open( '', 'rU' ) #Provide dataset 2 file path for comparison
for line in f:

    cells = line.split(",")
    b.append((float)(cells["X"])) #X = Provide the respective column of the dataset which needs to be analysed
    indx = indx + 1

    if indx == dtw_data_limit:
        break

f.close()

dist, cost, path = mlpy.dtw_std(a, b, dist_only=False)

print("Distance between a and b temporal sequneces")
print(dist)
print("############")

#End - Distance Calculation

#Start - Plot

plt.figure("Two temporal sequnences")
plt.plot(a)
plt.plot(b)

fig = plt.figure("Accumulated Cost Matrix & warping path")
ax = fig.add_subplot("ACM & WP")
for i in range(s):
    d, p = fastdtw(eps_sueno[i].stt, eps_sueno[i].stt, dist=euclidean)
    dd, p = fastdtw(eps_sueno[i].stf, eps_sueno[i].stf, dist=euclidean)
    dt = mlpy.dtw_std(eps_sueno[i].stt, eps_sueno[i].stt, dist_only=True)
    df = mlpy.dtw_std(eps_sueno[i].stf, eps_sueno[i].stf, dist_only=True)
    print d, dd, dt, df
"""

#Compute the pairwise distance matrix between individuals via DTW
s = len(eps_sueno)
distancias = np.zeros((s, s))
for i in range(s):
    for j in range(s):
        #distanceTemp , path = fastdtw(eps_sueno[i].stt, eps_sueno[j].stt, dist=euclidean) #Distance in temperature
        #distanceFlujo , path = fastdtw(eps_sueno[i].stf, eps_sueno[j].stf, dist=euclidean) #Distance in flow
        distanceTemp = mlpy.dtw_std(eps_sueno[i].stt, eps_sueno[j].stt, dist_only=True) #Euclidean dist.
        distanceFlujo = mlpy.dtw_std(eps_sueno[i].stf, eps_sueno[j].stf, dist_only=True)
        distancias[j][i] = math.sqrt(math.pow(distanceTemp, 2) + math.pow(distanceFlujo, 2)) #Combined Euclidean distance
    print '.'

#Vector with the distances required for clustering
print distancias
print distancias.shape

"""
Results:
centroid: 0.82848866781
single: 0.340428699013
complete: 0.80537453305
average: 0.827708738138
weighted: 0.816403408353
Example #47
    querynames[index] = map(float, querynames[index])
    querynames[index] = np.asarray(querynames[index])

#window
count = 0
for window in range(0, (len(querynames[-1]) / 250)):
    y = querynames[-1][count:count + 500]
    print(querynames[-1][count:count + 500])
    #test each amplicon against F reference
    for amp in range(0, 11):
        #amplicons 1F
        x = querynames[amp]
        #mlpy
        timeb = datetime.now()
        mlpystddist, mlpystdcost, mlpystdpath = mlpy.dtw_std(x,
                                                             y,
                                                             dist_only=False)
        timet = datetime.now() - timeb
        print("mlpy complete on amp " + str(amp + 1))
        with open("bench_log.txt", "a") as text_file:
            text_file.write("\n" + str(amp + 1) + "," + str(window + 1) +
                            ",mlpy," + str(mlpystddist) + "," +
                            str(timet.microseconds) + ',' +
                            str(mlpystdpath[1][0] + count) + ',' +
                            str(mlpystdpath[1][-1] + count))
        path1 = np.savetxt('paths/' + "amp_" + str(amp + 1) + "_window_" +
                           str(window + 1) + '_query_mlpy.txt',
                           mlpystdpath[0],
                           delimiter=',')
        path2 = np.savetxt('paths/' + "amp_" + str(amp + 1) + "_window_" +
                           str(window + 1) + '_ref_mlpy.txt',
Example #48
def test_dtw_example():
    """
    Test using code
    """
    #Window used to get the DTW solution
    window = pd.to_timedelta(1. * 3600., unit='s')

    #Setup format for datetime string to pass to my_dtw later
    dfmt = '{0:%Y/%m/%d %H:%M:%S}'

    #twind4 = pd.to_datetime('2016/12/09 04:45:29')
    #start_t4 = dfmt.format(twind4-2.5*window)
    #end_t4 = dfmt.format(twind4+3.5*window)
    twind4 = pd.to_datetime('2016/12/21 08:43:12')
    start_t4 = dfmt.format(twind4 - 2.5 * window)
    end_t4 = dfmt.format(twind4 + 3.5 * window)
    #my_dtw4 = mtr.dtw_plane(start_t4,end_t4,nproc=4,penalty=True,events=7,earth_craft=['THEMIS_B'],par=['Bt'],speed_pen=500,mag_pen=100.2)
    my_dtw4 = mtr.dtw_plane(start_t4,
                            end_t4,
                            nproc=4,
                            penalty=True,
                            events=7,
                            par=['Bt'],
                            earth_craft=['THEMIS_B'],
                            speed_pen=500,
                            mag_pen=100.2)
    my_dtw4.init_read()
    my_dtw4.iterate_dtw()

    #my_dtw4.pred_earth()
    #mtr.omni_plot(my_dtw4)

    sc1 = 'Wind'
    sc2 = 'SOHO'

    x1 = np.array(my_dtw4.plsm[sc1].SPEED.ffill().bfill().values,
                  dtype=np.double)
    x2 = np.array(my_dtw4.plsm[sc2].SPEED.ffill().bfill().values,
                  dtype=np.double)

    #Example DTW plot
    p1, p2, cost1 = md.dtw_path_single(x1, x2, 300, 30, 500.0, 0.0, 0.5, 1)
    #p1,p2,cost = md.dtw_path_single(x2,x2],2700,2700/2,0.0,0.01,1)
    #mlpy example path
    dist, costa, path = mlpy.dtw_std(x1, x2, dist_only=False)
    pa, pb = path[0], path[1]

    #create multi panel diagnostic plot 2018/11/26 J. Prchlik
    fig, ax = plt.subplots(nrows=2,
                           ncols=2,
                           gridspec_kw={
                               'height_ratios': [2, 1],
                               'width_ratios': [1, 2]
                           },
                           figsize=(8, 8))
    fig.subplots_adjust(hspace=0.05, wspace=0.05)
    #turn off bottom left axis
    ax[1, 0].axis('off')
    lims = mdates.date2num([
        my_dtw4.plsm[sc1].index.min(), my_dtw4.plsm[sc1].index.max(),
        my_dtw4.plsm[sc2].index.min(), my_dtw4.plsm[sc2].index.max()
    ])
    v_max, v_min = np.percentile(costa, [95, 15])
    ax[0, 1].imshow(costa,
                    extent=lims,
                    origin='lower',
                    cmap=plt.cm.gray.reversed(),
                    vmin=v_min,
                    vmax=v_max,
                    aspect='auto')
    #ax.imshow(cost,extent=[0,x2.size,0,x2[::2].size],origin='lower')
    ax[0, 1].plot(my_dtw4.plsm[sc1].iloc[p1, :].index,
                  my_dtw4.plsm[sc2].iloc[p2, :].index,
                  '--',
                  color='black')
    ax[0, 1].plot(my_dtw4.plsm[sc1].iloc[pa, :].index,
                  my_dtw4.plsm[sc2].iloc[pb, :].index,
                  '-',
                  color='red')
    ax[0, 1].xaxis_date()
    ax[0, 1].yaxis_date()
    date_format = mdates.DateFormatter('%H:%M')

    #plot the plasma values on the off axes
    ax[1, 1].plot(my_dtw4.plsm[sc1].index, x1, color='blue')
    ax[0, 0].plot(x2, my_dtw4.plsm[sc2].index, color='teal')

    #set up axis formats
    ax[1, 1].xaxis_date()
    ax[0, 0].yaxis_date()

    #force limits to be the same as the cost matrix
    ax[1, 1].set_xlim(lims[:2])
    ax[0, 0].set_ylim(lims[2:])

    #Format the printed dates
    ax[1, 1].xaxis.set_major_formatter(date_format)
    ax[0, 0].yaxis.set_major_formatter(date_format)

    #Add label time
    ax[1, 1].set_xlabel(sc1 + ' Time [UTC]')
    ax[0, 0].set_ylabel(sc2 + ' Time [UTC]')

    #Add label for Speeds
    ax[1, 1].set_ylabel('Flow Speed [km/s]')
    ax[0, 0].set_xlabel('Flow Speed [km/s]')

    #turn off y-tick labels in center plot
    ax[0, 1].set_xticklabels([])
    ax[0, 1].set_yticklabels([])

    #set Wind and SOHO to have the same plasma parameter limits
    pls_lim = [420., 675.]
    ax[0, 0].set_xlim(pls_lim)
    ax[1, 1].set_ylim(pls_lim)

    #copy y-axis labels from Wind plot to SOHO plot
    ax[0, 0].set_xticks(ax[1, 1].get_yticks())
    ax[0, 0].set_xlim(pls_lim)
    ax[1, 1].set_ylim(pls_lim)
    ##ax[0,0].set_xlabel('Flow Speed [km/s]')

    #clean up the axes with plasma data
    fancy_plot(ax[0, 0])
    fancy_plot(ax[1, 1])

    # This simply rotates the x-axis date labels to the diagonal so they fit better.
    #fig.autofmt_xdate()
    fig.savefig('../plots/example_dtw_path.png',
                bbox_pad=.1,
                bbox_inches='tight')
    fig.savefig('../plots/example_dtw_path.eps',
                bbox_pad=.1,
                bbox_inches='tight')
    return x1, x2, my_dtw4
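# A hedged sketch of the index-to-timestamp mapping the plot above performs
# with my_dtw4.plsm[...]: mlpy's path arrays are positional indices, so they
# can be mapped onto each series' DatetimeIndex (toy data, not mission data).
import numpy as np
import pandas as pd
import mlpy

t1 = pd.date_range('2016-12-21 06:00', periods=300, freq='min')
t2 = pd.date_range('2016-12-21 06:05', periods=300, freq='min')
x1 = np.random.rand(300)
x2 = np.random.rand(300)

dist, cost, path = mlpy.dtw_std(x1, x2, dist_only=False)
pairs = pd.DataFrame({'t_sc1': t1[path[0]], 't_sc2': t2[path[1]]})
print(pairs.head())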
Example #49
                thumpVotes = 0
                nailVotes = 0
                velcroVotes = 0
                totalVotes = 0
                fftrmsarray = []

            peaktopeak = np.max(audioWindow) - np.min(audioWindow)
            rms = np.sqrt(np.mean(np.square(audioWindow)))
            if rms > 700:
                prevTime = t
                normalized = audioWindow/(peaktopeak/2.0)

                totalVotes += 1

                for template in templates['t']:
                    thumpDist = mlpy.dtw_std(normalized, template, dist_only=True)
                    # if thumpDist < 90: thumpVotes += 1
                    thumpVotes += 90/thumpDist
                    print 'ThumpDist = ' + str(thumpDist)

                for template in templates['n']:
                    nailDist = mlpy.dtw_std(normalized, template, dist_only=True)
                    # if nailDist < 200: nailVotes += 1
                    nailVotes += 200/nailDist
                    print 'NailDist = ' + str(nailDist)

                for template in templates['v']:
                    velcroDist = mlpy.dtw_std(normalized, template, dist_only=True)
                    # if velcroDist < 120: velcroVotes += 1
                    velcroVotes += 120/velcroDist
                    print 'VelcroDist = ' + str(velcroDist)
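# A hedged sketch of the voting rule above: each template adds a vote weighted
# by threshold/distance, so closer matches count more (toy templates assumed;
# the thresholds mirror the commented-out hard cutoffs in the snippet).
import numpy as np
import mlpy

templates = {'thump': [np.random.rand(100) for _ in range(3)],
             'nail': [np.random.rand(100) for _ in range(3)],
             'velcro': [np.random.rand(100) for _ in range(3)]}
thresholds = {'thump': 90.0, 'nail': 200.0, 'velcro': 120.0}
signal = np.random.rand(100)

votes = {}
for label in templates:
    votes[label] = sum(thresholds[label] / mlpy.dtw_std(signal, t, dist_only=True)
                       for t in templates[label])
print(max(votes, key=votes.get))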
Example #50
from PIL import Image
from numpy import array
import os
import pprint
import mlpy
from collections import OrderedDict

data = {}
l = len(os.listdir("image"))
for fn in range(0, l - 1):
    img = Image.open("image\\{0}.jpg".format(fn))
    arr = array(img)
    pixels = []  # renamed from 'list' to avoid shadowing the builtin
    for n in arr:
        pixels.append(n[0][0])  #R
    for n in arr:
        pixels.append(n[0][1])  #G
    for n in arr:
        pixels.append(n[0][2])  #B
    data[fn] = pixels
reference = data[31]
result = {}
for x, y in data.items():
    #print("{0} ----------------- {1}".format(x,y))
    dist = mlpy.dtw_std(reference, y, dist_only=True)
    result[x] = dist
sortedRes = OrderedDict(sorted(result.items(), key=lambda x: x[1]))
for a, b in sortedRes.items():
    print("{0} - {1}".format(a, b))
Example #51
def cluster(data, sum_t, num):
    ##############        Clustering Initial Timeseries         ################
    plots = []
    D = len(data)
    # Calculate distance matrix using DTW
    dist_mat = np.empty(shape=(D,D))
    ii = 0
    labels1=[]
    for i in data.keys():
        jj = 0
        for j in data.keys():
            if ii==jj:
                dist_mat[ii][jj] = 0
            else:
                dist_mat[ii][jj] = dtw_std([z for z in data[i]], [z for z in data[j]], dist_only=True)
            jj += 1
        ii += 1
        labels1.append(i)

    # Use average linkage + DTW to remove outliers

    avg = linkage(squareform(dist_mat), method='average', metric='euclidean')
    clusters = fcluster(avg, floor(log(num)/log(2)),'maxclust')
    #print(clusters)
    freq = {}
    for i in clusters:
        if i not in freq:
            freq[i] = 1
        else:
            freq[i] += 1

    # Stocks which appear in almost empty clusters are considered outliers
    mfe = [k for k,v in freq.items() if v > floor(num/10)]

    f = plt.figure(figsize=(6,6), dpi=100, facecolor='white')
    
    plt.subplot(211)
    plt.title("With Outliers")
    if num > 50:
        dendrogram(avg, leaf_font_size=10)
    else:
        dendrogram(avg, labels=labels1,leaf_font_size=11)
    locs, ll=plt.xticks()
    plt.setp(ll, rotation=90)
    
    
    
    # Remove outliers and their labels
    outliers = []
    for i in range((len(clusters)-1),-1,-1):
        if clusters[i] not in mfe:
            print("Outlier stock: "+str(labels1[i]))
            outliers.append(labels1[i])
            dist_mat = np.delete(dist_mat, i, 0)
            dist_mat = np.delete(dist_mat, i, 1)
            del labels1[i]

    print("Total Outliers: "+ str(len(outliers)))

    # Finally do the clustering !!!
    ward = linkage(squareform(dist_mat), method='ward', metric='euclidean')  # condensed form, as above
    clusters1 = fcluster(ward, 3, 'maxclust')
    #print(clusters1)
    #print(labels1)
    plt.subplot(212)
    plt.title("After removing outliers")
    if num > 50:
        dendrogram(ward, no_labels=True, leaf_font_size=10)
    else:
        dendrogram(ward, labels=labels1,leaf_font_size=11)
    plt.subplots_adjust(hspace=.5)
    locs, ll=plt.xticks()
    plt.setp(ll, rotation=90)
    plots.append(f)

    ###########        Plot Initials & Summaries           ############
    
    f2 = plt.figure(figsize=(6,6), dpi=100, facecolor='white')
    plt.subplot(211)
    colormap = plt.cm.gist_ncar
    f2.gca().set_color_cycle([colormap(i) for i in np.linspace(0, 0.9, num)])
    for k in data.keys():
        data[k].plot()

    plt.subplot(212)
    f2.gca().set_color_cycle([colormap(i) for i in np.linspace(0, 0.9, num)])
    for k in sum_t.keys():
        (sum_t[k]/10).plot()
    plt.xlim(0, len(sum_t[max(sum_t, key=len)])-1)
    plt.subplots_adjust(hspace=.32)
    plots.append(f2)


    #############       Clustering Summarized Timeseries        ############

    S = len(sum_t)
    dist_mat = np.empty(shape=(S,S))
    ii = 0
    labels2=[]
    # Calculate distance matrix using DTW
    for i in sum_t.keys():
        jj = 0
        for j in sum_t.keys():
            if ii==jj:
                dist_mat[ii][jj] = 0
            else:
                dist_mat[ii][jj] = dtw_std([z for z in sum_t[i]], [z for z in sum_t[j]], dist_only=True)
            jj += 1
        ii += 1
        labels2.append(i)

    # Use average linkage + DTW to remove outliers

    avg = linkage(squareform(dist_mat), method='average', metric='euclidean')
    clusters= fcluster(avg, floor(log(num,2)),'maxclust')
    #print(clusters)
    freq = {}
    for i in clusters:
        if i not in freq:
            freq[i] = 1
        else:
            freq[i] += 1
            
    # Stocks which appear in almost empty clusters are considered outliers
    mfe = [k for k,v in freq.items() if v > floor(num/10)]
    f3 = plt.figure(figsize=(6,6), dpi=100, facecolor='white')
    plt.subplot(211)
    plt.title("With Outliers")
    if num > 50:
        dendrogram(avg, no_labels=True, leaf_font_size=10)
    else:
        dendrogram(avg, labels=labels2,leaf_font_size=11)
    locs, ll=plt.xticks()
    plt.setp(ll, rotation=90)
    
    outliers = []
    # Remove outliers
    for i in range((len(clusters)-1),-1,-1):
        if clusters[i] not in mfe:
            print("Outlier stock: "+str(labels2[i]))
            outliers.append(labels2[i])
            dist_mat = np.delete(dist_mat, i, 0)
            dist_mat = np.delete(dist_mat, i, 1)
            del labels2[i]

    print("Total Outliers: "+ str(len(outliers)))

    # Finally do the clustering !!!
    ward = linkage(squareform(dist_mat), method='ward', metric='euclidean')  # condensed form, as above
    clusters2 = fcluster(ward, 3, 'maxclust')
    #print(clusters2)
    #print(labels2)
    plt.subplot(212)
    plt.title("After removing outliers")
    if num > 50:
        dendrogram(ward, leaf_font_size=10)
    else:
        dendrogram(ward, labels=labels2,leaf_font_size=11)
    plt.subplots_adjust(hspace=.5)
    locs, ll=plt.xticks()
    plt.setp(ll, rotation=90)

    plots.append(f3)
    colormap = plt.cm.gist_ncar
    f4 = plt.figure(figsize=(6,6), dpi=100, facecolor='white')
    plt.subplot(311)
    plt.title("Clusters with Initial Timeseries")
    for i in range(1,4):
        plt.subplot(int(310+i))
        
        f4.gca().set_color_cycle([colormap(ii) for ii in np.linspace(0, 0.9, num)])

        for j in range(len(clusters1)):
            if clusters1[j] == i:
                data[labels1[j]].plot(label=labels1[j])
                #plt.xlim(0, len(sum_t[max(sum_t, key=len)])-1)
                
        plt.legend(loc=4,prop={'size':10})
    plt.subplots_adjust(hspace=.55)
    plots.append(f4)
                
    f5 = plt.figure(figsize=(6,6), dpi=100, facecolor='white')
    plt.subplot(311)
    plt.title("Clusters based on Summarization Timeseries")
    for i in range(1,4):
        plt.subplot(int(310+i))
        f5.gca().set_color_cycle([colormap(ii) for ii in np.linspace(0, 0.9, num)])

        for j in range(len(clusters2)):
            if clusters2[j] == i:
                data[labels2[j]].plot(label=labels2[j])
                #plt.xlim(0, len(sum_t[max(sum_t, key=len)])-1)
                
        plt.legend(loc=4,prop={'size':10})
    plt.subplots_adjust(hspace=.55)
    plots.append(f5)

    f6 = plt.figure(figsize=(6,6), dpi=100, facecolor='white')
    plt.subplot(311)
    plt.title("Clusters with Summarization Timeseries")
    for i in range(1,4):
        plt.subplot(int(310+i))
        colormap = plt.cm.gist_ncar
        f6.gca().set_color_cycle([colormap(ii) for ii in np.linspace(0, 0.9, num)])

        for j in range(len(clusters2)):
            if clusters2[j] == i:
                (sum_t[labels2[j]]/10).plot(label=labels2[j])
                #plt.xlim(0, len(sum_t[max(sum_t, key=len)])-1)
                
        plt.legend(loc=4,prop={'size':10})
    plt.subplots_adjust(hspace=.55)
    plots.append(f6)
    
    return(plots)
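# A hedged usage sketch for cluster() above, assuming its imports (mlpy's
# dtw_std, scipy's linkage/fcluster/dendrogram/squareform, matplotlib's plt,
# math's floor/log) are in scope; data and sum_t are dicts of pandas Series.
import numpy as np
import pandas as pd

rng = np.random.RandomState(1)
names = ['AAA', 'BBB', 'CCC', 'DDD', 'EEE', 'FFF', 'GGG', 'HHH']
data = {n: pd.Series(np.cumsum(rng.randn(60))) for n in names}       # toy random walks
sum_t = {n: (s.rolling(5).mean().dropna() * 10).reset_index(drop=True)
         for n, s in data.items()}                                   # toy "summaries"
figs = cluster(data, sum_t, len(names))                              # list of figures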
Example #52
    if counts[1][1] / float(counts[0][1]) > 0.8:
        print 'Ambiguous label for sampleid = %d : counts : %s' % (sampleid,
                counts)
    #print counts
    return counts[0][0]
##
newlabels = np.array([knn_DM(DM, sampleid) for sampleid in xrange(len(labels))])

modified = np.flatnonzero(newlabels - labels)
## Write a new file
with open('out.txt', 'w') as f:
    utils.write_data(f, accel, gyro, labels)

##
sample1 = 18
sample2 = 151

dist, cost, path = mlpy.dtw_std(accel[sample1,0,:], accel[sample2,0,:],
                                dist_only=False)
pl.figure()
pl.suptitle('dist = %f' % dist)
pl.subplot(211)
pl.title('%d' % sample1)
pl.plot(accel[sample1, 0, :])
pl.ylim(0, 5000)
pl.subplot(212)
pl.title('%d' % sample2)
pl.plot(accel[sample2, 0, :])
pl.ylim(0, 5000)

pl.figure()
pl.title('%d - %d' % (sample1, sample2))
pl.imshow(cost.T, origin='lower', cmap=cm.gray, interpolation='nearest')
pl.plot(path[0], path[1], 'w')
pl.xlim((-0.5, cost.shape[0]-0.5))
pl.ylim((-0.5, cost.shape[1]-0.5))
##
Example #53
    for i in range(alen + 1):
        mGamma[0][i] = inf
    mGamma[0][0] = 0
    for i in range(blen):
        for j in range(alen):
            cost = 1 - numpy.corrcoef(a[j], b[i])[0, 1]
            mGamma[i + 1][j + 1] = cost + min(mGamma[i][j], mGamma[i + 1][j],
                                              mGamma[i][j + 1])
    return mGamma[blen][alen]


dist = [[] for index in range(len(chromaset))]
for i in range(len(chromaset)):
    tempdist = []
    for j in range(12):
        tem, cost, p = mlpy.dtw_std(chromaset[i], hum_chroma, dist_only=False)
        tempdist.append(tem)
        hum_chroma = mod(hum_chroma + ones(len(hum_chroma)), 12)
    dist[i] = min(tempdist)

print song_list
print dist

match = song_list[dist.index(min(dist))]
for i in range(3):
    out1 = min(dist)
    print song_list[dist.index(out1)]
    song_list.remove(song_list[dist.index(out1)])
    dist.remove(out1)

execfile('all.py')
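# A hedged reconstruction of the truncated function at the top of this example:
# full DTW with a correlation-based local cost (1 - Pearson r between frames),
# assuming a and b are 2-D arrays of per-frame feature vectors such as chroma.
import numpy

def corr_dtw(a, b):
    alen, blen = len(a), len(b)
    mGamma = numpy.full((blen + 1, alen + 1), numpy.inf)  # borders start at inf
    mGamma[0][0] = 0
    for i in range(blen):
        for j in range(alen):
            cost = 1 - numpy.corrcoef(a[j], b[i])[0, 1]
            mGamma[i + 1][j + 1] = cost + min(mGamma[i][j], mGamma[i + 1][j],
                                              mGamma[i][j + 1])
    return mGamma[blen][alen]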
Example #54
File: ExtLib.py  Project: Rventric/Bilder
         
 Comparisons-=1    
 
 # parameter descriptions
 #https://github.com/sauliusl/mlpy/blob/master/mlpy/dtw/dtw.pyx
 #skip the comparison if the series lengths don't overlap by at least a 0.65 ratio
 if len(Hand_Series[real])>len(SynthSeries[pseudo]) and len(SynthSeries[pseudo])/float(len(Hand_Series[real]))<0.65: 
     # print 'skip'
     dist=float("inf")
 elif len(SynthSeries[pseudo])>len(Hand_Series[real]) and len(Hand_Series[real])/float(len(SynthSeries[pseudo]))<0.65:
     # print 'skip'
     dist=float("inf")
 else:
     try:
         if 'SC' in Zoni:
             dist,cost,path=mlpy.dtw_std(SynthSeries[pseudo],Hand_Series[real], dist_only=False, metric='sqeuclidean', constraint='sakoe_chiba', k=PlatosZonis)
         else:
             dist,cost,path=mlpy.dtw_std(SynthSeries[pseudo],Hand_Series[real], dist_only=False, metric='euclidean', constraint='slanted-band', k=PlatosZonis)
     except Exception as ex:
         sys.exc_clear()
         dist=float("inf")  # fallback: keep dist bound if dtw_std raised before assigning it
         with open('/tmp/col_lathos.txt','w') as arxeio:
             arxeio.write("dtw fak")
     finally:
         dist=dist/float(len(SynthSeries[pseudo]) + len(Hand_Series[real]))
         print 'Normalized cost',dist
         
         
         print 'DTW cost',SynthNames[pseudo],'and',Hand_Names[real],  transcipt,dist
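# A hedged sketch of the pruning and normalization above: skip a pair whose
# length ratio is below 0.65, otherwise divide the DTW cost by the summed
# lengths. Stock mlpy.dtw_std here; the metric/constraint keywords used above
# come from the forked mlpy linked in the comments, not stock mlpy.
import mlpy

def pruned_normalized_dtw(a, b, min_ratio=0.65):
    ratio = min(len(a), len(b)) / float(max(len(a), len(b)))
    if ratio < min_ratio:
        return float("inf")
    return mlpy.dtw_std(a, b, dist_only=True) / float(len(a) + len(b))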
 
         
Example #55
    for w in xrange(0,len(SetTwoNames)):
        
        Comps-=1
        
        dist=1000
        if len(SetTwoFeats[w])>len(SetOneFeats[q]) and len(SetOneFeats[q])/float(len(SetTwoFeats[w]))<0.65:
            # print 'skip'
            dist=float("inf")
        elif len(SetOneFeats[q])>len(SetTwoFeats[w]) and len(SetTwoFeats[w])/float(len(SetOneFeats[q]))<0.65:
            # print 'skip'
            dist=float("inf")
        else:
            dist,cost,path=mlpy.dtw_std(SetTwoFeats[w],SetOneFeats[q], dist_only=False, metric='euclidean', constraint='slanted-band', k=50)
            dist=dist/float(len(SetOneFeats[q]) + len(SetTwoFeats[w]))

            synth=SetTwoNames[w]
            res_transcript=''
            keyword=''
            pos=0

            for c in xrange(0,len(synth)):
                if '_' == synth[c]:
                    pos=c+1
                    keyword=synth[pos:].replace('.png','')

Example #56
                
import json
import numpy
from scipy import interpolate
import mlpy
from main.util.common import readFromJson

lines = readFromJson("normalized_points.json")

timelines = map(lambda line: map(lambda x: x[1], json.loads(line)["twitter-data"]), lines)
numTimelines = len(timelines)

distances = []

for i in range(0, numTimelines):
    for j in range(i + 1, numTimelines):
        distances.append(mlpy.dtw_std(timelines[i], timelines[j], dist_only=True))  # dist_only keeps the list JSON-serializable

print json.dumps(distances)
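# A hedged follow-up: the i < j loop above emits distances in condensed
# (upper-triangle) order, so scipy can rebuild the full square matrix.
import numpy as np
from scipy.spatial.distance import squareform

distances = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]  # toy condensed vector for 4 series
print(squareform(np.array(distances)))       # 4x4 symmetric, zero diagonal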
Example #57
        # (e.g. 0, 0, 1, 1, 1, 2, 3, 3, 3)
        for i in range(rr(0, steps)):
            seq.append(n)
    return seq


if DEBUG:
    with Section('Dynamic Time Warping algorithm - MLPY'):
        # Using MLPY:
        # First, make sure deps are setup.
        # `brew install gsl`
        # Download from SF: http://mlpy.sourceforge.net/
        # Then install using setup.py:
        # `cd MLPY_PATH/setup.py install`

        # Now this makes it fun.
        x, y = random_timesequence(0, 10), random_timesequence(0, 10)

        # Taken from examples: http://mlpy.sourceforge.net/docs/3.5/dtw.html#id3
        distance, cost, path = mlpy.dtw_std(x, y, dist_only=False)
        fig = plot.figure(1)
        axes = fig.add_subplot(111)
        plot1 = plot.imshow(
            cost.T, origin='lower', cmap=cm.gray, interpolation='nearest')

        plot2 = plot.plot(path[0], path[1], 'w')
        bound = 0.5
        xlim = axes.set_xlim((-bound, cost.shape[0] - bound))
        ylim = axes.set_ylim((-bound, cost.shape[1] - bound))
        plot.show()