colors = datainfo.colors
    #f = h5py.File(datainfo.dpath + datainfo.name + ext + '.hdf5', 'r+')
    for s in datainfo.sensors:
        # For each sensor: build a matrix of pairwise Hellinger distances
        # between the label histograms of all data files, embed the files in
        # two dimensions with MDS and scatter-plot the embedding.
        print(s)
        mdist = np.zeros((len(datainfo.datafiles), len(datainfo.datafiles)))
        print(mdist.shape)
        for i, rfile in enumerate(datainfo.datafiles):
            # Histogram of the labels obtained for the reference file itself.
            # NOTE(review): presumably compute_data_labels(a, b, s) labels the
            # data of file b with the model of file a -- confirm.
            rlabels = compute_data_labels(rfile, rfile, s)
            rcnt = Counter(list(rlabels))
            # The comprehension variable is deliberately NOT called `i`: under
            # Python 2 a list-comprehension variable leaks into the enclosing
            # scope and would overwrite the row index used for mdist below.
            # NOTE(review): range(len(rcnt)) only covers every label when the
            # labels present are exactly 0..len(rcnt)-1 -- confirm.
            rhisto = [rcnt[label] for label in range(len(rcnt))]
            for j, dfile in enumerate(datainfo.datafiles):
                dlabels = compute_data_labels(rfile, dfile, s)
                cnt = Counter(list(dlabels))
                histo = [cnt[label] for label in range(len(cnt))]
                mdist[i, j] = hellinger_distance(rhisto, histo)

        # The raw matrix need not be symmetric; replace every pair
        # (i, j) / (j, i) by its mean so MDS receives a symmetric
        # dissimilarity matrix.
        mdist = (mdist + mdist.T) / 2.0

        transf = MDS(n_components=2, dissimilarity='precomputed', n_jobs=-1)
        #transf = TSNE(n_components=2, metric='precomputed')
        mres = transf.fit_transform(mdist)
        print(transf.stress_)
        fig = plt.figure()
        #ax = fig.gca(projection='3d')
        #plt.scatter(mres[:, 0], mres[:, 1], zs=mres[:, 2], c=colors, s=100)
        # One point per data file, coloured with datainfo.colors.
        plt.scatter(mres[:, 0], mres[:, 1], c=colors, s=100)
            # Choose the estimator once; the same one builds the reference
            # model and the model of every window below.
            if args.pairs:
                estimate_model = estimate_frequency_pairs_model
            else:
                estimate_model = estimate_frequency_model

            # Reference model, estimated from the first `estsize` sequences.
            counts_model = estimate_model(sequences[0:estsize], nclusters, laplace=laplace)

            # Slide over the sequences in steps of `stepsize`, comparing the
            # model of each window against the current reference model.
            for i in range(0, len(sequences)-stepsize, stepsize):
                tk += 1
                counts_model_ahead = estimate_model(sequences[i:i + estsize], nclusters, laplace=laplace)

                # Hellinger distance between the two normalised count vectors
                # (jensen_shannon_divergence was the previously tried measure).
                diff = hellinger_distance(counts_model/sum(counts_model),
                                          counts_model_ahead/sum(counts_model_ahead))
                lldiff.append(diff)

                changed = diff > tolerance
                if changed:
                    # The window differs enough: it becomes the new reference.
                    counts_model = counts_model_ahead.copy()
                # Mark a change with a spike at 1.2 * tolerance, otherwise 0.
                llmark.append(tolerance*1.2 if changed else 0)

                # Once the position passes the next entry of `seqend`, consume
                # that entry and remember the tick at which it happened.
                if i > seqend[0]:
                    seqend.pop(0)
                    tkpos.append(tk)
            tkpos.insert(0, 0)

            sp1 = fig.add_subplot(nfil, ncol, nfig)
            sp1.axis([0, len(lldiff), 0, tolerance*1.2])
            plt.title(sensor)
Пример #3
0
                histo = np.zeros(nclusters)
                for i in labels:
                    histo[i] += 1.0
                histo /= len(labels)
                lhisto.append(histo)

            df = pd.DataFrame(np.array(lhisto), index=datainfo.expnames, columns=['class-%d'%i for i in range(1, nclusters+1)])

            if args.hellinger:
                lrms = []
                lhell = []
                for h, nphase in zip(lhisto, datainfo.expnames):
                    rms = np.dot(lhisto[0] - h,  lhisto[0] - h)
                    rms /= h.shape[0]
                    lhell.append(hellinger_distance(h, lhisto[0]))
                    lrms.append(np.sqrt(rms))
                df['Hellinger'] = lhell
                df['RMS'] = lrms

            rfile = open(datainfo.dpath + '/' + datainfo.name + '/Results/cluster-histo-' + dfile + '-' + sensor + '-' +
                        str(nclusters) + '-' + ext + '.txt', 'w')
            rfile.write(df.to_string(line_width=200))

            rfile.close()

            matplotlib.rcParams.update({'font.size': 30})
            fig = plt.figure()
            ax = fig.add_subplot(2, 1, 1)
            fig.set_figwidth(30)
            fig.set_figheight(20)
        print data.shape

        # Build one normalised cluster histogram per entry of `ldata`.
        lhisto = []
        for dataf, ndata in zip(ldata, datainfo.datafiles):
            histo = np.zeros(nclusters)
            # km.predict is called once per row of dataf and its result is
            # used as the histogram bin index.
            # NOTE(review): presumably km is a fitted clustering model whose
            # predict() returns a cluster index -- confirm.
            for i in range(dataf.shape[0]):
                histo[km.predict(dataf[i])] += 1.0
            # Turn counts into relative frequencies.
            histo /= dataf.shape[0]
            print datainfo.name, ndata
            print histo
            lhisto.append(histo)

        # Compare every histogram with the first one: print the
        # root-mean-square difference and the Hellinger distance.
        for h in lhisto[1:]:
            rms = np.dot(lhisto[0] - h,  lhisto[0] - h)
            rms /= h.shape[0]
            print np.sqrt(rms), hellinger_distance(h, lhisto[0])


        # Grouped bar chart: one group per cluster, one bar per histogram.
        fig, ax = plt.subplots()
        fig.set_figwidth(30)
        fig.set_figheight(40)

        ind = np.arange(nclusters)  # the x locations for the groups
        width = 0.10       # the width of the bars
        ax.set_xticks(ind+width)
        ax.set_xticklabels( ind )
        # Bar i of every group is shifted by i*width and drawn in colors[i].
        for i, h in enumerate(lhisto):
            rects = ax.bar(ind+(i*width), h, width, color=colors[i])
        fig.suptitle(datainfo.name + '-' + s + ext, fontsize=48)
        # Save the figure as a landscape PDF under <dpath>/Results/.
        fig.savefig(datainfo.dpath+'/Results/' + datainfo.name + '-' + s + ext + '-histo.pdf', orientation='landscape', format='pdf')
    #    plt.show()
                        # Count the cluster predicted for row i of dataf.
                        # NOTE(review): the enclosing loop and `if` headers are
                        # outside this excerpt -- confirm their conditions.
                        histo[km.predict(dataf[i])] += 1.0
                    # Turn counts into relative frequencies.
                    histo /= dataf.shape[0]
                    # print(datainfo.name, ndata)
                    # print('HISTO ', histo)
                    # Reorder the bins: position i takes the bin whose index
                    # is stored in lmax[i][0].
                    # NOTE(review): presumably lmax is a ranking of clusters
                    # computed earlier -- confirm.
                    histosorted = np.zeros(nclusters)
                    for i in range(histosorted.shape[0]):
                        histosorted[i] = histo[lmax[i][0]]
                else:
                    # Other branch of the unseen condition: all-zero histogram.
                    histosorted = np.zeros(nclusters)
                lhisto.append(histosorted)

            if args.hellinger:
                # Compare every histogram with the first one: print the
                # root-mean-square difference and the Hellinger distance.
                for h in lhisto[1:]:
                    rms = np.dot(lhisto[0] - h,  lhisto[0] - h)
                    rms /= h.shape[0]
                    print(np.sqrt(rms), hellinger_distance(h, lhisto[0]))

            # Grouped bar chart: one group per cluster, one bar per histogram.
            matplotlib.rcParams.update({'font.size': 30})
            fig = plt.figure()
            ax = fig.add_subplot(2, 1, 1)
            fig.set_figwidth(60)
            fig.set_figheight(40)
            ind = np.arange(nclusters)  # the x locations for the groups
            # Bar width is chosen so the len(lhisto) bars of a group leave a
            # gap of one bar width before the next group.
            width = 1.0/(len(lhisto)+1)   # the width of the bars
            ax.set_xticks(ind+width)
            ax.set_xticklabels(ind)
            # Bar i of every group is shifted by i*width and drawn in colors[i].
            for i, h in enumerate(lhisto):
                rects = ax.bar(ind+(i*width), h, width, color=colors[i])
            fig.suptitle(datainfo.name + '-' + sensor, fontsize=48)

            # Smallest value found in `centroids`.
            minaxis = np.min(centroids)