def test_ncc_c_2_and_3dim_matches(self):
    test = zscore(np.array([
        [1, 2, 3, 4, 5],
        [0, 10, 4, 5, 7],
        [-1, 15, -12, 8, 9],
    ]), axis=1)
    centroids = np.array([[1, 1, 0, 1, 1], [10, 12, 0, 0, 1]])

    distances1 = np.empty((3, 2))
    distances2 = np.empty((3, 2))
    for i in range(3):
        for j in range(2):
            distances1[i, j] = 1 - _ncc_c(test[i], centroids[j]).max()
    for j in range(2):
        distances2[:, j] = 1 - _ncc_c_2dim(test, centroids[j]).max(axis=1)
    distances3 = (1 - _ncc_c_3dim(test, centroids).max(axis=2)).T

    np.testing.assert_array_equal(distances1, distances2)
    np.testing.assert_array_equal(distances2, distances3)
import numpy as np

# Assumes zscore and the _ncc_c* helpers live in kshape.core, the module
# this test exercises.
from kshape.core import zscore, _ncc_c, _ncc_c_2dim, _ncc_c_3dim


def test_ncc_c_2_and_3dim_matches():
    test = zscore(np.array([
        [1, 2, 3, 4, 5],
        [0, 10, 4, 5, 7],
        [-1, 15, -12, 8, 9],
    ]), axis=1)
    centroids = np.array([[1, 1, 0, 1, 1], [10, 12, 0, 0, 1]])

    distances1 = np.empty((3, 2))
    distances2 = np.empty((3, 2))
    for i in range(3):
        for j in range(2):
            distances1[i, j] = 1 - _ncc_c(test[i], centroids[j]).max()
    for j in range(2):
        distances2[:, j] = 1 - _ncc_c_2dim(test, centroids[j]).max(axis=1)
    distances3 = (1 - _ncc_c_3dim(test, centroids).max(axis=2)).T

    assert not (distances1 != distances2).any()
    assert not (distances2 != distances3).any()
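Both tests check the quantity behind k-Shape's shape-based distance, SBD(x, y) = 1 - max(NCC_c(x, y)), where NCC_c is the coefficient-normalized cross-correlation. As a rough reference only (an assumption about what the library computes, not its actual FFT-based implementation), the 1-D case being tested is equivalent to:

import numpy as np

def ncc_c_reference(x, y):
    # Cross-correlation of x and y at every lag, normalized by the product
    # of their norms ("coefficient normalization"). The lag ordering may
    # differ from the library's result, but the set of values, and hence
    # the maximum used by the distance, is the same.
    den = np.linalg.norm(x) * np.linalg.norm(y)
    if den == 0:
        return np.zeros(2 * len(x) - 1)
    return np.correlate(x, y, mode='full') / den

# Shape-based distance between two series x and y:
# sbd = 1 - ncc_c_reference(x, y).max()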
from kshape.core import kshape, zscore

time_series = [[1, 2, 3, 4], [0, 1, 2, 3], [0, 1, 2, 3], [1, 2, 2, 3]]
cluster_num = 2
clusters = kshape(zscore(time_series, axis=1), cluster_num)
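The later snippets index the result as clusters[i][0] (the cluster centroid) and clusters[i][1] (the indices of the member series). Assuming that layout, the result of the call above can be inspected like this (a sketch, not output from a real run):

# Each element of `clusters` pairs a centroid with the indices of the
# series assigned to that cluster.
for centroid, member_idx in clusters:
    print('centroid:', centroid)
    print('members :', member_idx)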
import pickle

import numpy as np
import pandas as pd
from kshape.core import kshape, zscore

#def data_plotter(data):

if __name__ == '__main__':
    # Load the pickled data set of time series plus metadata.
    with open(
            'C://Users//k_mathin//PycharmProjects//Masters//ciena_trials//Kamal//data//vodafone_data_clusters_filtered.pkl',
            'rb') as f:
        data_set = pickle.load(f)

    data = []
    for d in data_set['data']:
        data.append(d)
    data = np.asarray(data)
    #data = data[:,:15]
    print(data.shape[0])

    # Factorize the OSID labels and use the number of levels as the
    # number of clusters.
    label_data = np.asarray(data_set['osid'])
    labels, levels = pd.factorize(label_data)
    shelves = np.asarray(data_set['shelf'])
    cluster_num = levels.shape[0]
    print(cluster_num)

    # Cluster the z-normalized series with k-Shape.
    clusters = kshape(zscore(data, axis=1), cluster_num)
    #clusters = kshape(data, cluster_num)

    # Map each series index to the id of the cluster that contains it.
    y_pred = []
    for i in range(0, data.shape[0]):
        for j in range(0, cluster_num):
            if i in clusters[j][1]:
                y_pred.append(j)
                break  # each index belongs to exactly one cluster

    conf = conf_mat(labels, y_pred)
    print(conf)
    print("done")
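conf_mat is not defined in this snippet. A minimal stand-in, assuming it is simply a confusion matrix over the factorized labels and the predicted cluster ids, could be built with scikit-learn:

from sklearn.metrics import confusion_matrix

def conf_mat(y_true, y_pred):
    # Hypothetical replacement for the undefined helper above: rows are the
    # factorized ground-truth labels, columns are predicted cluster ids.
    return confusion_matrix(y_true, y_pred)

Note that k-Shape assigns arbitrary cluster ids, so the columns of such a matrix only line up with the rows after a label-matching step (for example a Hungarian assignment).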
def kshape_clusters(arr, cluster_num, ax=1):
    from kshape.core import kshape, zscore

    clusters = kshape(zscore(arr, ax), cluster_num)
    re_arr = apply_clusters(clusters, arr)
    return re_arr
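apply_clusters is not shown here, and it is not clear from the snippet whether re_arr is a per-row label array or a re-ordered copy of arr. Assuming the former, a hypothetical sketch could look like this:

import numpy as np

def apply_clusters(clusters, arr):
    # Hypothetical helper: label each row of `arr` with the id of the
    # cluster whose member-index list contains that row.
    labels = np.empty(len(arr), dtype=int)
    for cluster_id, (_, member_idx) in enumerate(clusters):
        labels[member_idx] = cluster_id
    return labels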
from kshape.core import kshape, zscore

time_series = [[1, 2, 3, 4, 5], [0, 1, 2, 3, 4], [3, 2, 1, 0, -1], [1, 2, 2, 3, 3]]
cluster_num = 2
clusters = kshape(zscore(time_series), cluster_num)
print(clusters)
#%%
import pandas as pd
import matplotlib.pyplot as plt
from kshape.core import kshape, zscore

# Event, I_ns, whole_events, start and end are assumed to be defined in
# other cells of this notebook.
causes = pd.read_pickle('data/causes.pkl')


def cluster_show(cluster_rep, cluster_id):
    #plt.plot(cluster_rep)
    for i in cluster_id:
        ev = causes.iloc[i]['id']
        temp_In = list(Event(ev, start, end).data[' In'].values)
        plt.plot(temp_In)
    plt.legend(list(causes.iloc[cluster_id]['cause']))
    plt.show()


cluster_num = 6
clusters = kshape(zscore(I_ns, axis=1), cluster_num)
for i in range(cluster_num):
    print(causes.iloc[clusters[i][1]], '\n', '----------------------')
    cluster_show(clusters[i][0], clusters[i][1])

#%%
import statsmodels.api as sm

dta = sm.datasets.co2.load_pandas().data
# deal with missing values. see issue
dta.co2.interpolate(inplace=True)

id = whole_events[100]
start = 0
end = -1
e = Event(id, start, end)