def test_clustering_tree(directory=None):
    """Fit a HierarchicalTree on seven short series and export the result.

    A merge hook asserts that every merge performed during fitting is one of
    the expected ``(from_idx, to_idx)`` pairs, and the first returned cluster
    must contain all seven series indices. The tree is then plotted to a PNG
    and its DOT representation is written to a text file.

    :param directory: Directory that receives ``hierarchy.png`` and
        ``hierarchy.dot``. When falsy, output names are derived from a
        temporary file name instead.
    """
    s = np.array([
        [0., 0, 1, 2, 1, 0, 1, 0, 0],
        [0., 1, 2, 0, 0, 0, 0, 0, 0],
        [1., 2, 0, 0, 0, 0, 0, 1, 1],
        [0., 0, 1, 2, 1, 0, 1, 0, 0],
        [0., 1, 2, 0, 0, 0, 0, 0, 0],
        [1., 2, 0, 0, 0, 0, 0, 1, 1],
        [1., 2, 0, 0, 0, 0, 0, 1, 1]])

    def test_hook(from_idx, to_idx, distance):
        # Only these merges are acceptable for the input above.
        assert (from_idx, to_idx) in [(3, 0), (4, 1), (5, 2), (6, 2), (1, 0), (2, 0)]

    model = clustering.Hierarchical(dtw.distance_matrix_fast, {}, merge_hook=test_hook,
                                    show_progress=False)
    modelw = clustering.HierarchicalTree(model)
    cluster_idx = modelw.fit(s)
    assert cluster_idx[0] == {0, 1, 2, 3, 4, 5, 6}

    def save_outputs(hierarchy_fn, graphviz_fn):
        # Write the plot and the DOT description, reporting both locations.
        modelw.plot(hierarchy_fn)
        print("Figure saved to", hierarchy_fn)
        with open(graphviz_fn, "w") as ofile:
            print(modelw.to_dot(), file=ofile)
        print("Dot saved to", graphviz_fn)

    if directory:
        save_outputs(os.path.join(directory, "hierarchy.png"),
                     os.path.join(directory, "hierarchy.dot"))
    else:
        # The temporary file only supplies a unique base name; the context
        # manager closes its handle deterministically instead of relying on
        # garbage collection.
        with tempfile.NamedTemporaryFile() as tmp:
            save_outputs(tmp.name + "_hierarchy.png",
                         tmp.name + "_hierarchy.dot")
def main():
    """Cluster seven example series hierarchically and display the plot.

    Fits a plain ``Hierarchical`` model, then wraps it in a
    ``HierarchicalTree`` and fits again so the full merge tree is kept.
    The tree is plotted to ``hierarchy.jpg``, which is then read back and
    shown with ``plt.imshow``.
    """
    # Three base patterns; some rows use them reversed.
    pattern_a = [0., 0, 1, 2, 1, 0, 1, 0, 0, 1]
    pattern_b = [0., 1, 2, 0, 0, 0, 0, 0, 0, 1]
    pattern_c = [1., 2, 0, 0, 0, 0, 0, 1, 1, 1]
    s = np.array([
        np.flip(pattern_a),
        pattern_b,
        np.flip(pattern_c, 0),
        pattern_a,
        pattern_b,
        np.flip(pattern_c, 0),
        np.flip(pattern_c, 0),
    ])

    # Custom hierarchical clustering.
    model1 = clustering.Hierarchical(dtw.distance_matrix_fast, {})
    model1.fit(s)

    # The HierarchicalTree wrapper keeps track of the full tree.
    model2 = clustering.HierarchicalTree(model1)
    model2.fit(s)

    def label_for(idx):
        # Label shown next to each series in the plot.
        return "ts-" + str(idx)

    _, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 10))
    model2.plot(
        'hierarchy.jpg',
        axes=axes,
        show_ts_label=label_for,
        show_tr_label=True,
        ts_label_margin=-10,
        ts_left_margin=10,
        ts_sample_length=1,
    )

    # Read the rendered image back from disk and display it.
    rendered = img.imread('hierarchy.jpg')
    plt.imshow(rendered)
def get_cluster():
    """Cluster the spindle-load series of one operation/program pair.

    Loads the segment table and the 2018 data table from fixed CSV paths,
    selects the segments whose operation id is 28 and whose program name is
    1108805036, and cuts one spindle-load series out of the data table per
    segment (rows whose DATE lies between the segment's begin and end,
    inclusive). Iteration stops at the first segment index greater than
    ``YEAR_INDEX_LIMIT``; segments with no matching rows are skipped.

    The collected series are clustered with ``Hierarchical``, then with
    ``HierarchicalTree`` and ``LinkageTree``; the latter two are plotted to
    PNG files. Exceptions raised by those two stages are printed rather than
    propagated. Nothing is returned.
    """
    aux_file_path = r'C:\TFM\auxdata\hist_protected.csv'
    data_path = r'C:\TFM\data\2018\2018.csv'
    hierarchical_plot = r'C:\TFM\dtw\hierarchical_cluster.png'
    linkage_plot = r'C:\TFM\dtw\linkage_cluster.png'

    df_aux = pd.read_csv(aux_file_path, header=0, delimiter=',',
                         parse_dates=[SEGMENT_BEGIN, SEGMENT_END])
    df_data = pd.read_csv(data_path, header=0, delimiter=',', parse_dates=[DATE])

    op_no = 28
    program_number = 1108805036

    # Begin and end date of every segment belonging to the chosen
    # operation/program pair (same row selection for both columns).
    selected = df_aux[(df_aux[OPERATION_ID_NUMBER] == op_no)
                      & (df_aux[PROGRAM_NAME] == program_number)]
    begin_date = selected[SEGMENT_BEGIN]
    end_date = selected[SEGMENT_END]

    series = []
    for item in begin_date.index:
        if item > YEAR_INDEX_LIMIT:
            break
        in_segment = ((df_data[DATE] >= begin_date[item])
                      & (df_data[DATE] <= end_date[item]))
        segment_rows = df_data.loc[in_segment]
        if segment_rows.empty:
            continue
        series.append(np.array(segment_rows[SPINDLE_LOAD]))

    # Custom hierarchical clustering.
    model1 = clustering.Hierarchical(dtw.distance_matrix_fast, {})
    model1.fit(series)

    try:
        # Augment the Hierarchical object to keep track of the full tree.
        model2 = clustering.HierarchicalTree(model1)
        model2.fit(series)
        model2.plot(hierarchical_plot, show_tr_label=True)
    except Exception as ex:
        print(ex)

    # SciPy linkage clustering.
    try:
        model3 = clustering.LinkageTree(dtw.distance_matrix_fast, {})
        model3.fit(series)
        model3.plot(linkage_plot, show_tr_label=True)
    except Exception as ex:
        print(ex)
# Column labels of the frame are used as machine names / plot labels.
head = list(df.columns.values)
print("head", head)

# Transpose so each former column becomes one row, then drop to the raw array.
df = df.T.values

# Pairwise DTW distance matrix; infinite entries are replaced with 0 before
# the matrix is exported to a spreadsheet.
ds = dtw.distance_matrix_fast(df)
ds[ds == inf] = 0
pd.DataFrame(ds).to_excel("ds.xlsx")

# --- Clustering ---
# Custom hierarchical clustering.
model1 = clustering.Hierarchical(dtw.distance_matrix_fast, {})
# Wrapper around model1 that keeps track of the full tree.
model2 = clustering.HierarchicalTree(model1)
# SciPy linkage clustering; this is the model that is fitted and plotted.
model3 = clustering.LinkageTree(dtw.distance_matrix_fast, {})
cluster_idx = model3.fit(df)

# --- Plot ---
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 15))
model3.plot(
    "hierarchy.png",
    axes=ax,
    show_ts_label=head,
    show_tr_label=True,
    ts_label_margin=-10,
    ts_left_margin=10,
    ts_sample_length=1,
)

# --- Search for the number of clusters ---
NumberOfClusters = range(2, 30)
silhouette_score_values = []
def _zscore_series(values):
    """Convert *values* to floats and return their z-scores as a NumPy array."""
    return stats.zscore(np.array([float(v) for v in values]))


def cluster(time_series_set, name):
    """Cluster the stored series, adding the series called *name* if it is new.

    Every row of ``./static/cluster_data.csv`` is read as
    ``name, value, value, ...``; the values are z-normalised and collected.
    If *name* is not among the stored names, the second element of each row
    of *time_series_set* forms a new series, which is appended to the CSV
    file and added to the collection. All series are then clustered and the
    tree of the ``HierarchicalTree`` model is plotted to ``hierarchy.png``
    with the stored names as labels.

    :param time_series_set: Iterable of rows; ``row[1]`` of each row is taken
        as one sample of the new series (the performance-metric series).
    :param name: Label of the series; compared against the first column of
        the CSV file.
    :raises ValueError: If a stored or supplied sample cannot be converted
        to ``float``. A new series is validated before it is written, so an
        invalid series is never persisted.
    """
    path = "./static/cluster_data.csv"
    name_list = []
    series_list = []

    # newline='' is what the csv module expects; the context manager closes
    # the handle that was previously left open.
    with open(path, 'r', newline='') as f:
        for row in csv.reader(f):
            if not row:
                # Blank lines (e.g. left by earlier appends made without
                # newline='') carry no series.
                continue
            name_list.append(row[0])
            series_list.append(_zscore_series(row[1:]))

    if name not in name_list:
        # time_series is the performance-metric series.
        time_series = [row[1] for row in time_series_set]
        # Convert first so a non-numeric sample fails before the file is touched.
        normalized = _zscore_series(time_series)
        with open(path, 'a', newline='') as f:
            csv.writer(f).writerow([name] + time_series)
        name_list.append(name)
        series_list.append(normalized)

    # Custom hierarchical clustering.
    model1 = clustering.Hierarchical(dtw.distance_matrix_fast, {})
    model1.fit(series_list)
    # Augment the Hierarchical object to keep track of the full tree.
    model2 = clustering.HierarchicalTree(model1)
    model2.fit(series_list)
    # SciPy linkage clustering (fitted as before; only model2 is plotted).
    model3 = clustering.LinkageTree(dtw.distance_matrix_fast, {})
    model3.fit(series_list)

    def show_ts_label(idx):
        # Label each series in the plot with its stored name.
        return name_list[idx]

    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 10))
    model2.plot("hierarchy.png", axes=ax, show_ts_label=show_ts_label,
                show_tr_label=True, ts_label_margin=-10,
                ts_left_margin=10, ts_sample_length=1)