def __init__(self, X_train, freq_itemset_dict, rule_list, contrast_params,
             epsilon=5e-5, eps_step=0.1, reduced_rate=None):
    """Store the inputs and optionally build a PCA-reduced copy of the features.

    Args:
        X_train: Feature matrix. Kept unchanged as ``original_features``.
            When ``reduced_rate`` is given it must expose ``shape[1]`` and
            be accepted by ``IncrementalPCA.fit_transform``.
        freq_itemset_dict: Stored as-is on the instance.
        rule_list: Stored as-is on the instance.
        contrast_params: Stored as-is on the instance.
        epsilon: Stored as-is on the instance (not used in this method).
        eps_step: Stored as-is on the instance (not used in this method).
        reduced_rate: If not ``None``, the fraction of the feature count to
            keep as PCA components; ``sample_features`` then holds the
            projected data instead of ``X_train``.

    Raises:
        ValueError: If ``reduced_rate`` is given but is not positive.
    """
    self.rule_list = rule_list
    self.freq_itemset_dict = freq_itemset_dict
    self.original_features = X_train
    self.sample_features = X_train
    self.contrast_params = contrast_params
    self.epsilon = epsilon
    self.eps_step = eps_step

    if reduced_rate is None:
        return

    if reduced_rate <= 0:
        raise ValueError(
            "reduced_rate must be positive, got %r" % (reduced_rate,)
        )

    # int() truncates, so a small rate could yield 0 components, which
    # IncrementalPCA rejects; always keep at least one component.
    ncomponents = max(1, int(X_train.shape[1] * reduced_rate))
    pca = IncrementalPCA(n_components=ncomponents)
    self.sample_features = pca.fit_transform(X_train)
'cluster' : ('', 'Path of clusters file'), 'output' : ('', 'Path of output file'), 'title' : ('Dataset', 'Title of charts') }) if not config.load(sys.argv): print ('Argument is not correct. Please try again') sys.exit(2) X, association_rules = load_feature_vectors(config.get_value('feature')) m = 2 print('dimensional reduce: ' + str(m)) pca = IncrementalPCA(n_components = X.shape[1]//m) new_X = pca.fit_transform(X) clusters, number_of_clusters = load_clusters(config.get_value('cluster')) print (number_of_clusters) unique_colors = get_N_HexCol(number_of_clusters + 1) Y = [] for rule in association_rules: cluster_id = clusters[rule] Y.append(unique_colors[cluster_id + 1]) #plt.scatter(new_X[:,0], new_X[:,1], c = np.array(Y), alpha = 0.9, s = 10) #plt.title(config.get_value('title')) #plt.savefig(config.get_value('output'), format='PNG',bbox_inches='tight') #np.array(Y)
print()

# Show PCA: look up the embedding of a few hand-picked tokens, project the
# vectors onto two components and draw them as a labelled scatter plot.
pca_queries = [
    "ID:wrapper",
    "ID:container",
    "ID:msg",
    "ID:alert",
    "ID:list",
    "ID:seq",
    "ID:lst",
    # NOTE(review): "ID:list" appears twice; kept so the PCA input is
    # unchanged -- confirm whether the duplicate is intentional.
    "ID:list",
    "LIT:error",
]
pca_vectors = []
pca_labels = []
for name in pca_queries:
    # Strip only the leading tag; str.replace would also remove any later
    # occurrence of the tag text inside the token itself.
    if name.startswith("LIT:"):
        # Literals are displayed in double quotes (assumes string literals only).
        print_name = "\"" + name[len("LIT:"):] + "\""
    elif name.startswith("ID:"):
        print_name = name[len("ID:"):]
    else:
        print_name = name
    pca_labels.append(print_name)
    pca_vectors.append(model.wv[name])

# Reduce the collected vectors to two dimensions for plotting.
ipca = IncrementalPCA(n_components=2)
reduced_vectors = ipca.fit_transform(pca_vectors)

fig, ax = pyplot.subplots()
x = reduced_vectors[:, 0]
y = reduced_vectors[:, 1]
ax.scatter(x, y)
# Annotate every point with its display label.
for idx, label in enumerate(pca_labels):
    ax.annotate(label, (x[idx], y[idx]))
pyplot.show()