# Script fragment: build three families of persistence diagrams, merge them,
# compute a kernel with kernel_type='sw' and run a kernel SVM on it.
# `fil`, `norm`, `args` and `labels` are defined outside this excerpt.

# One diagram collection per setting; presumably sublevel ('sub'),
# superlevel ('sup') and extended persistence (one_hom=True) -- TODO confirm.
subdgms = gs2dgms_parallel(n_jobs=-1, fil=fil, fil_d='sub', norm=norm)
supdgms = gs2dgms_parallel(n_jobs=-1, fil=fil, fil_d='sup', norm=norm)
epddgms = gs2dgms_parallel(n_jobs=-1, fil=fil, one_hom=True, norm=norm)

# serial
# subdgms = gs2dgms(gs, fil=fil, fil_d='sub', norm=norm, one_hom=False) # step2 # TODO: need to add interface
# supdgms = gs2dgms(gs, fil=fil, fil_d='sup', norm=norm, one_hom=False) # step2 #
# epddgms = gs2dgms(gs, fil=fil, norm=norm, one_hom=True) # step2 # TODO

# Merge the three collections (how they are merged is decided by `args`
# inside combine_dgms), optionally permute them, then print a summary.
dgms = combine_dgms(subdgms, supdgms, epddgms, args)
dgms = permute_dgms(dgms, permute_flag=args.permute, permute_ratio=0.5)
dgms_summary(dgms)

# sw kernel
swdgms = dgms2swdgms(dgms)
# NOTE(review): 'K' and 'p' look like placeholders for kernel_type='sw' -- confirm.
kwargs = {'bw': args.bw, 'n_directions': 10, 'K': 1, 'p': 1}
# Same collection on both sides; only the first returned value is kept.
sw_kernel, _ = sw_parallel(swdgms, swdgms, parallel_flag=True, kernel_type='sw', **kwargs)
print(sw_kernel.shape)

# `labels` is passed in both leading positions; presumably the first one is
# ignored when a precomputed kernel is supplied -- TODO confirm.
clf = classifier(labels, labels, method='svm', n_cv=args.n_cv, kernel=sw_kernel)
clf.svm_kernel_(n_splits=10)
print(clf.stat)
def main(graph, fil, norm, permute, ss, epd, n_cv, flip, feat, feat_kwargs, ntda):
    """
    Run one graph-classification experiment; all hyperparameters go here.

    :param graph: graph dataset
    :param fil: filtration function
    :param norm: normalize or not
    :param permute: whether permute dgm
    :param ss: both sublevel and superlevel or not
    :param epd: include extended persistence or not
    :param n_cv: number of cross validation
    :param flip: forwarded unchanged to ``combine_dgms``
    :param feat: featurisation to evaluate; one of 'sw', 'pi', 'pss', 'wg',
        'pervec', 'pf'
    :param feat_kwargs: keyword arguments forwarded to ``sw_parallel`` (kernel
        feats) or to ``dgms2vec`` ('pervec'); not used by 'pi'
    :param ntda: forwarded unchanged to ``gs2dgms_parallel``
    :return: ``clf.stat`` of the fitted classifier, or None when
        ``check_duplicate`` reports this configuration as already recorded
    :raises ValueError: if ``feat`` is not one of the supported names
    """
    global gs

    # Fail fast: an unknown feat used to be detected only after the graphs
    # were loaded and all three diagram families had been computed.
    supported_feats = ('sw', 'pi', 'pss', 'wg', 'pervec', 'pf')
    if feat not in supported_feats:
        raise ValueError('No such feat %s' % feat)

    print('feat kwargs', feat_kwargs)
    db = get_tda_db()
    params = {
        'graph': graph,
        'fil': fil,
        'norm': norm,
        'permute': permute,
        'ss': ss,
        'epd': epd,
        'n_cv': n_cv,
        'flip': flip,
        'feat': feat,
        'ntda': ntda,
        'feat_kwargs': feat_kwargs,
    }
    # Skip configurations that are already in the results database.
    if check_duplicate(db, params):
        return

    # The value is not used below; the call is kept in case dgms_dir_test has
    # side effects -- TODO confirm and drop if it does not.
    label_flag = dgms_dir_test(fil=fil, fil_d='sub', norm=norm, graph=graph)[1]

    # step 1
    # labels_only true means gs is None. Turned on for high speed
    gs, labels = load_tugraphs(graph, labels_only=False)

    # parallel
    subdgms = gs2dgms_parallel(n_jobs=-1, fil=fil, fil_d='sub', norm=norm,
                               graph=graph, ntda=ntda)
    supdgms = gs2dgms_parallel(n_jobs=-1, fil=fil, fil_d='sup', norm=norm,
                               graph=graph, ntda=ntda)
    epddgms = gs2dgms_parallel(n_jobs=-1, fil=fil, one_hom=True, norm=norm,
                               graph=graph, ntda=ntda)

    dgms = combine_dgms(subdgms, supdgms, epddgms, ss=ss, epd=epd, flip=flip)
    dgms = permute_dgms(dgms, permute_flag=permute)  # old way
    dgms_summary(dgms)

    swdgms = dgms2swdgms(dgms)
    if feat == 'sw':
        print(feat_kwargs)
        k, _ = sw_parallel(swdgms, swdgms, parallel_flag=True,
                           kernel_type='sw', **feat_kwargs)
        print(k.shape)
        cmargs = {'print_flag': 'off'}  # confusion matrix
        clf = classifier(labels, labels, method='svm', n_cv=n_cv, kernel=k,
                         **cmargs)
        clf.svm_kernel_(n_splits=10)

    elif feat == 'pi':  # vector
        # Own name so the DB record dict above is not shadowed.
        pi_params = {
            'bandwidth': 1.0,
            'weight': (1, 1),
            'im_range': [0, 1, 0, 1],
            'resolution': [5, 5],
        }
        images = merge_dgms(subdgms, supdgms, epddgms, vectype='pi', ss=ss,
                            epd=epd, **pi_params)
        clf = classifier(images, labels, method='svm', n_cv=n_cv)
        clf.svm(n_splits=10)

    elif feat == 'pss':
        k, _ = sw_parallel(swdgms, swdgms, parallel_flag=True,
                           kernel_type='pss', **feat_kwargs)
        clf = classifier(labels, labels, method='svm', n_cv=n_cv, kernel=k)
        clf.svm_kernel_(n_splits=10)

    elif feat == 'wg':
        k, _ = sw_parallel(swdgms, swdgms, parallel_flag=True,
                           kernel_type='wg', **feat_kwargs)
        print(k.shape)
        clf = classifier(labels, labels, method='svm', n_cv=n_cv, kernel=k)
        clf.svm_kernel_(n_splits=10)

    elif feat == 'pervec':
        cmargs = {'print_flag': 'on'}  # confusion matrix
        pd_vector = dgms2vec(dgms, vectype='pervec', **feat_kwargs)
        clf = classifier(pd_vector, labels, method='svm', n_cv=n_cv, **cmargs)
        clf.svm(n_splits=10)

    else:  # feat == 'pf' (guaranteed by the validation at the top)
        # This is the only kernel computed with parallel_flag=False.
        k, _ = sw_parallel(swdgms, swdgms, parallel_flag=False,
                           kernel_type='pf', **feat_kwargs)
        clf = classifier(labels, labels, method='svm', n_cv=n_cv, kernel=k)
        clf.svm_kernel_(n_splits=10)

    print(clf.stat)
    print_line()
    return clf.stat
# Script fragment: apply a filtration to graph `g`, build ego graphs, compute
# their diagrams and run a kernel SVM on a kernel_type='sw' kernel.
# `g`, `lapfeat`, `fil_method`, `radius`, `n1`, `n2`, `zigzag` and `labels`
# are defined outside this excerpt.
kwargs = {'h': 0.3}
g = fil_strategy(g, lapfeat, method=fil_method, viz_flag=False, **kwargs)

ego = egograph(g, radius=radius, n=len(g), recompute_flag=True, norm_flag=True, print_flag=False)
egographs = ego.egographs(method='serial')
# NOTE(review): the original remark said "compute dgms in parallel", but
# method='serial' is what is actually passed here.
dgms = alldgms(egographs, radius=radius, dataset='', recompute_flag=True, method='serial', n=n1 + n2, zigzag=zigzag)

swdgms = dgms2swdgms(dgms)
# `kwargs` is re-bound: from here on it holds the kernel settings.
kwargs = {'bw': 1, 'n_directions': 10}
sw_kernel, _ = sw_parallel(swdgms, swdgms, kernel_type='sw', parallel_flag=True, **kwargs)

# An all-zero (n1 + n2, 10) array is passed as the feature argument;
# presumably a placeholder because the kernel is supplied -- TODO confirm.
clf = classifier(np.zeros((n1 + n2, 10)), labels, method=None, kernel=sw_kernel)
clf.svm_kernel_()
# Script fragment: label two diagram collections +1 / -1, classify them with a
# kernel SVM, then (only when args.viz is set) derive a distance-like matrix
# from the kernel. `fake_dgms`, `another_fake_dgms`, `all_dgms` and `args`
# are defined outside this excerpt.
indicator_labels = [1] * len(fake_dgms) + [-1] * len(another_fake_dgms)
all_dgms = dgms2swdgms(all_dgms)

# classify true diagrams from fake ones
feat_kwargs = {'n_directions': 10, 'bw': 1}
k, _ = sw_parallel(all_dgms, all_dgms, parallel_flag=True, kernel_type='sw', **feat_kwargs)
print(k.shape)
cmargs = {'print_flag': 'off'}  # confusion matrix
clf = classifier(indicator_labels, indicator_labels, method='svm', n_cv=1, kernel=k, **cmargs)
clf.svm_kernel_(n_splits=10)

# sys.exit with a string argument prints it to stderr and exits with status 1.
if not args.viz:
    sys.exit('-' * 50)

# NOTE(review): this repeats the kernel computation above with identical
# arguments; reusing `k` looks possible -- confirm sw_parallel is deterministic.
feat_kwargs = {'n_directions': 10, 'bw': 1}
k, _ = sw_parallel(all_dgms, all_dgms, parallel_flag=True, kernel_type='sw', **feat_kwargs)
print(np.diag(k).shape)

# Keep only the diagonal of k (as a diagonal matrix), then combine it with k.
k_diag = np.diag(np.diag(k))
kdist = diag2m(k_diag) + diag2m(k_diag, row_major=False) - 2 * k
# Spot check of one entry: kdist[i, j] == k[i, i] + k[j, j] - 2 * k[i, j].
# NOTE(review): exact float equality; relies on identical operation order.
assert kdist[1, 2] == k[1, 1] + k[2, 2] - 2 * k[1, 2]
def main(graph, fil, norm, permute, ss, epd, n_cv, flip, feat, feat_kwargs):
    """
    Single entry point that receives every hyperparameter of an experiment.

    :param graph: graph dataset
    :param fil: filtration function
    :param norm: normalize or not
    :param permute: whether permute dgm
    :param ss: both sublevel and superlevel or not
    :param epd: include extended persistence or not
    :param n_cv: number of cross validation
    :param flip: forwarded unchanged to ``combine_dgms``
    :param feat: branch selector; 'sw', 'pi', 'pss' and 'wg' are handled,
        'pdvector' does nothing
    :param feat_kwargs: keyword arguments forwarded to ``sw_parallel``
    :return: ``clf.stat`` for the handled ``feat`` values
    """
    global gs
    print('kwargs', feat_kwargs)

    # The flag itself is not consulted below; the call is kept as in the
    # original sequence.
    label_flag = dgms_dir_test(fil=fil, fil_d='sub', norm=norm, graph=graph)[1]
    # step 1
    gs, labels = load_tugraphs(graph, labels_only=True)

    # parallel: the three diagram computations share most of their arguments
    shared_args = {'n_jobs': -1, 'fil': fil, 'norm': norm, 'graph': graph}
    subdgms = gs2dgms_parallel(fil_d='sub', **shared_args)
    supdgms = gs2dgms_parallel(fil_d='sup', **shared_args)
    epddgms = gs2dgms_parallel(one_hom=True, **shared_args)

    dgms = combine_dgms(subdgms, supdgms, epddgms, ss=ss, epd=epd, flip=flip)
    dgms = permute_dgms(dgms, permute_flag=permute, permute_ratio=0.5)
    dgms_summary(dgms)
    swdgms = dgms2swdgms(dgms)

    if feat == 'sw':
        print(feat_kwargs)
        gram, _ignored = sw_parallel(swdgms, swdgms, parallel_flag=True,
                                     kernel_type='sw', **feat_kwargs)
        model = classifier(labels, labels, method='svm', n_cv=n_cv,
                           kernel=gram)
        model.svm_kernel_(n_splits=10)
        print(model.stat)
        return model.stat
    elif feat == 'pi':
        pi_settings = {
            'bandwidth': 1.0,
            'weight': (1, 1),
            'im_range': [0, 1, 0, 1],
            'resolution': [5, 5],
        }
        pi_features = merge_dgms(subdgms, supdgms, epddgms, vectype='pi',
                                 ss=ss, epd=epd, **pi_settings)
        model = classifier(pi_features, labels, method='svm', n_cv=n_cv)
        model.svm(n_splits=10)
        return model.stat
    elif feat == 'pss':
        # This branch runs the kernel computation with parallel_flag=False.
        gram, _ignored = sw_parallel(swdgms, swdgms, parallel_flag=False,
                                     kernel_type='pss', **feat_kwargs)
        print(gram.shape, gram, np.max(gram))
        model = classifier(labels, labels, method='svm', n_cv=n_cv,
                           kernel=gram)
        model.svm_kernel_(n_splits=10)
        print(model.stat)
        return model.stat
    elif feat == 'wg':
        gram, _ignored = sw_parallel(swdgms, swdgms, parallel_flag=True,
                                     kernel_type='wg', **feat_kwargs)
        print(gram.shape)
        model = classifier(labels, labels, method='svm', n_cv=n_cv,
                           kernel=gram)
        model.svm_kernel_(n_splits=10)
        print(model.stat)
        return model.stat
    elif feat == 'pdvector':
        pass
# Script fragment. With args.kernel == 'sw' it sweeps four bandwidths, runs a
# kernel SVM for each and then exits; otherwise it falls through to the
# 'pervec' vectorisation below. `args`, `dgms` and `labels` are defined
# outside this excerpt.
if args.kernel == 'sw':
    swdgms = dgms2swdgms(dgms)
    for bw in [0.1, 1, 10, 100]:
        feat_kwargs = {'n_directions': 10, 'bw': bw}
        print(f'star computing kernel...')
        k, _ = sw_parallel(swdgms, swdgms, parallel_flag=True, kernel_type='sw', **feat_kwargs)
        print(k.shape)
        cmargs = {'print_flag': 'off'}  # confusion matrix
        clf = classifier(labels, labels, method='svm', n_cv=1, kernel=k, **cmargs)
        clf.svm_kernel_(n_splits=10)
    # NOTE(review): placed after the sweep so all bandwidths run; the original
    # indentation was lost -- confirm.
    sys.exit()

# convert to vector
# kwargs = {'num_landscapes': 5, 'resolution': 100, 'keep_zero': True}
# x = dgms2vec(dgms, vectype='pl', **kwargs)
kwargs = {'dim': 100}
print('using pervec')
x = dgms2vec(dgms, vectype='pervec', **kwargs)
# Optional control: replace the features with uniform random values of the
# same shape.
if args.random:
    x = np.random.random(x.shape)
if args.norm:
    x = normalize_(x, axis=0)
def main(idx, n_iter, clf, test_size, vec, method, seg, permute, norm):
    """
    Classify the faces of one shape from per-node diagram vectors.

    Each face feature is the sum of the diagram vectors of its three nodes.

    :param idx: shape/model id; compared against the ranges returned by
        ``prince_cat`` and passed (as ``str``) to the loaders
    :param n_iter: ``max_iter`` forwarded to ``eigenpro``
    :param clf: 'rf' selects the ``classifier(...).svm`` path, anything else
        selects ``eigenpro``
    :param test_size: forwarded to ``eigenpro``
    :param vec: vectorisation; one of 'pvector', 'pl', 'pervec'
    :param method: currently unused
    :param seg: forwarded to ``loady``
    :param permute: if truthy, permute the diagrams (with ``seed_flag=True``)
    :param norm: if truthy, apply ``normalize(X, axis=0)`` to the features
    :return: result of ``clf.svm`` or of ``eigenpro``
    :raises ValueError: if ``vec`` is not a supported vectorisation
    """
    # Settings per vectorisation; validated first so a typo fails before any
    # diagram is loaded.
    vec_settings = {
        'pvector': {},
        'pl': {'num_landscapes': 5, 'resolution': 100},
        'pervec': {'dim': 300},
    }
    if vec not in vec_settings:
        raise ValueError(f'No vec like {vec}')

    # Report the first category whose [lo, hi] range contains idx.
    for bounds, category in prince_cat().items():
        if bounds[0] <= idx <= bounds[1]:
            print(f'idx {idx} is {category}')
            break

    # seg one shape
    dgms = loaddgm(str(idx), form='dionysus')
    dgms = flip_dgms(dgms)
    if permute:
        dgms = permute_dgms(dgms, permute_flag=True, seed_flag=True)

    # vectorize
    dgm_vector = dgms2vec(dgms, vectype=vec, **vec_settings[vec])
    if vec == 'pervec':
        # Only the 'pervec' vectors are normalized at this stage.
        dgm_vector = normalize_(dgm_vector)

    y = loady(model=idx, counter=True, seg=seg)

    X, Y = [], []
    # n_node is currently unused; the call is kept as in the original.
    n_face, n_node = face_num(str(idx)), node_num(str(idx))
    face_x = np.zeros((n_face, dgm_vector.shape[1]))
    face_indices = face_idx(str(idx))
    for i in range(n_face):
        a, b, c = (int(v) for v in face_indices[i])
        face_x[i, :] = dgm_vector[a][:] + dgm_vector[b, :] + dgm_vector[c, :]
    print(face_x.shape, y.shape)

    # Single-shape case of a list-then-concatenate pattern.
    X.append(face_x)
    Y.append(y)
    X, Y = np.concatenate(X), np.concatenate(Y)
    if norm:
        X = normalize(X, axis=0)
    # Previously reported dgm_vector.shape (per-node) under the label "X".
    print(f'X is of shape {X.shape} and Y is of shape {Y.shape}\n')

    # classifier
    if clf == 'rf':
        # NOTE(review): the 'rf' option runs method='svm' -- confirm intent.
        model = classifier(X, Y, method='svm', n_cv=1)
        res = model.svm(n_splits=10)  # todo res format
    else:
        res = eigenpro(X, Y, max_iter=n_iter, test_size=test_size, bd=1)

    print('-' * 150)
    return res
# Script fragment: build per-face features, classify them and exit.
# `n_face`, `face_indices`, `face_x`, `dgm_vector`, `y`, `X`, `Y` and `args`
# are defined outside this excerpt.

# Each face feature is the sum of the vectors of its three node indices.
for i in range(n_face):
    idx1, idx2, idx3 = face_indices[i]
    idx1, idx2, idx3 = int(idx1), int(idx2), int(idx3)
    face_x[i, :] = dgm_vector[idx1][:] + dgm_vector[idx2, :] + dgm_vector[
        idx3, :]
print(face_x.shape, y.shape)

# NOTE(review): original indentation was lost; these are placed after the
# loop so each array is appended once -- confirm.
X.append(face_x)
Y.append(y)
X, Y = np.concatenate(X), np.concatenate(Y)
if args.norm:
    X = normalize(X, axis=0)

# classifier
print()
if args.clf == 'rf':
    # NOTE(review): the 'rf' option runs method='svm' -- confirm intent.
    clf = classifier(X, Y, method='svm', n_cv=1)
    clf.svm(n_splits=10)
else:
    kwargs = {}
    res = eigenpro(X, Y, max_iter=args.n_iter, test_size=args.test_size, bd=1, **kwargs)
print('-' * 150)
# The script stops here; the statement below is not reached.
sys.exit()

# check consistency btwn loady and load_labels
y = loady(model=args.idx, counter=True, seg=args.seg)
# Script fragment: three-block stochastic block model experiment.
# A degree-feature baseline is run first; then ego-graph persistence diagrams
# are prepared for a kernel. `args` is defined outside this excerpt.
rs = args.rs
radius, fil = 1, args.fil
n_node, p = 100, args.p

# Three equally sized communities: 0.5 within-block edge probability,
# p between blocks.
sizes = [n_node] * 3
permute_flag = True
labels = [0] * n_node + [1] * n_node + [2] * n_node
probs = [[0.5, p, p], [p, 0.5, p], [p, p, 0.5]]
g = stochastic_block_model(sizes, probs, seed=rs)

# One-dimensional embedding used as the node value 'lap' below.
lp = LaplacianEigenmaps(d=1)
lp.learn_embedding(g, weight='weight')
lapfeat = lp.get_embedding()

# Baseline: classify nodes from their degree alone.
degfeat = np.array(list(dict(nx.degree(g)).values())).reshape(3 * n_node, 1)
clf = classifier(degfeat, labels, method=None)
clf.svm()

# `Graph.node` was removed in NetworkX 2.4; `Graph.nodes[n]` is the supported
# way to reach a node's attribute dict.
for n in g.nodes():
    g.nodes[n]['lap'] = float(lapfeat[n, 0])
g = add_edgeval(g, fil=fil)

ego = egograph(g, radius=radius, n=len(g), recompute_flag=True,
               norm_flag=True, print_flag=False)
egographs = ego.egographs(method='serial')
# NOTE(review): n=n_node although the graph has 3 * n_node nodes -- confirm
# what `n` means to alldgms. method='serial' is what is passed, despite the
# original "compute dgms in parallel" remark.
dgms = alldgms(egographs, radius=radius, dataset='', recompute_flag=True,
               method='serial', n=n_node)

if permute_flag:
    dgms = permute_dgms(dgms)
dgms_summary(dgms)
swdgms = dgms2swdgms(dgms)
# viz fv value # val = dict(nx.get_node_attributes(gs[i], 'fv')).values() # plt.plot(val) # plt.title('q: %s, i: %s'%(q, i)) # plt.show() # sys.exit() print('Finish computing lapfeat') dgms = alldgms(gs, radius=float('inf'), dataset='', recompute_flag=True, method='serial', n=2 * n, zigzag=zigzag) # compute dgms in parallel print('Finish computing dgms') swdgms = dgms2swdgms(dgms) feat_kwargs = {'n_directions': 10, 'bw': 1} sw_kernel, _ = sw_parallel(swdgms, swdgms, kernel_type='sw', parallel_flag=True, **feat_kwargs) clf = classifier(np.zeros((len(labels), 10)), labels, method=None, kernel=sw_kernel) print(clf.svm_kernel_()) print(p, q, edge_kwargs)
# Script fragment whose first line is the tail of a call opened outside this
# excerpt (presumably `ego = egograph(g, radius=radius, ...` -- TODO confirm).
n=len(g), recompute_flag=True, norm_flag=True, print_flag=False)
egographs = ego.egographs(method='parallel')
# NOTE(review): the ego graphs are built with method='parallel', but the
# diagrams use method='serial' despite the original "in parallel" remark.
dgms = alldgms(egographs, radius=radius, dataset='', recompute_flag=True, method='serial', n=n_node, zigzag=zigzag)
dgms_summary(dgms)
print_dgm(dgms[0])

swdgms = dgms2swdgms(dgms)
# Single-value bandwidth sweep.
for bw in [10]:
    kwargs = {'bw': bw, 'K': 1, 'p': 1}  # TODO: K and p is dummy here
    sw_kernel, _ = sw_parallel(swdgms, swdgms, kernel_type='sw', parallel_flag=True, **kwargs)
    # -2 * log(kernel); only consumed by the commented-out visualisation.
    sw_distancem = np.log(sw_kernel) * (-2)
    # viz_matrix(sw_distancem)
    # An all-zero (3 * n_node, 10) array is passed as the feature argument;
    # presumably a placeholder because the kernel is supplied -- TODO confirm.
    clf = classifier(np.zeros((3 * n_node, 10)), labels, method=None, kernel=sw_kernel)
    clf.svm_kernel_()