def get_counts(d, U):
    """Histogram recovery rates at undersampling level U over 100 runs.

    For each stored run use level U when it was computed, otherwise fall
    back to the deepest level available.  Superclique equivalence classes
    (marked by eq == set([-1])) are binned under the sentinel rate 1000.

    Returns a dict mapping rate -> frequency.
    """
    rates = []
    for i in range(0, 100):
        # effective level: U if reached, else the deepest computed level.
        # The original duplicated the whole branch body for the two cases;
        # they differed only in this index.
        u = U if len(d[i]['solutions']) >= U else len(d[i]['solutions'])
        if d[i]['solutions'][u]['eq'] == set([-1]):  # superclique sentinel
            rates.append(1000)
        else:  # not a superclique
            s = d[i]['solutions'][u]['eq']
            H = bfutils.undersample(d[i]['gt'], u)
            all_rates = unknownrate.withrates(s, H).values()
            for rate in all_rates:
                rates.append(rate[0])
    keys = np.sort(np.unique(rates))
    c = {}
    for k in keys:
        # key is rate and value is frequency
        c[k] = len(np.where(rates == k)[0])
    return c
def get_counts(d, U):
    """Count how often each recovery rate occurs at level U (100 runs).

    Runs that never reached level U contribute their deepest computed
    level instead.  The sentinel rate 1000 marks supercliques.
    Returns {rate: frequency}.
    """
    collected = []
    for i in range(0, 100):
        sols = d[i]['solutions']
        level = min(U, len(sols))  # same as: U if len(sols) >= U else len(sols)
        eqclass = sols[level]['eq']
        if eqclass == set([-1]):  # superclique
            collected.append(1000)
        else:
            H = bfutils.undersample(d[i]['gt'], level)
            for entry in unknownrate.withrates(eqclass, H).values():
                collected.append(entry[0])
    counts = {}
    for key in np.sort(np.unique(collected)):
        # frequency of this rate among all collected values
        counts[key] = len(np.where(collected == key)[0])
    return counts
def ok2addaVpath(e, p, g, g2, rate=2):
    """Test whether adding the V-path (e, p) to g keeps g undersampled by
    `rate` an edge-subset of g2.  g is restored before returning."""
    mask = addaVpath(g, e, p)
    compatible = isedgesubset(bfu.undersample(g, rate), g2)
    cleanVedges(g, e, p, mask)  # always undo the trial addition
    return compatible
def estOE(d):
    """Omission-error rate of d['estimate'] against the ground truth
    undersampled by 1: missed directed + bidirected edges over the total
    number of true edges."""
    g1 = bfu.undersample(d['gt']['graph'], 1)
    oce = gk.OCE(d['estimate'], g1)
    total = np.double(len(gk.edgelist(g1))) + np.double(len(gk.bedgelist(g1)))
    return (oce['directed'][0] + oce['bidirected'][0]) / total
def supergraphs_in_eq(g, g2, rate=1):
    '''Find all supergraphs of g that are also in the same equivalence
    class with respect to g2 and the rate.
    Currently works only for bfu.undersample by 1
    '''
    # sanity check: g itself must already map to g2 under this rate
    if bfu.undersample(g, rate) != g2:
        raise ValueError('g is not in equivalence class of g2')
    s = set()  # integer codes (bfu.g2num) of all qualifying supergraphs

    def addnodes(g, g2, edges):
        # Recursively try every subset of the still-addable edges.
        # NOTE(review): mutates g in place and backtracks via delanedge;
        # results accumulate in the enclosing set `s`.
        if edges:
            masks = []
            for e in edges:
                # keep only edges whose addition stays compatible with g2
                if ok2addanedge(e[0], e[1], g, g2, rate=rate):
                    masks.append(True)
                else:
                    masks.append(False)
            nedges = [edges[i] for i in range(len(edges)) if masks[i]]
            n = len(nedges)
            if n:
                for i in range(n):
                    mask = addanedge(g, nedges[i])
                    s.add(bfu.g2num(g))  # record g with this edge added
                    # recurse on the remaining candidates (skip edge i)
                    addnodes(g, g2, nedges[:i] + nedges[i + 1:])
                    delanedge(g, nedges[i], mask)  # backtrack

    # candidate edges are exactly those absent from g
    edges = gk.edgelist(gk.complement(g))
    addnodes(g, g2, edges)
    return s
def estOE(d):
    """Fraction of true edges (directed + bidirected) that the estimate
    omits, measured against the 1-undersampled ground truth."""
    gt = d['gt']['graph']
    gt = bfu.undersample(gt, 1)
    err = gk.OCE(d['estimate'], gt)
    denom = np.double(len(gk.edgelist(gt))) + np.double(len(gk.bedgelist(gt)))
    omitted = err['directed'][0] + err['bidirected'][0]
    return omitted / denom
def get_subplot_x_y(density_of_gt_file, undersampling):
    """Load cached results and return two parallel lists for the first
    100 runs: densities of H and solver times in milliseconds."""
    cached = zkl.load(density_of_gt_file)
    densities = []
    times = []
    for i in range(0, 100):
        H = bfutils.undersample(cached[i]['gt'], undersampling)
        densities.append(traversal.density(H))
        times.append(cached[i]['solutions'][undersampling]['ms'])
    return densities, times
def generate_H(num_nodes, low=1, high=3):
    """Generate a random undersampled graph H.

    Builds a ring on `num_nodes` nodes with a random number of extra
    edges drawn from [low, high) -- the defaults keep the original
    hard-coded 1-or-2 range -- then undersamples the ground truth by a
    random admissible rate.

    Returns the undersampled graph H.
    """
    numextraedge = np.random.randint(low=low, high=high)
    g = bfutils.ringmore(num_nodes, numextraedge)  # ground truth
    gs = bfutils.call_undersamples(g)  # all possible undersamples for g
    # pick a random undersampling rate among the computed ones
    randomu = np.random.randint(low=1, high=len(gs))
    H = bfutils.undersample(g, randomu)
    return H
def estCOE(d):
    """Commission-error rate: spurious directed + bidirected edges in the
    estimate divided by the count of possible-but-absent edges in the
    1-undersampled ground truth.

    NOTE(review): operator precedence makes the denominator
    n**2 + ((n-1)**2)/2.0 -- confirm this is the intended edge count.
    """
    g1 = bfu.undersample(d['gt']['graph'], 1)
    err = gk.OCE(d['estimate'], g1)
    n = len(g1)
    absent = np.double(n**2 + (n - 1)**2 / 2.0
                       - len(gk.edgelist(g1))
                       - len(gk.bedgelist(g1)))
    return (err['directed'][1] + err['bidirected'][1]) / absent
def get_subplot_x_y(density_of_gt_file, undersampling):
    """Return (x, y): density of H and time in ms per run at the given
    undersampling level, over the 100 cached runs."""
    data = zkl.load(density_of_gt_file)
    x = []
    y = []
    for i in range(0, 100):
        record = data[i]
        g2 = bfutils.undersample(record['gt'], undersampling)  # this is H
        x.append(traversal.density(g2))          # density of H
        y.append(record['solutions'][undersampling]['ms'])  # solve time
    return x, y
def estCOE(d):
    """Commission-error rate of the estimate vs the 1-undersampled truth."""
    gt = bfu.undersample(d['gt']['graph'], 1)
    e = gk.OCE(d['estimate'], gt)
    n = len(gt)
    # possible-but-absent edges (precedence gives n**2 + ((n-1)**2)/2.0)
    N = np.double(n**2 + (n - 1)**2 / 2.0
                  - len(gk.edgelist(gt)) - len(gk.bedgelist(gt)))
    commissions = e['directed'][1] + e['bidirected'][1]
    return commissions / N
def nodesearch(g, g2, order, inlist, s, cds, pool, pc):
    # Depth-first search over candidate edge placements; accumulates into
    # `s` the integer codes of every graph g whose undersampling equals g2.
    # NOTE(review): relies on enclosing-scope names (rate, capsize, f, c,
    # pool structure, predictive_check, edge_function_idx,
    # supergraphs_in_eq) -- this must be a closure defined where those exist.
    if order:
        if bfu.undersample(g, rate) == g2:
            # g already reproduces g2: record it plus all supergraphs that
            # stay in the equivalence class, then stop this branch early
            s.add(bfu.g2num(g))
            if capsize and len(s) > capsize:
                raise ValueError('Too many elements')
            s.update(supergraphs_in_eq(g, g2, rate=rate))
            return g
        key = order[0]
        if pc:
            # narrow candidates to those that passed the predictive check
            tocheck = [x for x in pc if x in cds[len(inlist) - 1][inlist[0]]]
        else:
            tocheck = cds[len(inlist) - 1][inlist[0]]
        if len(order) > 1:
            kk = order[1]
            # precompute the predictive check for the next key in order
            pc = predictive_check(g, g2, pool[len(inlist)],
                                  c[edge_function_idx(kk)], kk)
        else:
            pc = set()
        # per-edge-type callbacks: add, remove, and presence-mask functions
        adder, remover, masker = f[edge_function_idx(key)]
        checks_ok = c[edge_function_idx(key)]
        for n in tocheck:
            if not checks_ok(key, n, g, g2, rate=rate):
                continue
            masked = np.prod(masker(g, key, n))
            if masked:
                # all of the candidate's edges already exist: recurse
                # without mutating g (nothing to undo)
                nodesearch(g, g2, order[1:], [n] + inlist, s, cds, pool, pc)
            else:
                mask = adder(g, key, n)
                nodesearch(g, g2, order[1:], [n] + inlist, s, cds, pool, pc)
                remover(g, key, n, mask)  # backtrack
    elif bfu.undersample(g, rate) == g2:
        # order exhausted: record g if it matches g2
        s.add(bfu.g2num(g))
        if capsize and len(s) > capsize:
            raise ValueError('Too many elements')
    return g
def nodesearch(g, g2, order, inlist, s, cds, pool, pc):
    # Backtracking search: collect into `s` the codes of all graphs whose
    # undersampled version equals g2.
    # NOTE(review): uses free variables from the enclosing scope (rate,
    # capsize, f, c, predictive_check, edge_function_idx,
    # supergraphs_in_eq) -- verify they are bound where this is defined.
    if order:
        if bfu.undersample(g,rate) == g2:
            # current g is a solution; also harvest compatible supergraphs
            s.add(bfu.g2num(g))
            if capsize and len(s)>capsize:
                raise ValueError('Too many elements')
            s.update(supergraphs_in_eq(g, g2, rate=rate))
            return g
        key = order[0]
        if pc:
            # intersect predictive-check survivors with the candidate set
            tocheck = [x for x in pc if x in cds[len(inlist)-1][inlist[0]]]
        else:
            tocheck = cds[len(inlist)-1][inlist[0]]
        if len(order) > 1:
            kk = order[1]
            pc = predictive_check(g,g2,pool[len(inlist)],
                                  c[edge_function_idx(kk)],kk)
        else:
            pc = set()
        # adder/remover/masker are the mutation callbacks for this edge type
        adder, remover, masker = f[edge_function_idx(key)]
        checks_ok = c[edge_function_idx(key)]
        for n in tocheck:
            if not checks_ok(key,n,g,g2,rate=rate):
                continue
            masked = np.prod(masker(g,key,n))
            if masked:
                # nothing new would be added -- recurse without mutation
                nodesearch(g,g2,order[1:], [n]+inlist, s, cds, pool, pc)
            else:
                mask = adder(g,key,n)
                nodesearch(g,g2,order[1:], [n]+inlist, s, cds, pool, pc)
                remover(g,key,n,mask)  # undo before trying the next n
    elif bfu.undersample(g,rate)==g2:
        # no keys left to place: record g if it reproduces g2
        s.add(bfu.g2num(g))
        if capsize and len(s)>capsize:
            raise ValueError('Too many elements')
    return g
def nodesearch(g, g2, edges, s):
    # V-path variant of the backtracking search: for each remaining edge of
    # g2, try every candidate path `n` from `ln`; solutions accumulate in s.
    # NOTE(review): depends on enclosing-scope names ln, single_cache, rate
    # and capsize -- this is a closure (see backtrack_more).
    if edges:
        if bfu.undersample(g,rate) == g2:
            # g already matches g2 -- record and prune this branch
            s.add(bfu.g2num(g))
            if capsize and len(s)>capsize:
                raise ValueError('Too many elements')
            return g
        e = edges[0]
        for n in ln:
            # single_cache holds (n, e) pairs known to be incompatible
            if (n,e) in single_cache:
                continue
            if not ok2addaVpath(e, n, g, g2, rate=rate):
                continue
            mask = addaVpath(g,e,n)
            r = nodesearch(g,g2,edges[1:],s)
            delaVpath(g,e,n,mask)  # backtrack the trial path
    elif bfu.undersample(g,rate)==g2:
        # all edges placed: record g if its undersampling equals g2
        s.add(bfu.g2num(g))
        if capsize and len(s)>capsize:
            raise ValueError('Too many elements in eqclass')
    return g
def fastest_g(L):
    """Collect (density of H, solve time in minutes) over result files.

    L is a list of zkl file names; each file holds 100 runs whose
    'solutions' dict maps undersampling level u to {'ms': time, ...}.

    Returns a pair (densities, times_in_minutes).
    """
    x = []
    y = []
    for fname in L:
        d = zkl.load(fname)
        for i in range(0, 100):
            # NOTE(review): result unused here (the u-range comes from
            # len(solutions)); kept in case call_undersamples caches --
            # confirm and drop if it is pure
            gs = bfutils.call_undersamples(d[i]['gt'])
            for u in range(1, len(d[i]['solutions']) + 1):
                g2 = bfutils.undersample(d[i]['gt'], u)  # this is H
                x.append(traversal.density(g2))          # density of H
                y.append(d[i]['solutions'][u]['ms'])     # time in ms
    # ms -> minutes; a comprehension avoids the original lambda shadowing
    # the outer list `x` and returns a list on both Python 2 and 3
    return x, [t / 1000. / 60. for t in y]
def fastest_g(L):
    """Gather edge densities of H and solver times (in minutes) from all
    result files named in L."""
    densities = []
    times_ms = []
    for fname in L:
        runs = zkl.load(fname)
        for i in range(0, 100):
            # this helps us determine how far u will go
            gs = bfutils.call_undersamples(runs[i]['gt'])
            for u in range(1, len(runs[i]['solutions']) + 1):
                H = bfutils.undersample(runs[i]['gt'], u)
                densities.append(traversal.density(H))
                times_ms.append(runs[i]['solutions'][u]['ms'])
    # convert milliseconds to minutes on the way out
    return densities, map(lambda ms: ms / 1000. / 60., times_ms)
def gen_x_y(L):
    """Pool (density of H, time in ms) pairs from every result file in L,
    across all computed undersampling levels of each of its 100 runs."""
    x, y = [], []
    for fname in L:
        d = zkl.load(fname)
        for i in range(0, 100):
            # this helps us determine how far u will go
            gs = bfutils.call_undersamples(d[i]['gt'])
            for u in range(1, len(d[i]['solutions']) + 1):
                H = bfutils.undersample(d[i]['gt'], u)  # this is H
                x.append(traversal.density(H))
                y.append(d[i]['solutions'][u]['ms'])
    return x, y
def gen_x_y(L):
    """Return parallel lists: densities of H (x) and solve times in ms (y),
    pooled over the files in L."""
    xs = []
    ys = []
    for path in L:
        data = zkl.load(path)
        for i in range(0, 100):
            record = data[i]
            # tells us how many undersampling depths are reachable
            gs = bfutils.call_undersamples(record['gt'])
            for u in range(1, len(record['solutions']) + 1):
                undersampled = bfutils.undersample(record['gt'], u)  # H
                xs.append(traversal.density(undersampled))
                ys.append(record['solutions'][u]['ms'])
    return xs, ys
def ra_wrapper_preset(fold, glist=[]):
    # Run the rate-agnostic solver on a preset graph (glist[fold]) at every
    # undersampling level up to UMAX, retrying the whole sweep on
    # MemoryError.  Returns {'gt': graph, 'solutions': {u: {'eq', 'ms'}}}.
    # NOTE(review): mutable default glist=[] -- harmless here since it is
    # only indexed, never mutated, but a None default would be safer.
    # NOTE(review): relies on module-level UMAX and CAPSIZE.
    scipy.random.seed()
    l = {}  # per-level results: u -> {'eq': eqclass, 'ms': elapsed ms}
    while True:
        try:
            g = glist[fold]
            gs= bfutils.call_undersamples(g)  # how many levels exist for g
            for u in range(1,min([len(gs),UMAX])):
                g2 = bfutils.undersample(g,u)
                print fold,': ',traversal.density(g),':',
                startTime = int(round(time.time() * 1000))
                s = ur.liteqclass(g2, verbose=False, capsize=CAPSIZE)
                endTime = int(round(time.time() * 1000))
                print len(s), u
                l[u] = {'eq':s,'ms':endTime-startTime}
        except MemoryError:
            # start the whole sweep over on memory exhaustion
            print 'memory error... retrying'
            continue
        break
    return {'gt':g,'solutions':l}
def wrapper_rate_agnostic(fold, n=10, k=10):
    # Generate a random ring graph (n nodes, k extra edges) and compute its
    # rate-agnostic equivalence class at each undersampling level up to
    # UMAX, timing each solve.  Retries from scratch on MemoryError.
    # Returns {'gt': graph, 'solutions': {u: {'eq', 'ms'}}}.
    # NOTE(review): relies on module-level UMAX.
    scipy.random.seed()
    l = {}  # u -> {'eq': eqclass, 'ms': elapsed milliseconds}
    while True:
        try:
            g = bfutils.ringmore(n, k)  # random ring of given density
            gs = bfutils.call_undersamples(g)  # reachable undersample depths
            for u in range(1, min([len(gs), UMAX])):
                g2 = bfutils.undersample(g, u)
                print fold, ': ', traversal.density(g), ':',
                startTime = int(round(time.time() * 1000))
                s = ur.iteqclass(g2, verbose=False)
                endTime = int(round(time.time() * 1000))
                print len(s)
                l[u] = {'eq': s, 'ms': endTime - startTime}
        except MemoryError:
            # regenerate a fresh graph and retry the whole sweep
            print 'memory error... retrying'
            continue
        break
    return {'gt': g, 'solutions': l}
def wrapper_rate_agnostic(fold, n=10, k=10):
    # One experiment fold: build a random ring-plus-edges graph, then solve
    # for the equivalence class of each of its undersampled versions
    # (u = 1 .. min(depth, UMAX)-1), recording wall-clock time per level.
    # The whole loop restarts with a new random graph on MemoryError.
    # NOTE(review): UMAX comes from the enclosing module.
    scipy.random.seed()
    l = {}  # results keyed by undersampling level
    while True:
        try:
            g = bfutils.ringmore(n,k) # random ring of given density
            gs= bfutils.call_undersamples(g)  # all undersamples of g
            for u in range(1,min([len(gs),UMAX])):
                g2 = bfutils.undersample(g,u)
                print fold,': ',traversal.density(g),':',
                startTime = int(round(time.time() * 1000))
                s = ur.iteqclass(g2, verbose=False)
                endTime = int(round(time.time() * 1000))
                print len(s)
                l[u] = {'eq':s,'ms':endTime-startTime}
        except MemoryError:
            print 'memory error... retrying'
            continue
        break
    return {'gt':g,'solutions':l}
def addnodes(g,g2,edges):
    # Recursive helper: try adding each still-compatible edge to g,
    # recording any g that matches g2 under undersampling.
    # NOTE(review): uses free variables from the enclosing scope -- the
    # result set `s`, the sentinel `noop`, and `rate` -- so it must be a
    # closure; confirm those bindings at the definition site.
    if edges:
        masks = []
        for e in edges:
            # keep only edges whose addition remains compatible with g2
            if ok2addanedge_(e[0],e[1],g,g2,rate=rate):
                masks.append(True)
            else:
                masks.append(False)
        nedges = [edges[i] for i in range(len(edges)) if masks[i]]
        n = len(nedges)
        if n:
            for i in range(n):
                mask = addanedge(g,nedges[i])
                if bfu.undersample(g,rate) == g2:
                    s.add(bfu.g2num(g))  # record this exact match
                # recurse on the remaining candidates (edge i excluded)
                addnodes(g,g2,nedges[:i]+nedges[i+1:])
                delanedge(g,nedges[i],mask)  # backtrack
            return s
        else:
            return noop  # no compatible edges left
    else:
        return noop  # nothing to try
# Script: build a small random graph, undersample it, and encode its edges
# as 0/1 indicator dicts in preparation for a constraint-solver (MSL) run.
sys.path.append('./tools/')
import traversal, bfutils
import numpy as np
from ortools.constraint_solver import pywrapcp

U = 3 # undersampling rate: 1 means no undersampling
N = 4 # number of nodes
k = 1 # number of extra edges
solver = pywrapcp.Solver("MSL")

# generate a random graph and undersample
g = bfutils.ringmore(N,k)
gdens = traversal.density(g)
g2 = bfutils.undersample(g,U-1)

# undersampled edges: 0/1 indicators keyed by 0-based (i, j) node pairs.
# Graph nodes are 1-based strings, hence the str(i+1) conversion below.
dedgeu = {}  # directed edge present in g2
bedgeu = {}  # bidirected edge present in g2
for i in range(N):
    for j in range(N):
        dedgeu[(i,j)] = 0
        bedgeu[(i,j)] = 0
        v = str(i+1)
        w = str(j+1)
        if w in g2[v]:
            # presumably (0,1) marks a directed and (2,0) a bidirected
            # edge label -- TODO confirm against the graph representation
            if (0,1) in g2[v][w]:
                dedgeu[(i,j)] = 1
            if (2,0) in g2[v][w]:
                bedgeu[(i,j)] = 1
# Script: scatter-plot edge density of H against (log-scale) solve time
# for a cached rate-agnostic experiment with 6 nodes, density 0.2.
NODES = 6
DENSITY = 0.2
UMAX = 6
REPEATS = 100
d = zkl.load('leibnitz_nodes_6_density_0.2_ra_.zkl')
x = []  # densities of H
y = []  # solve times in ms
for i in range(0, REPEATS):
    gs = bfutils.call_undersamples(
        d[i]['gt']) #this helps us determine how far u will go
    for u in range(1, min([len(gs), UMAX])):
        g2 = bfutils.undersample(d[i]['gt'], u) #this is H
        x.append(traversal.density(g2)) #add the density of H
        y.append(d[i]['solutions'][u]['ms']) #add the time
print len(x)
print len(y)
fig = plt.figure()
ax = plt.gca()
ax.scatter(x, y)
ax.set_yscale('log')  # times span orders of magnitude
plt.xlabel('edge density of H')
plt.ylabel('log scale time')
plt.title('Number of Nodes: %s , Density: %s ,UMAX: %s' %
          (NODES, DENSITY, UMAX))
plt.xlim(0, 1)
plt.show()
def main(): g = bfu.ringmore(6,1); H = bfu.undersample(g,1); ss = liteqclass(H) print ss
def ok2addanedge_sub(s, e, g, g2, rate=1):
    """Trial-add edge (s, e) to g and report whether g undersampled by
    `rate` stays an edge-subset of g2.  g is left unchanged."""
    mask = addanedge(g, (s, e))
    ok = isedgesubset(bfu.undersample(g, rate), g2)
    delanedge(g, (s, e), mask)  # undo the trial edge
    return ok
def wrapper(fold,n=10,dens=0.1, urate=URATE):
    # One experiment fold: generate a stable random ring model, draw data
    # from it, estimate a graph (PC or SVAR), and search for a reachable
    # g1 in its equivalence class.  Loops until a non-empty class is found.
    # NOTE(review): depends on module-level URATE, DIST, BURNIN,
    # SAMPLESIZE, NOISE_STD, EST -- confirm at the call site.
    scipy.random.seed()
    rate = urate
    r = None
    s = set()
    counter = 0  # how many candidate models were tried
    while not s:
        scipy.random.seed()
        sst = 0.9  # connection strength; lowered until a model is found
        r = None
        while not r:
            r = lm.getAring(n, dens, sst, False, dist=DIST)
            print sst,
            sys.stdout.flush()
            # decay the strength; finer steps once it gets small
            if sst < 0.03:
                sst -= 0.001
            else:
                sst -= 0.01
            if sst < 0:
                sst = 0.02
        #pprint.pprint(r['transition'].round(2),width=200)
        #d = zkl.load('leibnitz_nodes_'+str(n)+'_OCE_model_.zkl')
        #r = d[dens][fold]
        g = r['graph']
        true_g2 = bfu.undersample(g, rate-1)  # ground-truth undersampled
        data = lm.drawsamplesLG(r['transition'], samples=BURNIN+SAMPLESIZE*2,
                                nstd=np.double(NOISE_STD))
        data = data[:,BURNIN:]  # drop burn-in samples
        if np.max(data) > 1000.:
            # presumably an unstable simulation -- just report it
            pprint.pprint(r['transition'].round(2),width=200)
            #raise ValueError
        startTime = int(round(time.time() * 1000))
        # estimate the undersampled graph from subsampled data
        if EST=='pc':
            g2 = pc.dpc(data[:,::rate], pval=0.0001)
        elif EST=='svar':
            g2 = lm.data2graph(data[:,::rate])
        if trv.density(g2) < 0.7:
            print gk.OCE(g2,true_g2)
            #s = examine_bidirected_flips(g2, depth=DEPTH)
            s = find_nearest_reachable(g2, max_depth=1)
            #s = trv.v2g22g1(g2, capsize=CAPSIZE, verbose=False)
            #s = trv.edge_backtrack2g1_directed(g2, capsize=CAPSIZE)
            #s = timeout(trv.v2g22g1,
            #s = timeout(trv.edge_backtrack2g1_directed,
            #            args=(g2,CAPSIZE),
            #            timeout_duration=1000, default=set())
            print 'o',
            sys.stdout.flush()
        if -1 in s:
            s=set()  # sentinel -1 means no usable result; retry
        endTime = int(round(time.time() * 1000))
        #if counter > 3:
        #    print 'not found'
        #    return None
        counter += 1
    print ''
    # pick the candidate with the smallest total omission/commission error
    oce = [gk.OCE(bfu.num2CG(x,n),g) for x in s]
    cum_oce = [sum(x['directed'])+sum(x['bidirected']) for x in oce]
    idx = np.argmin(cum_oce)
    print "{:2}: {:8} : {:4} {:10} seconds".\
        format(fold, round(dens,3), cum_oce[idx],
               round((endTime-startTime)/1000.,3))
    #np.set_printoptions(formatter={'float': lambda x: format(x, '6.3f')+", "})
    #pprint.pprint(r['transition'].round(2))
    #np.set_printoptions()
    return {'gt':r, 'eq':s, 'OCE':oce[idx], 'tries_till_found': counter,
            'estimate': g2, 'graphs_tried': counter,
            'strength':sst+0.01, 'ms':endTime-startTime}
def wrapper(fold, n=10, dens=0.1, urate=URATE):
    # Experiment fold: synthesize a stable ring model, simulate time-series
    # data, estimate a graph at the given undersampling rate, and search
    # for a reachable member of its equivalence class; repeat until found.
    # NOTE(review): reads module-level URATE, DIST, BURNIN, SAMPLESIZE,
    # NOISE_STD and EST -- confirm they are defined in this module.
    scipy.random.seed()
    rate = urate
    r = None
    s = set()
    counter = 0  # number of attempts before a non-empty class appears
    while not s:
        scipy.random.seed()
        sst = 0.9  # model strength, gradually reduced until getAring succeeds
        r = None
        while not r:
            r = lm.getAring(n, dens, sst, False, dist=DIST)
            print sst,
            sys.stdout.flush()
            if sst < 0.03:
                sst -= 0.001  # fine steps near zero
            else:
                sst -= 0.01
            if sst < 0:
                sst = 0.02  # never go negative; reset to a small value
        #pprint.pprint(r['transition'].round(2),width=200)
        #d = zkl.load('leibnitz_nodes_'+str(n)+'_OCE_model_.zkl')
        #r = d[dens][fold]
        g = r['graph']
        true_g2 = bfu.undersample(g, rate - 1)  # truth at this rate
        data = lm.drawsamplesLG(r['transition'],
                                samples=BURNIN + SAMPLESIZE * 2,
                                nstd=np.double(NOISE_STD))
        data = data[:, BURNIN:]  # discard burn-in
        if np.max(data) > 1000.:
            # presumably diverging dynamics -- dump the transition matrix
            pprint.pprint(r['transition'].round(2), width=200)
            #raise ValueError
        startTime = int(round(time.time() * 1000))
        # graph estimation on every rate-th sample
        if EST == 'pc':
            g2 = pc.dpc(data[:, ::rate], pval=0.0001)
        elif EST == 'svar':
            g2 = lm.data2graph(data[:, ::rate])
        if trv.density(g2) < 0.7:
            print gk.OCE(g2, true_g2)
            #s = examine_bidirected_flips(g2, depth=DEPTH)
            s = find_nearest_reachable(g2, max_depth=1)
            #s = trv.v2g22g1(g2, capsize=CAPSIZE, verbose=False)
            #s = trv.edge_backtrack2g1_directed(g2, capsize=CAPSIZE)
            #s = timeout(trv.v2g22g1,
            #s = timeout(trv.edge_backtrack2g1_directed,
            #            args=(g2,CAPSIZE),
            #            timeout_duration=1000, default=set())
            print 'o',
            sys.stdout.flush()
        if -1 in s:
            s = set()  # -1 is a failure sentinel; clear and retry
        endTime = int(round(time.time() * 1000))
        #if counter > 3:
        #    print 'not found'
        #    return None
        counter += 1
    print ''
    # choose the candidate with minimal cumulative OCE against the truth
    oce = [gk.OCE(bfu.num2CG(x, n), g) for x in s]
    cum_oce = [sum(x['directed']) + sum(x['bidirected']) for x in oce]
    idx = np.argmin(cum_oce)
    print "{:2}: {:8} : {:4} {:10} seconds".\
        format(fold, round(dens,3), cum_oce[idx],
               round((endTime-startTime)/1000.,3))
    #np.set_printoptions(formatter={'float': lambda x: format(x, '6.3f')+", "})
    #pprint.pprint(r['transition'].round(2))
    #np.set_printoptions()
    return {
        'gt': r,
        'eq': s,
        'OCE': oce[idx],
        'tries_till_found': counter,
        'estimate': g2,
        'graphs_tried': counter,
        'strength': sst + 0.01,
        'ms': endTime - startTime
    }
# Script: generate a larger random graph (10 nodes, 25 extra edges),
# undersample it, and record its edges as 0/1 indicator dicts for a
# constraint-solver (MSL) formulation.
sys.path.append('./tools/')
import traversal, bfutils
import numpy as np
from ortools.constraint_solver import pywrapcp

U = 2 # undersampling rate: 1 means no undersampling
N = 10 # number of nodes
k = 25 # number of extra edges
solver = pywrapcp.Solver("MSL")

# generate a random graph and undersample
g = bfutils.ringmore(N,k)
gdens = traversal.density(g)
g2 = bfutils.undersample(g,U-1)

# undersampled edges: indicators keyed by 0-based (i, j); the graph itself
# uses 1-based string node names, hence str(i+1) below.
dedgeu = {}  # directed edge present in g2
bedgeu = {}  # bidirected edge present in g2
for i in range(N):
    for j in range(N):
        dedgeu[(i,j)] = 0
        bedgeu[(i,j)] = 0
        v = str(i+1)
        w = str(j+1)
        if w in g2[v]:
            # presumably (0,1) labels directed and (2,0) bidirected edges
            # -- TODO confirm against the edge-label convention
            if (0,1) in g2[v][w]:
                dedgeu[(i,j)] = 1
            if (2,0) in g2[v][w]:
                bedgeu[(i,j)] = 1
def backtrack_more(g2, rate=1, capsize=None):
    '''
    computes all g1 that are in the equivalence class for g2
    '''
    # supercliques are compatible with any SCC whose cycle GCD is 1, so
    # the class is not enumerable -- return the sentinel set([-1])
    if ecj.isSclique(g2):
        print 'Superclique - any SCC with GCD = 1 fits'
        return set([-1])

    single_cache = {}  # (n, e) pairs proven incompatible in isolation
    if rate == 1:
        ln = [n for n in g2]  # candidates are single nodes
    else:
        # candidates are all length-`rate` node sequences (V-paths)
        ln = []
        for x in itertools.combinations_with_replacement(g2.keys(),rate):
            ln.extend(itertools.permutations(x,rate))
        ln = set(ln)

    @memo # memoize the search
    def nodesearch(g, g2, edges, s):
        # DFS: place a V-path for each remaining edge of g2; record every
        # g whose undersampling equals g2 (as an integer code) in s.
        if edges:
            if bfu.undersample(g,rate) == g2:
                s.add(bfu.g2num(g))
                if capsize and len(s)>capsize:
                    raise ValueError('Too many elements')
                return g
            e = edges[0]
            for n in ln:
                if (n,e) in single_cache:
                    continue  # known-bad pair, skip
                if not ok2addaVpath(e, n, g, g2, rate=rate):
                    continue
                mask = addaVpath(g,e,n)
                r = nodesearch(g,g2,edges[1:],s)
                delaVpath(g,e,n,mask)  # backtrack
        elif bfu.undersample(g,rate)==g2:
            s.add(bfu.g2num(g))
            if capsize and len(s)>capsize:
                raise ValueError('Too many elements in eqclass')
        return g

    # find all directed g1's not conflicting with g2
    n = len(g2)
    edges = gk.edgelist(g2)
    random.shuffle(edges)  # randomize search order
    g = cloneempty(g2)
    # pre-populate single_cache: mark (n, e) pairs that conflict with g2
    # even when added alone
    for e in edges:
        for n in ln:
            mask = addaVpath(g,e,n)
            if not isedgesubset(bfu.undersample(g,rate), g2):
                single_cache[(n,e)] = False
            delaVpath(g,e,n,mask)
    s = set()
    try:
        nodesearch(g,g2,edges,s)
    except ValueError:
        s.add(0)  # capsize exceeded: 0 flags a truncated result
    return s
def ok2addanedge2(s, e, g, g2, rate=1):
    """Trial-add edge (s, e) to g and test whether g undersampled by
    `rate` equals g2 exactly; the edge is removed before returning."""
    mask = addanedge(g, (s, e))
    matches = (bfu.undersample(g, rate) == g2)
    delanedge(g, (s, e), mask)  # restore g
    return matches
def main(): g = bfu.ringmore(6, 1) H = bfu.undersample(g, 1) ss = iteqclass(H) print ss