def test_graph2():
    """
    Check `succ_shortest_path` on a toy graph with 6 nodes and 3 relations.

    Builds the graph from a hard-coded edge list, runs the successive
    shortest path routine for the triple (s, p, o) = (0, 2, 5), asserts that
    the resulting flow matches the expected value, and prints the recovered
    edges sorted by their first three columns.
    """
    sym = True
    # The first three columns are passed as coordinates into a (6, 6, 3)
    # shaped graph, the fourth column as the edge values.
    # Presumably each row is (i, j, r, value) -- confirm against `make_graph`.
    adj = np.array([
        [0, 1, 0, 16],
        [2, 4, 0, 14],
        [4, 5, 0, 4],
        [0, 2, 1, 13],
        [2, 1, 1, 4],
        [3, 5, 1, 20],
        [1, 3, 2, 12],
        [3, 2, 2, 9],
        [4, 3, 2, 7]
    ])
    shape = (6, 6, 3)
    G = make_graph(adj[:, :3], shape, values=adj[:, 3], sym=sym, display=False)

    # One source id per stored entry (row index repeated by row length) and
    # the target id decoded from the CSR column index modulo N.
    G.sources = np.repeat(np.arange(G.N), np.diff(G.csr.indptr))
    G.targets = G.csr.indices % G.N
    cost_vec = G.indeg_vec
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print("Original graph:\n" + str(G))

    # Successive shortest path algorithm
    s, p, o = 0, 2, 5
    expect = 2.88888888889
    mincostflow = succ_shortest_path(G, cost_vec, s, p, o)
    print(mincostflow)
    assert np.allclose(mincostflow.flow, expect)

    print('Recovered max-flow edges (i, j, r, flow)..')
    adj = np.zeros((len(mincostflow.edges), 4))
    # `.items()` exists on both Python 2 and 3 dicts (`iteritems` does not).
    for i, (k, v) in enumerate(mincostflow.edges.items()):
        adj[i, :] = np.array([k[0], k[1], k[2], v])
    # lexsort uses its LAST key as the primary one: order by column 0,
    # then column 1, then column 2.
    adj = adj[np.lexsort((adj[:, 2], adj[:, 1], adj[:, 0])), :]
    print(adj)
    print('')
def test_graph1():
    """
    Check `succ_shortest_path` on a toy graph with 4 nodes and 2 relations.

    The graph is built from a hard-coded edge list; the routine is run for
    the triple (s, p, o) = (0, 1, 3) and the resulting flow is compared with
    the expected value. The recovered edges are then printed in sorted order.
    """
    # Columns 0-2 are used as coordinates, column 3 as the edge values.
    edge_table = np.array([
        [0, 2, 0, 10],
        [1, 2, 0, 30],
        [0, 1, 1, 20],
        [1, 3, 1, 10],
        [2, 3, 1, 20],
    ])
    coords, weights = edge_table[:, :3], edge_table[:, 3]
    G = make_graph(coords, (4, 4, 2), values=weights, sym=True, display=False)

    # Per-entry source ids (row index repeated by row length) and target ids
    # (CSR column index modulo N).
    row_lengths = np.diff(G.csr.indptr)
    G.sources = np.repeat(np.arange(G.N), row_lengths)
    G.targets = G.csr.indices % G.N
    cost_vec = G.indeg_vec
    print("Original graph:\n", G)

    # Successive shortest path algorithm
    s, p, o = 0, 1, 3
    expect = 6.42857142857
    mincostflow = succ_shortest_path(G, cost_vec, s, p, o)
    print(mincostflow)
    assert np.allclose(mincostflow.flow, expect)

    print('Recovered max-flow edges (i, j, r, flow)..')
    # One float row per edge: the first three key components plus the value.
    # The reshape keeps a (0, 4) shape when there are no edges at all.
    recovered = np.array(
        [[key[0], key[1], key[2], amount]
         for key, amount in mincostflow.edges.items()],
        dtype=float,
    ).reshape(-1, 4)
    # lexsort treats its last key as primary: column 0, then 1, then 2.
    order = np.lexsort((recovered[:, 2], recovered[:, 1], recovered[:, 0]))
    print(recovered[order, :])
    print('')
def test_dbpedia():
    """
    Run `succ_shortest_path` for a single triple on the on-disk graph.

    Reconstructs the graph stored under ./data/kg/_undir/ (relative to the
    current working directory, with `~` expanded), uses the log of the
    in-degree vector as the cost vector, and prints the result. Nothing is
    asserted; this is a smoke run that needs the data directory to exist.
    """
    dirpath = abspath(expanduser('./data/kg/_undir/'))
    shape = (6060993, 6060993, 663)
    G = Graph.reconstruct(dirpath, shape, sym=True)
    # NOTE(review): unlike the toy-graph tests, G.sources / G.targets are not
    # set here -- confirm whether Graph.reconstruct or succ_shortest_path
    # provides them.
    cost_vec = np.log(G.indeg_vec)
    s, p, o = 2145431, 178, 459128  # Gravity, Alfonso Cuarón
    mincostflow = succ_shortest_path(G, cost_vec, s, p, o)
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(mincostflow)
def compute_mincostflow(G, relsim, subs, preds, objs, flowfile):
    """
    Compute the flow of each input triple and log the streams to a file.

    For every (s, p, o) triple the edge values of the graph are re-weighted
    for predicate p, `succ_shortest_path` is run (with `return_flow=False`
    and `npaths=5`), and the resulting stream is appended to `flowfile` as
    one JSON line. The CSR arrays of the graph and the cost vector are
    restored from a backup after every triple, also when a triple fails.

    Side effects: sets `G.sources` and `G.targets`, rebinds `G.csr.data`,
    overwrites `flowfile`, and prints progress to stdout.

    Parameters:
    -----------
    G: rgraph
        See `datastructures`.
    relsim: ndarray
        A square matrix containing relational similarity scores.
    subs, preds, objs: sequence
        Sequences representing the subject, predicate and object of
        input triples.
    flowfile: str
        Absolute path of the file where flow will be stored as JSON,
        one line per triple.

    Returns:
    --------
    mincostflows: sequence
        A sequence containing total flow for each triple.
    times: sequence
        Times taken to compute stream of each triple.
    """
    # take graph backup
    G_bak = {
        'data': G.csr.data.copy(),
        'indices': G.csr.indices.copy(),
        'indptr': G.csr.indptr.copy()
    }
    cost_vec_bak = np.log(G.indeg_vec).copy()

    # some set up
    G.sources = np.repeat(np.arange(G.N), np.diff(G.csr.indptr))
    G.targets = G.csr.indices % G.N
    cost_vec = cost_vec_bak.copy()
    # WTFN is a module-level setting defined outside this block.
    indegsim = weighted_degree(G.indeg_vec, weight=WTFN)
    specificity_wt = indegsim[G.targets]  # specificity
    # Column index = relation * N + target, so the relation id is the integer
    # quotient. Floor division keeps it an integer array on Python 3, where
    # `/` would yield floats that cannot be used as an index below.
    relations = (G.csr.indices - G.targets) // G.N

    mincostflows, times = [], []
    # Unbuffered text mode (`open(path, 'w', 0)`) is rejected by Python 3, so
    # the file is opened normally and flushed after every record instead.
    with open(flowfile, 'w') as ff:
        for idx, (s, p, o) in enumerate(zip(subs, preds, objs)):
            s, p, o = int(s), int(p), int(o)
            ts = time()
            # Progress prefix without a newline; the extra space mirrors the
            # separator that `print x,` used to place before the next print.
            sys.stdout.write(
                '{}. Working on {} .. '.format(idx + 1, (s, p, o)) + ' ')
            sys.stdout.flush()

            try:
                # set weights
                relsimvec = np.array(relsim[p, :])  # specific to predicate p
                relsim_wt = relsimvec[relations]
                G.csr.data = np.multiply(relsim_wt, specificity_wt)

                # compute
                mcflow = succ_shortest_path(
                    G, cost_vec, s, p, o, return_flow=False, npaths=5
                )
                mincostflows.append(mcflow.flow)
                ff.write(json.dumps(mcflow.stream) + '\n')
                ff.flush()
                tend = time()
                times.append(tend - ts)
                print('mincostflow: {:.5f}, #paths: {}, time: {:.2f}s.'.format(
                    mcflow.flow, len(mcflow.stream['paths']), tend - ts))
            finally:
                # reset state of the graph, even if this triple raised, so
                # the caller's graph is never left in a modified state
                np.copyto(G.csr.data, G_bak['data'])
                np.copyto(G.csr.indices, G_bak['indices'])
                np.copyto(G.csr.indptr, G_bak['indptr'])
                np.copyto(cost_vec, cost_vec_bak)
    return mincostflows, times