def naively_expand_query_nodes(graph_db, query_nodes, a = 1):
    """Grow a graph around the query nodes by breadth-first expansion.

    Starting from ``query_nodes``, every node on the current frontier is
    looked up with ``graph_db.get_neighbors`` and the returned edges are
    added to the graph.  The next frontier is every node in the graph that
    has not been opened yet.  At most ``a + 1`` expansion rounds are run.

    Args:
        graph_db: object exposing ``get_neighbors(node)``; a falsy result
            (``None`` or empty) means the node contributes no edges.
        query_nodes: sequence of seed nodes.
        a: expansion parameter; the number of rounds is ``a + 1``.

    Returns:
        A ``(graph, api_calls)`` tuple, where ``api_calls`` is the number of
        ``get_neighbors`` lookups performed.  An empty ``query_nodes``
        yields ``(empty_graph, 0)`` so callers can always unpack the result.
    """
    api_calls = 0
    budget = a + 1

    graph = nx.Graph()
    if len(query_nodes) == 0:
        # Keep the return shape identical to the non-empty case.
        return (graph, api_calls)

    for node in query_nodes:
        graph.add_node(node)

    opened_nodes = set()
    frontier = set(query_nodes)
    step = 0
    while step < budget and frontier:
        for f in frontier:
            neighbors = graph_db.get_neighbors(f)
            if neighbors:
                graph.add_edges_from((f, n) for n in neighbors)
        # One lookup was issued for every frontier node in this round.
        api_calls += len(frontier)
        opened_nodes |= frontier
        # Everything discovered so far that has not been opened yet.
        frontier = set(graph.nodes()) - opened_nodes
        step += 1

    return (graph, api_calls)
def expand_query_nodes(graph_db, query_nodes, a = 1):
    """Grow a graph around the query nodes, frontier by frontier.

    The frontier for each round is chosen by ``_get_new_frontier`` (given
    the graph built so far, the set of already opened nodes, the query
    nodes and the budget ``a + 1``).  Every frontier node is looked up with
    ``graph_db.get_neighbors`` and its edges are added to the graph.
    Expansion stops as soon as ``_get_new_frontier`` returns an empty
    frontier.

    Args:
        graph_db: object exposing ``get_neighbors(node)``; a falsy result
            (``None`` or empty) means the node contributes no edges.
        query_nodes: collection of seed nodes.
        a: expansion parameter; ``a + 1`` is passed on as the budget.

    Returns:
        A ``(graph, api_calls)`` tuple, where ``api_calls`` is the number of
        ``get_neighbors`` lookups performed.  An empty ``query_nodes``
        yields ``(empty_graph, 0)`` so callers can always unpack the result.
    """
    api_calls = 0
    budget = a + 1

    graph = nx.Graph()
    if len(query_nodes) == 0:
        # Keep the return shape identical to the non-empty case.
        return (graph, api_calls)

    for node in query_nodes:
        graph.add_node(node)

    opened_nodes = set()
    frontier = _get_new_frontier(graph, opened_nodes, query_nodes, budget)
    while frontier:
        for f in frontier:
            neighbors = graph_db.get_neighbors(f)
            if neighbors:
                graph.add_edges_from((f, n) for n in neighbors)
        # One lookup was issued for every frontier node in this round.
        api_calls += len(frontier)
        # The frontier may be any iterable, so normalise before merging.
        opened_nodes |= set(frontier)
        frontier = _get_new_frontier(graph, opened_nodes, query_nodes, budget)

    return (graph, api_calls)
def smart_expand(graph_db, query_nodes, a = 1, approx_threshold = 1.00, samples = 1):
    """Expand around the query nodes until their components merge or the
    approximation ratio is good enough.

    Every query node starts as its own ``Component``.  In each round, every
    active component that still has entries in ``nodes_to_expand`` pops one
    node from that heap and fetches its neighbors through
    ``graph_db.get_neighbors`` (counted as one API call).  Unseen neighbors
    join the component of the expanded node; a neighbor that already
    belongs to a different component causes the two components to be
    replaced by a new ``Component`` built from both.

    The loop runs while ``would_you_expand(components)`` is truthy, more
    than one component remains, and ``approx_ratio < approx_threshold``.
    The ratio is only re-evaluated when ``api_calls`` reaches ``schedule``,
    which starts at 64, is multiplied by ``schedule_multiplier`` until it
    reaches 8192, and grows by 8192 afterwards.

    Args:
        graph_db: object exposing ``get_neighbors(node)``; ``None`` is
            treated as "no neighbors".
        query_nodes: collection of seed nodes.
        a: forwarded unchanged to ``Component.get_partial_solution``.
        approx_threshold: expansion stops once ``approx_ratio`` is no
            longer below this value.
        samples: forwarded unchanged to ``Component.get_partial_solution``.

    Returns:
        A ``(graph, api_calls)`` tuple; ``(empty_graph, 0)`` when
        ``query_nodes`` is empty.
    """
    # NOTE(review): the nesting below was reconstructed from a source whose
    # whitespace had been collapsed; confirm the placement of the per-edge
    # bookkeeping and of the progress logging against the original layout.
    api_calls = 0
    num_of_edges = 0
    # Number of API calls at which the bounds are next re-evaluated.
    schedule = 64
    schedule_multiplier = 2
    # initialize graph
    graph = nx.Graph()
    if len(query_nodes) == 0:
        return (graph, 0)
    # we add query nodes
    for node in query_nodes:
        graph.add_node(node)
    # Nodes already assigned to some component.
    nodes_seen = set(query_nodes)
    # Maps every seen node to the component it currently belongs to.
    nodeComponent = {}
    components = []
    # Start with one singleton component per query node.
    for q in query_nodes:
        comp = Component( set([q]) )
        components.append( comp )
        nodeComponent[q] = comp
    # Initial ratio is 1 / len(query_nodes).
    current_best = 1.0
    upper_bound = 1.0 * len(query_nodes)
    approx_ratio = current_best / upper_bound
    while would_you_expand(components) and len(components) > 1 and (approx_ratio < approx_threshold):
        # Components created by merges during this round.
        next_components = []
        for c in components:
            # Skip components merged away earlier in this round or with
            # nothing left to expand.
            if not c.is_active() or len(c.nodes_to_expand) == 0:
                continue
            # Heap entries are (random priority, node); take the node.
            lucky_node = heappop( c.nodes_to_expand )[1]
            comp_of_lucky_node = nodeComponent.get(lucky_node, None)
            assert comp_of_lucky_node != None
            neighbors = graph_db.get_neighbors(lucky_node)
            if neighbors == None:
                neighbors = []
            api_calls += 1
            for nbor in neighbors:
                if not nbor in nodes_seen:
                    # New node: queue it with a random priority and attach
                    # it to the component of the expanded node.
                    # NOTE(review): the push targets c's heap even if c was
                    # merged into a union earlier in this loop — confirm
                    # the union sees these entries.
                    heappush( c.nodes_to_expand, (random.random(), nbor) )
                    nodes_seen.add(nbor)
                    nodeComponent[nbor] = comp_of_lucky_node
                    comp_of_lucky_node.add(nbor)
                    comp_of_lucky_node.add_edge( lucky_node, nbor )
                else:
                    comp_of_nbor = nodeComponent.get(nbor,None)
                    assert comp_of_nbor != None
                    if comp_of_lucky_node != comp_of_nbor:
                        # The edge bridges two components: build their
                        # union and retire both originals.
                        union = Component( comp_of_lucky_node, comp_of_nbor )
                        union.add_edge( lucky_node, nbor )
                        comp_of_lucky_node.set_inactive()
                        comp_of_nbor.set_inactive()
                        next_components.append(union)
                        # Re-point every member node at the union.
                        for u in union.S:
                            nodeComponent[u] = union
                        # Later neighbors of lucky_node are compared
                        # against the merged component.
                        comp_of_lucky_node = union
                # Record the discovered edge in the returned graph.
                graph.add_edge(lucky_node, nbor)
                num_of_edges += 1
        # Keep only components that are still active, including new unions.
        components = [c for c in components + next_components if c.is_active()]
        if api_calls >= schedule:
            # Advance the schedule: multiply up to 8192, then step by 8192.
            if schedule >= 8192:
                schedule += 8192
            else:
                schedule *= schedule_multiplier
            # Both lists are seeded with 1 so max()/sum() are never taken
            # over an empty list.
            best_with = [1]
            best_ever = [1]
            # Collect the scores reported by each component.
            for comp in components:
                partial_solution = comp.get_partial_solution( graph, query_nodes, a, samples)
                if partial_solution != None:
                    best_with.append(partial_solution[0][0])
                    best_ever.append(partial_solution[1][0])
            current_best = max(current_best, max(best_ever))
            # Only positive scores contribute to the upper bound.
            upper_bound = sum([sc for sc in best_with if sc > 0])
            approx_ratio = 1.0 * current_best / upper_bound
            # Progress line: api calls, #components, #edges, ratio.
            sys.stderr.write(str(api_calls) + '\t' + str(len(components)) + '\t' + str(num_of_edges) + '\t' + str(approx_ratio) + '\n')
            # Component sizes, largest first.
            comp_sizes = [len(c.S) for c in components]
            comp_sizes.sort(reverse = True)
            sys.stderr.write(str(comp_sizes) + '\n')
    return (graph, api_calls)