def load_link_pairs(newpairs, geoipdb=None):
    """
    Queue a batch of observed links in Redis.

    For every link the delay value is added to the set stored at
    C{dbkeys.delay_key(ip1, ip2)}; if that key did not exist yet, it is
    also pushed onto the C{delayed_job:unassigned_links} list. Both
    endpoint addresses are added to the C{iplist} set. All commands are
    queued on one pipeline and sent with a single C{execute()} once every
    link has been processed.

    @param newpairs: Iterable of links. Each link is indexed as
                     C{link[0]}, C{link[1]} (the two endpoints) and
                     C{link[2]} (the value stored in the delay set).
    @param geoipdb: Optional object exposing C{lookup_ips(ips)}. Its
                    results are paired positionally with the link's
                    endpoints and stored as the C{asn} field of each
                    endpoint's hash. When C{None}, no ASN information is
                    written.
    @raise Exception: If both endpoints of a link are identical.
    """
    global lua_push_unique
    r = connection.Redis()

    if lua_push_unique is None:
        # Registered lazily and cached in the module-level global so the
        # script is only registered once per process.
        lua_push_unique = r.register_script("""
            local exists
            exists = redis.call("EXISTS", KEYS[1])
            if exists == 0 then
                redis.call("LPUSH", "delayed_job:unassigned_links", KEYS[1])
            end
            redis.call("SADD", KEYS[1], ARGV[1])
            return redis.status_reply("OK")
        """)

    with r.pipeline() as pipe:
        for link in newpairs:
            if link[0] == link[1]:
                raise Exception("Should not happen")

            lua_push_unique(
                keys=[dbkeys.delay_key(link[0], link[1])],
                args=[link[2]],
                client=pipe)
            pipe.sadd('iplist', *link[:2])

            # geoipdb is optional; without it there is nothing to look up.
            if geoipdb is None:
                continue

            # izip stops at the shorter input, so only the endpoints that
            # lookup_ips() returned a result for are annotated.
            for ip, asn in itertools.izip(link,
                                          geoipdb.lookup_ips(link[:2])):
                pipe.hmset(dbkeys.ip_key(ip), {'asn': asn})

        pipe.execute()
def add_asn_endpoints(vertex_list, linklist, datafile, count, endpointtype):
    """
    Add endpoint nodes that connect to the graph based on ASNs.

    Each ASN listed in the data file that can be mapped to a PoP receives
    a share of C{count} endpoints proportional to its 'Number' value.
    Every endpoint is added to C{vertex_list} and linked to the ASN's PoP
    through an C{EdgeLink} appended to C{linklist}. The link latency is
    the decile transform of the delays recorded for the PoP's intra-PoP
    links, or ten 5s when no delays are recorded.

    @param vertex_list: An instance of VertexList
    @type vertex_list: C{VertexList}

    @param linklist: A list to which the new C{EdgeLink} objects are
                     appended.
    @type linklist: C{list}

    @param datafile: Path to a datafile that contains at least
                     two columns called 'Number' and 'ASN'. The
                     'Number' column should indicate the relative
                     number of endpoints that should connect
                     to that ASN.
    @type datafile: C{str}

    @param count: The number of endpoints to add.
    @type count: C{int}

    @param endpointtype: The label for the endpoint. 'client' is a good
                         example.
    @type endpointtype: C{str}

    @return: C{(endpoints_attached, attachable_asn_count)}
    @rtype: C{tuple}
    """
    r = connection.Redis()

    try:
        cdata_file = datautil.DataFile(datafile, sep="|")
        cdata_file.add_index('ASN')
    except IOError:
        logging.error(Color.fail("Error reading %s" % datafile))
        sys.exit(-1)

    # ASN -> (pop, relative weight) for every ASN that maps to a PoP.
    attach = dict()
    for asn in cdata_file['ASN']:
        try:
            pop = find_pop_for_asn(asn)
            attach[asn] = (pop, cdata_file['ASN'][asn][0]['Number'])
        except graph_objects.ASNNotKnown:
            pass

    def node_id(asn, unique):
        return "%s_%s_%s" % (endpointtype, asn, unique)

    counter = 0
    total = sum(int(weight) for _pop, weight in attach.itervalues())

    # total is 0 both when nothing could be matched to a PoP and when
    # every matched ASN has a weight of 0. Neither case can be apportioned
    # (the share computation below would divide by zero).
    if total == 0:
        sys.stderr.write(Color.fail(
            "[failed] No %s could be attached.\n" % endpointtype))
        logging.error(Color.fail(
            "[failed] No %s could be attached." % endpointtype))
    else:
        for asn, (pop, weight) in attach.iteritems():
            num_to_attach = round(count * (float(weight) / float(total)))

            for j in xrange(0, int(num_to_attach)):
                endpoint = node_id(asn, j)
                vertex_list.add_vertex(endpoint,
                                       nodeid=endpoint,
                                       nodetype=endpointtype,
                                       asn=asn)

                linkkey = dbkeys.Link.intralink(pop)
                # NOTE(review): eval() is applied to values read back from
                # Redis. That is only safe while the database contents are
                # trusted; consider ast.literal_eval.
                linkdelays = [
                    delay
                    for edge in r.smembers(linkkey)
                    for delay in r.smembers(dbkeys.delay_key(*eval(edge)))]

                try:
                    latency = util.decile_transform(linkdelays)
                except util.EmptyListError:
                    # No measurements for this PoP: fall back to a flat
                    # default of 5 for each of the ten deciles.
                    latency = [5 for x in xrange(10)]

                linklist.append(
                    EdgeLink(endpoint, pop,
                             {'latency': latency,
                              'med_latency': latency[len(latency) // 2]}))
                counter += 1

        log.info(Color.wrapformat("Success [{0} attached]",
                                  Color.OKBLUE, counter))

    return (counter, len(attach))
def add_alexa_destinations(vertex_list, linklist, count):
    """
    Add potential destination endpoints based on the top 10000
    destinations.

    Reads C{alexa_top_dests.txt} from the C{inettopology_popmap.resources}
    package. Every data line holds four whitespace-separated fields:
    C{ip url matched_ip matched_bits}. Lines starting with C{#} and blank
    lines are skipped. A destination is attached to the PoP that
    C{dbkeys.get_pop(matched_ip)} returns; destinations with no PoP are
    counted as failed. Reading stops once C{count} destinations have been
    attached.

    @param vertex_list: Vertex collection that receives one C{dest} vertex
                        per attached destination.
    @param linklist: List to which an C{EdgeLink} from each destination to
                     its PoP is appended.
    @param count: Number of destinations after which reading stops.
    @type count: C{int}

    @return: C{(destinations_attached, distinct_pops_used)}
    @rtype: C{tuple}
    """
    r = connection.Redis()
    aslookup = preprocess.MaxMindGeoIPReader.Instance()

    attached = 0
    failed = 0
    pops = set()

    with pkg_resources.resource_stream(
            'inettopology_popmap.resources',
            'alexa_top_dests.txt') as destlist:

        for line in destlist:
            # Blank lines (e.g. a trailing newline at the end of the file)
            # carry no fields and would break the 4-way unpack below.
            if not line.strip() or line[0] == '#':
                continue

            ip, url, matched_ip, matched_bits = line.split()

            db_ip_pop = dbkeys.get_pop(matched_ip)
            if db_ip_pop is None:
                log.debug("Couldn't attach {0} with ip {1}. No matching IP found"
                          .format(url, matched_ip))
                failed += 1
                continue

            nodeid = "dest_{0}".format(ip.replace('.', '_'))
            if nodeid in vertex_list:
                continue  # Don't add the same url twice

            # Use the PoP's country when it is unambiguous, otherwise fall
            # back to a GeoIP lookup of the matched address.
            countries = r.smembers(dbkeys.POP.countries(db_ip_pop))
            if len(countries) == 1:
                country = countries.pop()
            else:
                country = aslookup.lookup_country_codes(matched_ip)[0]

            pops.add(db_ip_pop)

            vertex_list.add_vertex(nodeid,
                                   nodeid=nodeid,
                                   nodetype="dest",
                                   url=url,
                                   ip=ip,
                                   asn=r.get(dbkeys.POP.asn(db_ip_pop)),
                                   country=country)

            linkkey = dbkeys.Link.intralink(db_ip_pop)
            # NOTE(review): eval() is applied to values read back from
            # Redis. That is only safe while the database contents are
            # trusted; consider ast.literal_eval.
            linkdelays = [
                delay
                for edge in r.smembers(linkkey)
                for delay in r.smembers(dbkeys.delay_key(*eval(edge)))]

            try:
                latency = util.decile_transform(linkdelays)
            except util.EmptyListError:
                # No measurements for this PoP: flat default of 5 per decile.
                latency = [5 for x in xrange(10)]

            linklist.append(
                EdgeLink(nodeid, db_ip_pop,
                         {'latency': latency,
                          'med_latency': latency[len(latency) // 2]}))

            attached += 1
            if attached % 10 == 0:
                log.info("Attached {0} destinations. Couldn't attach {1}"
                         .format(attached, failed))

            if attached >= count:
                break

    return (attached, len(pops))
def load_from_redis(r, args):
    """
    Create a graph from the topology stored in Redis and write it out as
    GraphML.

    The steps are, in order:

      1. Read the points of interest (relays) from the JSON file at
         C{args.pointsofinterest}.
      2. Optionally attach client and destination endpoints.
      3. Repeatedly trim unprotected PoPs that have fewer than two links,
         then collapse degree-two chains via
         C{linkdict.collapse_degree_two}.
      4. Add the remaining PoPs and the relays as vertices, and build the
         PoP-to-PoP links with their latency deciles.
      5. Write the vertices to C{vertices.dat}, build a networkx graph,
         reduce it to the nodes reached by a BFS from
         C{linkdict.max_degree()} and write that subgraph to
         C{args.reload}.

    @param r: Redis client used for all lookups.
    @param args: Parsed options. The attributes read are
                 C{pointsofinterest}, C{num_clients}, C{client_data},
                 C{num_dests} and C{reload}.

    @return: The full graph (before the BFS reduction).
    @raise IOError: If the points-of-interest file cannot be read.
    """
    log.info("Loading from Redis")
    linkdict = LinkDict(r)
    vertices = VertexList()
    graphlinks = []

    stats = Stats({'non-pop-trim': int,
                   'unattachable-poi-count': int,
                   'poi-latency-defaulted': int,
                   'unattachable-poi': set,
                   'num-pois': int,
                   'num-pops': int,
                   'num-links': int,
                   'num-clients': int,
                   'client-connect-points': int})

    # Obtain the set of Tor relay IPs
    log.info("Reading Tor relays from %s... " % args.pointsofinterest)
    try:
        with open(args.pointsofinterest) as f:
            PoIs = json.load(f)
    except IOError as e:
        log.info("Error: [%s]" % e)
        raise
    log.info(Color.wrap("Done", Color.OKBLUE))

    # Add clients
    log.info("Attaching clients to graph.")
    if args.num_clients:
        clients_attached, client_attach_points = add_asn_endpoints(
            vertices, graphlinks, args.client_data,
            args.num_clients, 'client')
        log.info("Attached {0} clients to {1} attachment points".format(
            clients_attached, client_attach_points))

    # Add dests
    log.info("Attaching destinations to graph.")
    if args.num_dests:
        dests_attached, dest_attach_points = add_alexa_destinations(
            vertices, graphlinks, args.num_dests)
        log.info("Attached {0} dests to {1} attachment points".format(
            dests_attached, dest_attach_points))

    # PoPs that relays attach to, plus every endpoint added so far, must
    # survive trimming and collapsing.
    protected = set()
    protected.update([poi['pop'] for poi in PoIs])
    protected.update(vertices.keys())

    # We want to trim all of the hanging edges of the graph.
    log.info("Trimming degree-1 vertices...")
    found_hanging_edge = True
    pass_ctr = 0
    while found_hanging_edge:
        pass_ctr += 1
        found_hanging_edge = False
        removed = set()
        n = 0
        timer = ProgressTimer(len(linkdict))

        # Iterate over a snapshot of the keys: entries are deleted below.
        for pop in linkdict.keys():
            if n % 100 == 0 or n == timer.total - 1:
                timer.tick(100)
                sys.stderr.write(
                    "{0}Pass {1}: {2} {3}".format(
                        Color.NEWL, pass_ctr,
                        Color.wrapformat("[{0} processed, {1} trimmed]",
                                         Color.HEADER, n,
                                         stats['non-pop-trim']),
                        Color.wrapformat("[eta:{0}]",
                                         Color.OKGREEN, timer.eta())
                    ))
            n += 1

            if pop in removed:
                continue  # we saw this already
            if len(linkdict[pop]) >= 2:
                continue  # it can stay
            if pop in protected:
                continue  # We need relay/client/dest connect point

            # It's only connected to one
            connected = linkdict[pop].pop()
            removed.add(pop)
            del linkdict[pop]
            linkdict[connected].remove(pop)
            if len(linkdict[connected]) == 0:
                # This was a matched pair attached to nothing else
                del linkdict[connected]
                removed.add(connected)

            stats.incr('non-pop-trim')
            # Removing a leaf may have produced a new leaf; go around again.
            found_hanging_edge = True

        sys.stderr.write("\n")

    linkdict.collapse_degree_two(protected=protected)
    log.info("Trimmed {non-pop-trim} degree two hops".format(**stats))

    # Set vertex id's for all of the pops we have links for.
    log.info("Adding PoPs...")
    for pop in linkdict.iterkeys():
        if pop in vertices:
            continue  # we have this one already.
        vertices.add_vertex(pop,
                            nodeid=pop,
                            nodetype='pop',
                            asn=r.get(dbkeys.POP.asn(pop)),
                            countries=r.smembers(dbkeys.POP.countries(pop)))
        stats.incr('num-pops')

    log.info(Color.wrapformat("Added [{0}]", Color.OKBLUE,
                              stats['num-pops']))

    # Attach the relays
    for poi in PoIs:
        if poi['pop'] not in vertices:
            log.warn("Matched relay to {0}, but couldn't find it "
                     "in vertices".format(poi['pop']))
            stats.incr('unattachable-poi-count')
            stats.incr('unattachable-poi', poi['id'])
            continue

        vertices.add_vertex(poi['id'],
                            nodeid=poi['id'],
                            nodetype='relay',
                            **poi)

        # NOTE(review): eval() is applied to values read back from Redis.
        # That is only safe while the database contents are trusted;
        # consider ast.literal_eval.
        linkdelays = [
            delay
            for edge in r.smembers(dbkeys.Link.intralink(poi['pop']))
            for delay in r.smembers(dbkeys.delay_key(*eval(edge)))]

        try:
            deciles = util.decile_transform(linkdelays)
        except util.EmptyListError:
            # No measurements for this PoP: flat default of 5 per decile.
            deciles = [5 for x in xrange(10)]
            stats.incr('poi-latency-defaulted')

        graphlinks.append(
            EdgeLink(poi['id'], poi['pop'],
                     {'latency': deciles,
                      'med_latency': deciles[len(deciles) // 2]}))
        stats.incr('num-pois')

    log.info("Added {0} PoIs. Did not attach {1} "
             "whose connection point was not linked to anything."
             .format(stats['num-pois'], stats['unattachable-poi-count'])
             )
    log.info("PoIs defaulted to 5ms links: [{0}]".format(
        stats['poi-latency-defaulted']))

    already_processed = set()
    log.info("Processing links... ")
    i = 0
    for pop1 in linkdict.iterkeys():
        if pop1 not in vertices:
            continue

        for pop2 in linkdict[pop1]:
            linkkey = dbkeys.Link.interlink(pop1, pop2)
            # Each link is visited from both of its ends; emit it once.
            if pop2 not in vertices or linkkey in already_processed:
                continue

            linkdelays = [
                delay
                for edge in r.smembers(linkkey)
                for delay in r.smembers(dbkeys.delay_key(*eval(edge)))]

            try:
                latency = util.decile_transform(linkdelays)
            except util.EmptyListError:
                # No measured delays: read the value stored under the
                # "graph:collapsed:" key for this link instead.
                latency = eval(r.get("graph:collapsed:%s" % linkkey))

            graphlinks.append(
                EdgeLink(pop1, pop2,
                         {'latency': latency,
                          'med_latency': latency[len(latency) // 2]}))
            stats.incr('num-links')
            already_processed.add(linkkey)

        i += 1
        sys.stderr.write("{0}Processed links for {1} pops"
                         .format(Color.NEWL, i))

    log.info("Processed {0} pop links "
             .format(stats['num-links']))

    log.info("Making Graph")
    with open("vertices.dat", 'w') as vertout:
        vertices.write(vertout)

    gr = nx.Graph()
    gr.add_nodes_from(vertices.nx_tuple_iter())
    gr.add_edges_from([edge.nx_tuple() for edge in graphlinks])

    try:
        bfs_edges = nx.bfs_edges(gr, linkdict.max_degree())
    except Exception:
        log.error("Something was wrong with: %s" % linkdict.max_degree())
        raise

    # Keep only the nodes reached by the BFS from the start node.
    bfs_node_gen = (node for pair in bfs_edges for node in pair)
    subgraph = gr.subgraph(bfs_node_gen)
    assert nx.is_connected(subgraph)

    log.info("BFS reduced graph from {0} to {1} vertices".format(
        len(gr), len(subgraph)))

    log.info("Writing data file")
    nx.write_graphml(subgraph, args.reload)
    log.info("Wrote files")

    log.info("STATS:")
    for key, val in stats.iteritems():
        log.info("{0}: {1}".format(key, val))

    return gr
def collapse_degree_two(self, protected=None):
    """
    Collapse chains of degree-two nodes into direct links.

    C{self} is used as a mapping from node to the set of its neighbours.
    A node with exactly two neighbours is removed, and its neighbours are
    linked directly, when the node and both neighbours report the same
    ASN, pass the pairwise country check, and the node is not protected.
    The latency of the new link is the decile transform of every pairwise
    sum of the two removed links' deciles; it is stored in Redis under
    C{graph:collapsed:<interlink key>}. Passes repeat until a pass
    collapses nothing. Every collapse is recorded in C{collapse.log} in
    the current working directory.

    All existing C{graph:collapsed:*} keys are deleted before starting.

    @param protected: Nodes that must never be collapsed. Only membership
                      tests are performed on it. Defaults to no protected
                      nodes.
    """
    if protected is None:
        protected = ()

    log.info("Cleaning up collapse dbkeys...")
    r = connection.Redis()
    p = r.pipeline()
    for key in r.keys("graph:collapsed:*"):
        p.delete(key)
    write_failed(p.execute())

    def link_deciles(node_a, node_b):
        # Deciles for the link between two nodes: from the measured
        # delays when that works, otherwise from the value stored when
        # the link itself was produced by an earlier collapse.
        linkkey = dbkeys.Link.interlink(node_a, node_b)
        try:
            # NOTE(review): eval() is applied to values read back from
            # Redis. That is only safe while the database contents are
            # trusted; consider ast.literal_eval.
            return decile_transform(
                [float(delay)
                 for edge in r.smembers(linkkey)
                 for delay in r.smembers(dbkeys.delay_key(*eval(edge)))])
        except Exception:
            return eval(r.get("graph:collapsed:%s" % linkkey))

    pass_ctr = 0
    collapsable = True
    # Nodes already found to be uncollapsable; never reconsidered.
    ignoreable = set()

    with open('collapse.log', 'w') as clogout:
        while collapsable:
            pass_ctr += 1
            sys.stderr.write("\n")
            collapsable = False

            degree2nodes = [
                (node, connections)
                for node, connections in self.iteritems()
                if len(connections) == 2 and node not in ignoreable]

            counter = 0
            n = 0
            deferred = 0
            # Nodes touched by a collapse during this pass.
            collapsed = set()
            timer = ProgressTimer(len(degree2nodes))

            for node, connections in degree2nodes:
                if n % 50 == 0 or n == timer.total - 1:
                    timer.tick(50)
                    sys.stderr.write(
                        "{0}Pass {1}: {2} {3}".format(
                            Color.NEWL, pass_ctr,
                            Color.wrapformat(
                                "[{0} processed, {1} collapsed, {2} deferred]",
                                Color.HEADER, n, counter, deferred
                            ),
                            Color.wrapformat(
                                "[eta: {0}]",
                                Color.OKGREEN, timer.eta()
                            ))
                    )
                n += 1

                neighbourhood = connections | set([node])

                asns = [r.get(dbkeys.POP.asn(x)) for x in neighbourhood]
                countries = [r.smembers(dbkeys.POP.countries(x))
                             for x in neighbourhood]

                # Folds to False as soon as two ASNs differ.
                same_asn = reduce(lambda x, y: x if x == y else False, asns)
                # Each country set must be contained in the next one.
                same_country = all(x & y == x
                                   for x, y in pairwise(countries))

                if (same_asn is False
                        or same_country is False
                        or node in protected):
                    ignoreable.update(neighbourhood)
                    continue

                # A neighbour was already rewired during this pass, so the
                # snapshot for this node may be stale. Retry next pass.
                if len(collapsed & neighbourhood) != 0:
                    deferred += 1
                    continue

                collapsed.update(neighbourhood)

                # Pull out the two neighbours, then restore the set.
                side1 = connections.pop()
                side2 = connections.pop()
                connections.update(set([side1, side2]))

                side1_delays = link_deciles(node, side1)
                side2_delays = link_deciles(node, side2)

                combined_delays = [s1 + s2
                                   for s1 in side1_delays
                                   for s2 in side2_delays]

                r.set('graph:collapsed:%s' %
                      (dbkeys.Link.interlink(*list(connections))),
                      decile_transform(combined_delays))

                clogout.write("Collapsed %s <-> %s <-> %s\n" %
                              (side1, node, side2))

                collapsable = True
                del self[node]
                self[side1].add(side2)
                self[side2].add(side1)
                self[side1].remove(node)
                self[side2].remove(node)
                counter += 1