#!/usr/bin/python
'''
Created on Jun 7, 2010

@author: jose

Open the BigGraph archive named by the first command-line argument and
call create_index_kcores() on it, with graph.debug switched on.
'''

from cloudlight import BigGraph
import sys

debug = True

# The archive path is the first positional argument.
cli_args = sys.argv[1:]
filename = cli_args[0] if cli_args else None
if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    exit(-1)

# cache size in 2KB pages (?)
cache_size = 2**16

print('opening BigGraph ' + filename)
graph = BigGraph(filename, cache_size)
graph.debug = debug

print('indexing with create_index_kcores()')
graph.create_index_kcores()
#!/usr/bin/python
'''
Created on May 11, 2010

@author: jose

Open the BigGraph archive named by the first command-line argument and
call create_index_knn() on it.
'''

from cloudlight import BigGraph
import sys

# The archive path is the first positional argument.
cli_args = sys.argv[1:]
filename = cli_args[0] if cli_args else None
if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    exit(-1)

# cache size in 2KB pages (?)
cache_size = 2**16

print('opening BigGraph ' + filename)
graph = BigGraph(filename, cache_size)

print('indexing with create_index_knn()')
graph.create_index_knn()
import sys

# Export the cached 'clustering' parameter of a BigGraph archive to a text
# file, writing one "<node>\t\t<value>" line per entry.
# Arguments: 1) input BigGraph archive, 2) output filename.

filename = sys.argv[1] if len(sys.argv) > 1 else None
if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    # Abort here: the string concatenation below would fail on None.
    sys.exit(-1)

outname = sys.argv[2] if len(sys.argv) > 2 else None
if not outname:
    print('Error: second argument missing, output filename!')
    sys.exit(-1)

print('opening BigGraph ' + filename)
graph = BigGraph(filename)

total_nodes = graph.number_of_nodes()
count = 0

# The with-block closes (and therefore flushes) the output file even if the
# export fails half-way.
with open(outname, 'w') as out:
    for node, clustering in graph.get_parameter_cache_iter('clustering'):
        if count % 10000 == 0:
            print('INFO: exporting clustering %d nodes of %d total nodes' % (count, total_nodes))
        out.write('%s\t\t%f\n' % (node, clustering))
        count += 1
#!/usr/bin/python
'''
Created on Jul 27, 2010

@author: jose

Open the BigGraph archive named by the first command-line argument and
call create_index_triangles() on it, with graph.debug switched on.
'''

from cloudlight import BigGraph
import sys

# The archive path is the first positional argument.
cli_args = sys.argv[1:]
filename = cli_args[0] if cli_args else None
if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    exit(-1)

# cache size in 2KB pages (?)
cache_size = 2**16

print('opening BigGraph ' + filename)
graph = BigGraph(filename, cache_size)
graph.debug = True

print('indexing with create_index_triangles()')
graph.create_index_triangles()
import sys

# Build the degree, knn, clustering, kcores and triangles indices of the
# BigGraph archive named by the first command-line argument, in that order.

print(sys.argv)

filename = sys.argv[1] if len(sys.argv) > 1 else None
if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    exit(-1)

# cache size in 2KB pages (?)
cache_size = 2**18

print('opening BigGraph ' + filename)
graph = BigGraph(filename, cache_size)

# Each step announces itself and then runs the matching index builder.
index_steps = (
    ('indexing with create_index_degree()', graph.create_index_degree),
    ('indexing with create_index_knn()', graph.create_index_knn),
    ('indexing with create_index_clustering()', graph.create_index_clustering),
    ('indexing with create_index_kcores()', graph.create_index_kcores),
    ('indexing with create_index_triangles()', graph.create_index_triangles),
)
for message, build_index in index_steps:
    print(message)
    build_index()
import sys

# Export the cached 'shell' parameter of a BigGraph archive to a text file,
# writing one "<node>\t\t<value>" line per entry.
# Arguments: 1) input BigGraph archive, 2) output filename.

filename = sys.argv[1] if len(sys.argv) > 1 else None
if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    # Abort here: the string concatenation below would fail on None.
    sys.exit(-1)

outname = sys.argv[2] if len(sys.argv) > 2 else None
if not outname:
    print('Error: second argument missing, output filename!')
    sys.exit(-1)

print('opening BigGraph ' + filename)
graph = BigGraph(filename)

total_nodes = graph.number_of_nodes()
count = 0

# The with-block closes (and therefore flushes) the output file even if the
# export fails half-way.
with open(outname, 'w') as out:
    for node, shell in graph.get_parameter_cache_iter('shell'):
        if count % 10000 == 0:
            # The progress line names the parameter actually being exported.
            print('INFO: exporting shell %d nodes of %d total nodes' % (count, total_nodes))
        out.write('%s\t\t%f\n' % (node, shell))
        count += 1
#!/usr/bin/python
'''
Created on Jul 30, 2010

@author: jose

Add the 'seen_triangles' and 'seen_triangles2' parameter caches to the
BigGraph archive named by the first command-line argument, initialise each
of them to 0.0 and index them.
'''

from cloudlight import BigGraph
import sys

filename = sys.argv[1] if len(sys.argv) > 1 else None
if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    exit(-1)

print('opening BigGraph ' + filename)
graph = BigGraph(filename)

print('indexing with create_index_seen_triangles()')

# Same add / initialise / index sequence for both caches, first cache first.
for parameter in ('seen_triangles', 'seen_triangles2'):
    graph.add_parameter_cache(parameter)
    graph.initialize_parameter(parameter, 0.0)
    graph.index_parameter_cache(parameter)

if __name__ == '__main__':
    pass
#!/usr/bin/python
'''
Created on Aug 1, 2010

@author: jose

Add the 'seen_degree' and 'seen_degree2' parameter caches to the BigGraph
archive named by the first command-line argument, initialise each of them
to 0.0 and index them.
'''

from cloudlight import BigGraph
import sys

filename = sys.argv[1] if len(sys.argv) > 1 else None
if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    exit(-1)

print('opening BigGraph ' + filename)
graph = BigGraph(filename)

print('indexing with create_index_seen_degree()')

# Same add / initialise / index sequence for both caches, first cache first.
for parameter in ('seen_degree', 'seen_degree2'):
    graph.add_parameter_cache(parameter)
    graph.initialize_parameter(parameter, 0.0)
    graph.index_parameter_cache(parameter)

if __name__ == '__main__':
    pass
#!/usr/bin/python
'''
Created on Jun 29, 2010

@author: jose

Remove the 'knn' parameter cache from the BigGraph archive named by the
first command-line argument.
'''

from cloudlight import BigGraph
import sys

filename = sys.argv[1] if len(sys.argv) > 1 else None
if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    # Abort here: the string concatenation below would fail on None.
    sys.exit(-1)

print('opening BigGraph ' + filename)
graph = BigGraph(filename)

print('dropping index with remove_parameter_cache()')
graph.remove_parameter_cache('knn')
#!/usr/bin/python
'''
Created on Jun 29, 2010

@author: jose

Remove the 'clustering' parameter cache from the BigGraph archive named by
the first command-line argument.
'''

from cloudlight import BigGraph
import sys

filename = sys.argv[1] if len(sys.argv) > 1 else None
if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    # Abort here: the string concatenation below would fail on None.
    sys.exit(-1)

print('opening BigGraph ' + filename)
graph = BigGraph(filename)

print('dropping index with remove_parameter_cache()')
graph.remove_parameter_cache('clustering')
#!/usr/bin/python
'''
Created on Jun 29, 2010

@author: jose

Remove the 'shell' parameter cache from the BigGraph archive named by the
first command-line argument.
'''

from cloudlight import BigGraph
import sys

filename = sys.argv[1] if len(sys.argv) > 1 else None
if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    # Abort here: the string concatenation below would fail on None.
    sys.exit(-1)

print('opening BigGraph ' + filename)
graph = BigGraph(filename)

print('dropping index with remove_parameter_cache()')
graph.remove_parameter_cache('shell')
else: q.put('FINISHED') create_dbs = True if create_dbs: for strat_name in strats: for l in lookaheads: new_filename = filename + '.%s.lookahead%d' % (strat_name, l) print 'creating BigGraph ' + new_filename os.system('cp %s %s' % (filename, new_filename)) print 'choosing random node seed for crawlers...' graph = BigGraph(filename, cache_size_pages) node_rand_seed = graph.random_nodes()[0] del graph print 'done. choosed -> %s' % str(node_rand_seed) processors = len(strats) * len(lookaheads) # 4 out = open(outname, 'w') processes = [] graphs = [] list_of_strategies = [] results = 0 for strat_name in strats: for l in lookaheads:
#!/usr/bin/python
'''
Created on May 11, 2010

@author: jose

Open the BigGraph archive named by the first command-line argument and
call create_index_degree() on it.
'''

from cloudlight import BigGraph
import sys

# The archive path is the first positional argument.
cli_args = sys.argv[1:]
filename = cli_args[0] if cli_args else None
if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    exit(-1)

# cache size in 2KB pages (?)
cache_size = 2**16

print('opening BigGraph ' + filename)
graph = BigGraph(filename, cache_size)

print('indexing with create_index_degree()')
graph.create_index_degree()
# Load the edge list into a BigDiGraph, copy only its symmetric edges into a
# BigGraph, index both, and check the relative size of the first connected
# component. `graphfile`, `filename` and `links` are set earlier in the
# script, outside this fragment.
digraph = BigDiGraph(graphfile + '.big_digraph')
digraph.debug = True
digraph.input_debug_links = 200000
digraph.output_debug_nodes = 100
digraph.max_links_input = links
digraph.max_nodes_analysis = 10000

print('digraph.load_edgelist(open(filename)) ...')
# Close the edge-list file as soon as loading is done instead of leaking it.
with open(filename) as edgelist_file:
    digraph.load_edgelist(edgelist_file)

print('digraph.create_indices() ...')
digraph.create_indices()

graph = BigGraph(graphfile + '.disconnected')

print('digraph.add_only_symmetric_edgelist(graph) ...')
digraph.add_only_symmetric_edgelist(graph)

print('graph.create_indices() ...')
graph.create_indices()

number_of_nodes = graph.number_of_nodes()

# Fraction of all nodes held by each connected component.
comps = [len(comp) / float(number_of_nodes) for comp in graph.connected_components()]

# NOTE(review): this relies on connected_components() yielding the biggest
# component first - confirm against the library.
# An empty component list is reported too, rather than raising IndexError.
if not comps or comps[0] < 0.5:
    print('ERROR: biggest connected componnet not found!')
import sys

# Export the cached 'degree' parameter of a BigGraph archive to a text file,
# writing one "<node>\t\t<value>" line per entry.
# Arguments: 1) input BigGraph archive, 2) output filename.

filename = sys.argv[1] if len(sys.argv) > 1 else None
if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    # Abort here: the string concatenation below would fail on None.
    sys.exit(-1)

outname = sys.argv[2] if len(sys.argv) > 2 else None
if not outname:
    print('Error: second argument missing, output filename!')
    sys.exit(-1)

print('opening BigGraph ' + filename)
graph = BigGraph(filename)

total_nodes = graph.number_of_nodes()
count = 0

# The with-block closes (and therefore flushes) the output file even if the
# export fails half-way.
with open(outname, 'w') as out:
    for node, degree in graph.get_parameter_cache_iter('degree'):
        if count % 10000 == 0:
            print('INFO: exporting degree %d nodes of %d total nodes' % (count, total_nodes))
        out.write('%s\t\t%f\n' % (node, degree))
        count += 1
import sys

# Export the cached 'triangles' parameter of a BigGraph archive to a text
# file, writing one "<node>\t\t<value>" line per entry.
# Arguments: 1) input BigGraph archive, 2) output filename.

filename = sys.argv[1] if len(sys.argv) > 1 else None
if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    # Abort here: the string concatenation below would fail on None.
    sys.exit(-1)

outname = sys.argv[2] if len(sys.argv) > 2 else None
if not outname:
    print('Error: second argument missing, output filename!')
    sys.exit(-1)

print('opening BigGraph ' + filename)
graph = BigGraph(filename)
graph.debug = True

total_nodes = graph.number_of_nodes()
count = 0

# The with-block closes (and therefore flushes) the output file even if the
# export fails half-way.
with open(outname, 'w') as out:
    for node, triangles in graph.get_parameter_cache_iter('triangles'):
        if count % 10000 == 0:
            print('INFO: exporting triangles %d nodes of %d total nodes' % (count, total_nodes))
        out.write('%s\t\t%f\n' % (node, triangles))
        count += 1
#!/usr/bin/python
'''
Created on May 23, 2010

@author: jose

Open the BigGraph archive named by the first command-line argument and
call index_parameter_from_parameter('triangles', 'unseen_triangles') on it.
'''

from cloudlight import BigGraph
import sys

# The archive path is the first positional argument.
cli_args = sys.argv[1:]
filename = cli_args[0] if cli_args else None
if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    exit(-1)

print('opening BigGraph ' + filename)
graph = BigGraph(filename)

print('indexing with create_index_unseen_triangles()')
graph.index_parameter_from_parameter('triangles', 'unseen_triangles')

if __name__ == '__main__':
    pass
#!/usr/bin/python
'''
Created on Oct 4, 2010

@author: jose

Open a BigGraph archive and call save_snowball_edgelist_iter() to write it
to an output file.

Arguments: 1) input BigGraph archive, 2) output edge-list filename.
'''

from cloudlight import BigGraph
import sys

if __name__ == '__main__':
    print('Starting dump_snowball')

    filename = sys.argv[1] if len(sys.argv) > 1 else None
    if not filename:
        print('Error: first argument missing, input filename with BigGraph archive!')
        sys.exit(-1)

    outname = sys.argv[2] if len(sys.argv) > 2 else None
    if not outname:
        # This is the second argument; the message used to say "first".
        print('Error: second argument missing, output filename with edgelist archive!')
        sys.exit(-1)

    g = BigGraph(filename)
    g.debug = True
    g.save_snowball_edgelist_iter(outname)
if not outname:
    print('Error: second argument missing, output filename!')
    sys.exit(-1)

# Third argument: maximum number of links to read from the edge list.
links = sys.argv[3] if len(sys.argv) > 3 else None
if not links:
    print('Error: third argument missing, max number of links!')
    sys.exit(-1)
links = int(links)

# Create the output BigGraph and set its debug and input limits.
graph = BigGraph(outname)
graph.debug = True
graph.input_debug_links = 200000
graph.output_debug_nodes = 10000
graph.max_links_input = links
graph.max_nodes_analysis = 10000

# Close the edge-list file as soon as loading is done instead of leaking it.
# `filename` is set earlier in the script, outside this fragment.
with open(filename) as edgelist_file:
    graph.load_edgelist(edgelist_file)

graph.create_indices()

print('output Graph (possibly disconnected):')
print('nodes = %d' % graph.number_of_nodes())
print('edges = %d' % graph.number_of_edges())
# Open the graph named by the first argument, print its node and edge
# counts, and save it as an edge list to the file named by the second.

filename = sys.argv[1] if len(sys.argv) > 1 else None
if not filename:
    print('Error: first argument missing, input filename with space separated graph!')
    exit(-1)

outname = sys.argv[2] if len(sys.argv) > 2 else None
if not outname:
    print('Error: second argument missing, output filename!')
    exit(-1)

graphfile = filename

graph = BigGraph(graphfile)
graph.debug = True
graph.input_debug_links = 200000
graph.output_debug_nodes = 10000

# Report the size of the graph before dumping it.
print('NODES:')
print(graph.number_of_nodes())
print('EDGES:')
print(graph.number_of_edges())

print('dumping Graph to edge list file ...')
graph.save_edgelist(outname)
print('done.')
# Call add_random_component() on the input BigGraph to fill an in-memory
# Graph, then copy every edge of that Graph into a new BigGraph.

print(sys.argv)

filename = sys.argv[1] if len(sys.argv) > 1 else None
if not filename:
    print('Error: first argument missing, input filename with BigGraph!')
    exit(-1)

outname = sys.argv[2] if len(sys.argv) > 2 else None
if not outname:
    print('Error: second argument missing, output filename for BigGraph!')
    exit(-1)

graph = BigGraph(filename)
aux_graph = Graph()
connected_graph = BigGraph(outname)

# aux_graph receives the component; its edges are then replayed one by one.
graph.add_random_component(aux_graph)
for src, dst in aux_graph.edges_iter():
    connected_graph.add_edge(src, dst)
#!/usr/bin/python
'''
Created on Jul 27, 2010

@author: jose

Remove the 'triangles' parameter cache from the BigGraph archive named by
the first command-line argument.
'''

from cloudlight import BigGraph
import sys

filename = sys.argv[1] if len(sys.argv) > 1 else None
if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    # Abort here: the string concatenation below would fail on None.
    sys.exit(-1)

print('opening BigGraph ' + filename)
graph = BigGraph(filename)

print('dropping index with remove_parameter_cache()')
graph.remove_parameter_cache('triangles')
#!/usr/bin/python
'''
Created on May 23, 2010

@author: jose

Open the BigGraph archive named by the first command-line argument and
call create_index_unseen_degree() on it.
'''

from cloudlight import BigGraph
import sys

# The archive path is the first positional argument.
cli_args = sys.argv[1:]
filename = cli_args[0] if cli_args else None
if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    exit(-1)

print('opening BigGraph ' + filename)
graph = BigGraph(filename)

print('indexing with create_index_unseen_degree()')
graph.create_index_unseen_degree()

if __name__ == '__main__':
    pass
if not type: print 'Error: second argument missing, input sphere type (link or node all )!' exit(-1) lookahead = len(sys.argv) > 3 and int(sys.argv[3]) or None if not lookahead and type != 'all': print 'Error: third argument missing, input sphere lookahead (radius minus one)! lookahead 0 (zero) equals degree...' exit(-1) # cache size in 2KB pages (?) cache_size = 2**18 print 'opening BigGraph ' + filename graph = BigGraph(filename, cache_size) graph.debug = True if type == 'link': print 'indexing with create_index_linksphere( lookahead = %d)' % lookahead graph.create_index_linksphere(lookahead) elif type == 'node': print 'indexing with create_index_nodesphere( lookahead = %d)' % lookahead graph.create_index_nodesphere(lookahead) elif type == 'all': #for lookahead in [1,2,3]:
# Export the cached '<type>sphere<lookahead>' parameter of a BigGraph archive
# to a text file, writing one "<node>\t\t<value>" line per entry.
# `filename` and `type` are set earlier in the script, outside this fragment.

lookahead = int(sys.argv[3]) if len(sys.argv) > 3 else None
# A lookahead of 0 is rejected here too, exactly as before.
if not lookahead:
    print('Error: third argument missing, input sphere lookahead (radius minus one)! lookahead 0 (zero) equals degree...')
    sys.exit(-1)

outname = sys.argv[4] if len(sys.argv) > 4 else None
if not outname:
    # The output filename is the fourth argument; the message said "second".
    print('Error: fourth argument missing, output filename!')
    sys.exit(-1)

print('opening BigGraph ' + filename)
graph = BigGraph(filename)

parameter_name = '%ssphere%d' % (type, lookahead)
total_nodes = graph.number_of_nodes()
count = 0

# The with-block closes (and therefore flushes) the output file even if the
# export fails half-way.
with open(outname, 'w') as out:
    for node, sphere in graph.get_parameter_cache_iter(parameter_name):
        if count % 10000 == 0:
            # Name the parameter actually exported; it used to say "clustering".
            print('INFO: exporting %s %d nodes of %d total nodes' % (parameter_name, count, total_nodes))
        out.write('%s\t\t%f\n' % (node, sphere))
        count += 1
# Remove sphere parameter caches from a BigGraph archive. `filename` and
# `type` are set earlier in the script, outside this fragment.

if not type:
    print('Error: second argument missing, input sphere type (link o node)!')
    exit(-1)

# A lookahead of 0 collapses to None, so it counts as missing below.
lookahead = (int(sys.argv[3]) or None) if len(sys.argv) > 3 else None
if type != 'all' and not lookahead:
    print('Error: third argument missing, input sphere lookahead (radius minus one)! lookahead 0 (zero) equals degree...')
    exit(-1)

# cache size in 2KB pages (?)
cache_size = 2**18

print('opening BigGraph ' + filename)
graph = BigGraph(filename, cache_size)

if type in ('link', 'node'):
    # Single cache, chosen by the requested type and lookahead.
    print('dropping index with remove_parameter_cache() %s lookahead %d' % (type, lookahead))
    graph.remove_parameter_cache('%ssphere%d' % (type, lookahead))
elif type == 'all':
    # Only the 'node' caches for lookahead 1 and 2 are dropped here.
    for lookahead in (1, 2):
        type = 'node'
        print('dropping index with remove_parameter_cache() %s lookahead %d' % (type, lookahead))
        graph.remove_parameter_cache('%ssphere%d' % (type, lookahead))
#!/usr/bin/python
'''
Created on May 11, 2010

@author: jose

Open the BigGraph archive named by the first command-line argument and
call create_index_clustering() on it.
'''

from cloudlight import BigGraph
import sys

# The archive path is the first positional argument.
cli_args = sys.argv[1:]
filename = cli_args[0] if cli_args else None
if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    exit(-1)

# cache size in 2KB pages (?)
cache_size = 2**16

print('opening BigGraph ' + filename)
graph = BigGraph(filename, cache_size)

print('indexing with create_index_clustering()')
graph.create_index_clustering()
#!/usr/bin/python
'''
Created on Aug 1, 2010

@author: jose

Remove the 'seen_degree' parameter cache from the BigGraph archive named by
the first command-line argument.
'''

from cloudlight import BigGraph
import sys

filename = sys.argv[1] if len(sys.argv) > 1 else None
if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    # Abort here: the string concatenation below would fail on None.
    sys.exit(-1)

print('opening BigGraph ' + filename)
graph = BigGraph(filename)

print('dropping index with remove_parameter_cache()')
graph.remove_parameter_cache('seen_degree')
#!/usr/bin/python
'''
Created on May 27, 2010

@author: jose

Print the number of nodes and edges of the BigGraph archive named by the
first command-line argument.
'''

from cloudlight import BigGraph
import sys

filename = sys.argv[1] if len(sys.argv) > 1 else None
if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    # Abort here: the string concatenation below would fail on None.
    sys.exit(-1)

print('opening BigGraph ' + filename)
graph = BigGraph(filename)

print('nodes: %d' % graph.number_of_nodes())
print('edges: %d' % graph.number_of_edges())