示例#1
0
#!/usr/bin/python
'''
Created on Jun 7, 2010

@author: jose
'''

#!/usr/bin/python
from cloudlight import BigGraph

import sys

# Path of the BigGraph archive to index: the first command-line argument,
# or None when that argument is absent or empty.
filename = sys.argv[1] if len(sys.argv) > 1 and sys.argv[1] else None

if filename is None:
    print('Error: first argument missing, input filename with BigGraph archive!')
    exit(-1)

# Value copied onto graph.debug once the archive is open.
debug = True

# Second constructor argument of BigGraph; presumably counted in 2KB
# pages (the original author was unsure as well) -- TODO confirm.
cache_size = 1 << 16

print('opening BigGraph %s' % filename)
graph = BigGraph(filename, cache_size)
graph.debug = debug

print('indexing with create_index_kcores()')
graph.create_index_kcores()
示例#2
0
#!/usr/bin/python
'''
Created on Aug 1, 2010

@author: jose
'''
from cloudlight import BigGraph

import sys

# Path of the BigGraph archive whose cached 'seen_degree' parameter is
# dropped: the first command-line argument, or None when it is absent or
# empty.
filename = sys.argv[1] if len(sys.argv) > 1 and sys.argv[1] else None

if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    # Abort here: without this exit the script would fall through and fail
    # with a TypeError when concatenating the message below with None.
    sys.exit(-1)

print('opening BigGraph ' + filename)
graph = BigGraph(filename)

print('dropping index with remove_parameter_cache()')
graph.remove_parameter_cache('seen_degree')



# Intentionally empty entry-point guard kept from the original script; all
# the work above runs at module level.
if __name__ == '__main__':
    pass
示例#3
0
def _cli_arg(position):
    """Return sys.argv[position], or None when it is absent or empty."""
    if len(sys.argv) > position and sys.argv[position]:
        return sys.argv[position]
    return None


# Input: first argument, passed to the BigGraph constructor below
# (e.g. 'orkut-links-fst.txt.toundirected.30mill').
filename = _cli_arg(1)
if filename is None:
    print('Error: first argument missing, input filename with space separated graph!')
    exit(-1)

# Output: second argument, destination of the edge list dump
# (e.g. 'orkut-2k_sym.big_graph').
outname = _cli_arg(2)
if outname is None:
    print('Error: second argument missing, output filename!')
    exit(-1)

graphfile = filename

graph = BigGraph(graphfile)

# Debug settings on the graph object; their exact effect is defined by
# BigGraph, which is not visible here.
graph.debug = True
graph.input_debug_links = 200000
graph.output_debug_nodes = 10000

# Report the graph size before dumping it.
print('NODES:')
print(graph.number_of_nodes())
print('EDGES:')
print(graph.number_of_edges())

print('dumping Graph to edge list file ...')
graph.save_edgelist(outname)
print('done.')
示例#4
0
# Build a directed graph from the input edge list, copy only its symmetric
# edges into an undirected BigGraph, and check how large the first
# connected component of that undirected graph is.
# `graphfile`, `filename` and `links` are defined earlier in the script.
digraph = BigDiGraph(graphfile + '.big_digraph')

digraph.debug = True
digraph.input_debug_links = 200000
digraph.output_debug_nodes = 100

digraph.max_links_input = links
digraph.max_nodes_analysis = 10000

print('digraph.load_edgelist(open(filename)) ...')
# Context manager guarantees the input file is closed even when loading
# fails; assumes load_edgelist() consumes the file before returning.
with open(filename) as edgelist_file:
    digraph.load_edgelist(edgelist_file)

print('digraph.create_indices() ...')
digraph.create_indices()

graph = BigGraph(graphfile + '.disconnected')

print('digraph.add_only_symmetric_edgelist(graph) ...')
digraph.add_only_symmetric_edgelist(graph)

print('graph.create_indices() ...')
graph.create_indices()

# Fraction of all nodes contained in each connected component.
number_of_nodes = graph.number_of_nodes()
comps = [
    len(comp) / float(number_of_nodes)
    for comp in graph.connected_components()
]

# NOTE(review): comps[0] is treated as the biggest component, which assumes
# connected_components() yields the largest one first -- confirm.
# An empty component list (empty graph) is reported as an error as well
# instead of raising IndexError.
if not comps or comps[0] < 0.5:
    print('ERROR: biggest connected componnet not found!')
示例#5
0
# Echo the raw command line.
print(sys.argv)

# First argument: the existing BigGraph to sample from.
filename = sys.argv[1] if len(sys.argv) > 1 and sys.argv[1] else None
if filename is None:
    print('Error: first argument missing, input filename with BigGraph!')
    exit(-1)

# Second argument: the BigGraph that receives the extracted component.
outname = sys.argv[2] if len(sys.argv) > 2 and sys.argv[2] else None
if outname is None:
    print('Error: second argument missing, output filename for BigGraph!')
    exit(-1)

# The original kept a commented-out alternative here: an argument-less
# BigGraph() filled by hand with add_edge(1, 2), (2, 3), (3, 1) and (4, 5).
graph = BigGraph(filename)

aux_graph = Graph()
connected_graph = BigGraph(outname)

# add_random_component() is handed the auxiliary Graph; its edges are then
# copied one by one into the output BigGraph.
graph.add_random_component(aux_graph)

for source, target in aux_graph.edges_iter():
    connected_graph.add_edge(source, target)
示例#6
0
# `outname` (second CLI argument) and `filename` (input edge list) are
# parsed earlier in the script, outside this fragment.
if not outname:
    # e.g. outname = 'orkut-2k_sym.big_graph'
    print('Error: second argument missing, output filename!')
    exit(-1)

# Third CLI argument: maximum number of links, assigned to
# graph.max_links_input below.
links = sys.argv[3] if len(sys.argv) > 3 and sys.argv[3] else None

if not links:
    # e.g. links = 2000
    print('Error: third argument missing, max number of links!')
    exit(-1)
else:
    links = int(links)

graph = BigGraph(outname)

graph.debug = True
graph.input_debug_links = 200000
graph.output_debug_nodes = 10000

graph.max_links_input = links
graph.max_nodes_analysis = 10000

# Context manager guarantees the input file is closed even when loading
# fails; assumes load_edgelist() consumes the file before returning.
with open(filename) as edgelist_file:
    graph.load_edgelist(edgelist_file)

graph.create_indices()

print('output Graph (possibly disconnected):')
print('nodes = %d' % graph.number_of_nodes())
print('edges = %d' % graph.number_of_edges())
示例#7
0
        else:
            q.put('FINISHED')


# Imported locally so this fragment is self-contained; used to copy the
# graph file without going through a shell.
import subprocess

# When True, one copy of the input BigGraph file is created per
# (strategy, lookahead) pair.
# `strats`, `lookaheads`, `filename`, `cache_size_pages` and `outname` are
# defined earlier in the script, outside this fragment.
create_dbs = True

if create_dbs:
    for strat_name in strats:
        for l in lookaheads:

            new_filename = filename + '.%s.lookahead%d' % (strat_name, l)
            print('creating BigGraph ' + new_filename)
            # Argument list instead of a shell command string, so paths
            # containing spaces or shell metacharacters are copied
            # correctly. As before, the exit status of cp is ignored.
            subprocess.call(['cp', filename, new_filename])

print('choosing random node seed for crawlers...')
graph = BigGraph(filename, cache_size_pages)
# First element of whatever random_nodes() returns.
node_rand_seed = graph.random_nodes()[0]
# Drop the reference to the graph object once the seed has been picked.
del graph
print('done.  choosed -> %s' % str(node_rand_seed))

processors = len(strats) * len(lookaheads)  # 4

# Output file opened for writing; it is not closed within this fragment.
out = open(outname, 'w')

# Containers and counter initialised here for the code that follows.
processes = []
graphs = []
list_of_strategies = []
results = 0
for strat_name in strats:
    for l in lookaheads: