Пример #1
0
#!/usr/bin/python
'''
Created on Jun 7, 2010

@author: jose
'''

#!/usr/bin/python
from cloudlight import BigGraph

import sys

# Take the BigGraph archive path from the first command-line argument;
# an absent or empty argument leaves filename as None.
filename = None
if len(sys.argv) > 1 and sys.argv[1]:
    filename = sys.argv[1]

debug = True

# Nothing can be indexed without an archive path: report and abort.
if filename is None:
    print('Error: first argument missing, input filename with BigGraph archive!')
    exit(-1)

# cache size in 2KB pages (?) -- unit carried over unconfirmed
cache_size = 1 << 16

print('opening BigGraph %s' % filename)
graph = BigGraph(filename, cache_size)
graph.debug = debug

# Build the index through BigGraph.create_index_kcores().
print('indexing with create_index_kcores()')
graph.create_index_kcores()
Пример #2
0
#!/usr/bin/python
'''
Created on May 11, 2010

@author: jose
'''

#!/usr/bin/python
from cloudlight import BigGraph

import sys

# Take the BigGraph archive path from the first command-line argument;
# an absent or empty argument leaves filename as None.
filename = None
if len(sys.argv) > 1 and sys.argv[1]:
    filename = sys.argv[1]

# Nothing can be indexed without an archive path: report and abort.
if filename is None:
    print('Error: first argument missing, input filename with BigGraph archive!')
    exit(-1)

# cache size in 2KB pages (?) -- unit carried over unconfirmed
cache_size = 1 << 16

print('opening BigGraph %s' % filename)
graph = BigGraph(filename, cache_size)

# Build the index through BigGraph.create_index_knn().
print('indexing with create_index_knn()')
graph.create_index_knn()
Пример #3
0
import sys

# Export the cached 'clustering' parameter of a BigGraph archive to a text
# file, one "<node>\t\t<value>" line per entry.
# Arguments (per the error messages): 1 = BigGraph archive, 2 = output file.

filename = None
if len(sys.argv) > 1 and sys.argv[1]:
    filename = sys.argv[1]

if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    # Abort here like the sibling scripts do; continuing would fail later on
    # "'opening BigGraph ' + filename" with filename set to None.
    exit(-1)

outname = None
if len(sys.argv) > 2 and sys.argv[2]:
    outname = sys.argv[2]

if not outname:
    print('Error: second argument missing, output filename!')
    exit(-1)

print('opening BigGraph ' + filename)
graph = BigGraph(filename)

# The with-block closes (and therefore flushes) the output file even when
# the export loop raises.
with open(outname, 'w') as out:

    total_nodes = graph.number_of_nodes()
    count = 0
    for node, clustering in graph.get_parameter_cache_iter('clustering'):

        # Progress line every 10000 entries, starting with the first one.
        if count % 10000 == 0:
            print('INFO: exporting clustering %d nodes of %d total nodes' % (count, total_nodes))

        out.write('%s\t\t%f\n' % (node, clustering))

        count += 1
Пример #4
0
#!/usr/bin/python
'''
Created on Jul 27, 2010

@author: jose
'''

from cloudlight import BigGraph

import sys

# Take the BigGraph archive path from the first command-line argument;
# an absent or empty argument leaves filename as None.
filename = None
if len(sys.argv) > 1 and sys.argv[1]:
    filename = sys.argv[1]

# Nothing can be indexed without an archive path: report and abort.
if filename is None:
    print('Error: first argument missing, input filename with BigGraph archive!')
    exit(-1)

# cache size in 2KB pages (?) -- unit carried over unconfirmed
cache_size = 1 << 16

print('opening BigGraph %s' % filename)
graph = BigGraph(filename, cache_size)
graph.debug = True

# Build the index through BigGraph.create_index_triangles().
print('indexing with create_index_triangles()')
graph.create_index_triangles()
Пример #5
0
import sys

# Echo the raw command line before doing anything else.
print(sys.argv)

# Take the BigGraph archive path from the first command-line argument;
# an absent or empty argument leaves filename as None.
filename = None
if len(sys.argv) > 1 and sys.argv[1]:
    filename = sys.argv[1]

# Nothing can be indexed without an archive path: report and abort.
if filename is None:
    print('Error: first argument missing, input filename with BigGraph archive!')
    exit(-1)

# cache size in 2KB pages (?) -- unit carried over unconfirmed
cache_size = 1 << 18

print('opening BigGraph %s' % filename)
graph = BigGraph(filename, cache_size)

# Run the five create_index_<name>() methods in this fixed order,
# announcing each one before it starts.
for index_name in ('degree', 'knn', 'clustering', 'kcores', 'triangles'):
    print('indexing with create_index_%s()' % index_name)
    getattr(graph, 'create_index_' + index_name)()
Пример #6
0
import sys

# Export the cached 'shell' parameter of a BigGraph archive to a text file,
# one "<node>\t\t<value>" line per entry.
# Arguments (per the error messages): 1 = BigGraph archive, 2 = output file.

filename = None
if len(sys.argv) > 1 and sys.argv[1]:
    filename = sys.argv[1]

if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    # Abort here like the sibling scripts do; continuing would fail later on
    # "'opening BigGraph ' + filename" with filename set to None.
    exit(-1)

outname = None
if len(sys.argv) > 2 and sys.argv[2]:
    outname = sys.argv[2]

if not outname:
    print('Error: second argument missing, output filename!')
    exit(-1)

print('opening BigGraph ' + filename)
graph = BigGraph(filename)

# The with-block closes (and therefore flushes) the output file even when
# the export loop raises.
with open(outname, 'w') as out:

    total_nodes = graph.number_of_nodes()
    count = 0
    for node, shell in graph.get_parameter_cache_iter('shell'):

        # Progress line every 10000 entries, starting with the first one.
        # The message names 'shell', the parameter actually being exported
        # (it used to say 'knn').
        if count % 10000 == 0:
            print('INFO: exporting shell %d nodes of %d total nodes' % (count, total_nodes))

        out.write('%s\t\t%f\n' % (node, shell))

        count += 1
Пример #7
0
#!/usr/bin/python
'''
Created on Jul 30, 2010

@author: jose
'''

from cloudlight import BigGraph

import sys

# Take the BigGraph archive path from the first command-line argument;
# an absent or empty argument leaves filename as None.
filename = None
if len(sys.argv) > 1 and sys.argv[1]:
    filename = sys.argv[1]

# Without an archive path there is nothing to work on: report and abort.
if filename is None:
    print('Error: first argument missing, input filename with BigGraph archive!')
    exit(-1)

print('opening BigGraph %s' % filename)
graph = BigGraph(filename)

# For each of the two parameters: add its cache, initialise it with 0.0,
# then index it -- in that order, first 'seen_triangles', then the '2' one.
print('indexing with create_index_seen_triangles()')
for parameter in ('seen_triangles', 'seen_triangles2'):
    graph.add_parameter_cache(parameter)
    graph.initialize_parameter(parameter, 0.0)
    graph.index_parameter_cache(parameter)

# Entry-point guard with an empty body (no-op).
if __name__ == '__main__':
    pass
Пример #8
0
#!/usr/bin/python
'''
Created on Aug 1, 2010

@author: jose
'''

from cloudlight import BigGraph

import sys

# Take the BigGraph archive path from the first command-line argument;
# an absent or empty argument leaves filename as None.
filename = None
if len(sys.argv) > 1 and sys.argv[1]:
    filename = sys.argv[1]

# Without an archive path there is nothing to work on: report and abort.
if filename is None:
    print('Error: first argument missing, input filename with BigGraph archive!')
    exit(-1)

print('opening BigGraph %s' % filename)
graph = BigGraph(filename)

# For each of the two parameters: add its cache, initialise it with 0.0,
# then index it -- in that order, first 'seen_degree', then the '2' one.
print('indexing with create_index_seen_degree()')
for parameter in ('seen_degree', 'seen_degree2'):
    graph.add_parameter_cache(parameter)
    graph.initialize_parameter(parameter, 0.0)
    graph.index_parameter_cache(parameter)

# Entry-point guard with an empty body (no-op).
if __name__ == '__main__':
    pass
Пример #9
0
#!/usr/bin/python
'''
Created on Jun 29, 2010

@author: jose
'''

from cloudlight import BigGraph

import sys

# Drop the 'knn' parameter cache from a BigGraph archive.
# Argument 1 (per the error message): path of the BigGraph archive.

filename = None
if len(sys.argv) > 1 and sys.argv[1]:
    filename = sys.argv[1]

if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    # Abort like the sibling scripts do; falling through would raise a
    # TypeError on "'opening BigGraph ' + filename" with filename = None.
    exit(-1)

print('opening BigGraph ' + filename)
graph = BigGraph(filename)

print('dropping index with remove_parameter_cache()')
graph.remove_parameter_cache('knn')

# Entry-point guard with an empty body (no-op).
if __name__ == '__main__':
    pass


Пример #10
0
#!/usr/bin/python
'''
Created on Jun 29, 2010

@author: jose
'''

from cloudlight import BigGraph

import sys

# Drop the 'clustering' parameter cache from a BigGraph archive.
# Argument 1 (per the error message): path of the BigGraph archive.

filename = None
if len(sys.argv) > 1 and sys.argv[1]:
    filename = sys.argv[1]

if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    # Abort like the sibling scripts do; falling through would raise a
    # TypeError on "'opening BigGraph ' + filename" with filename = None.
    exit(-1)

print('opening BigGraph ' + filename)
graph = BigGraph(filename)

print('dropping index with remove_parameter_cache()')
graph.remove_parameter_cache('clustering')

# Entry-point guard with an empty body (no-op).
if __name__ == '__main__':
    pass
Пример #11
0
#!/usr/bin/python
'''
Created on Jun 29, 2010

@author: jose
'''

from cloudlight import BigGraph

import sys

# Drop the 'shell' parameter cache from a BigGraph archive.
# Argument 1 (per the error message): path of the BigGraph archive.

filename = None
if len(sys.argv) > 1 and sys.argv[1]:
    filename = sys.argv[1]

if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    # Abort like the sibling scripts do; falling through would raise a
    # TypeError on "'opening BigGraph ' + filename" with filename = None.
    exit(-1)

print('opening BigGraph ' + filename)
graph = BigGraph(filename)

print('dropping index with remove_parameter_cache()')
graph.remove_parameter_cache('shell')

# Entry-point guard with an empty body (no-op).
if __name__ == '__main__':
    pass
Пример #12
0
        else:
            q.put('FINISHED')


# Local import: the file's import block is outside this fragment.
import subprocess

create_dbs = True

if create_dbs:
    # Give every (strategy, lookahead) pair its own copy of the archive,
    # named "<filename>.<strategy>.lookahead<N>".
    for strat_name in strats:
        for l in lookaheads:

            new_filename = filename + '.%s.lookahead%d' % (strat_name, l)
            print('creating BigGraph ' + new_filename)
            # Run cp with an argument list rather than a shell string, so
            # paths containing spaces or shell metacharacters are copied
            # verbatim instead of being re-parsed by the shell.
            subprocess.call(['cp', filename, new_filename])

# Pick one node from graph.random_nodes() as the seed, then release the
# graph object before the crawler setup continues.
print('choosing random node seed for crawlers...')
graph = BigGraph(filename, cache_size_pages)
node_rand_seed = graph.random_nodes()[0]
del graph
print('done.  choosed -> %s' % str(node_rand_seed))

# One slot per (strategy, lookahead) combination.
processors = len(strats) * len(lookaheads)  # 4

# NOTE(review): 'out' is not closed within this fragment; presumably the
# code past the end of this view writes to it and closes it -- confirm.
out = open(outname, 'w')

processes = []
graphs = []
list_of_strategies = []
results = 0
for strat_name in strats:
    for l in lookaheads:
Пример #13
0
#!/usr/bin/python
'''
Created on May 11, 2010

@author: jose
'''

#!/usr/bin/python
from cloudlight import BigGraph

import sys

# Take the BigGraph archive path from the first command-line argument;
# an absent or empty argument leaves filename as None.
filename = None
if len(sys.argv) > 1 and sys.argv[1]:
    filename = sys.argv[1]

# Nothing can be indexed without an archive path: report and abort.
if filename is None:
    print('Error: first argument missing, input filename with BigGraph archive!')
    exit(-1)

# cache size in 2KB pages (?) -- unit carried over unconfirmed
cache_size = 1 << 16

print('opening BigGraph %s' % filename)
graph = BigGraph(filename, cache_size)

# Build the index through BigGraph.create_index_degree().
print('indexing with create_index_degree()')
graph.create_index_degree()
Пример #14
0
# Load an edge list into a BigDiGraph archive, copy its symmetric edges into
# a BigGraph archive, and check how large the first connected component is.
# 'graphfile', 'links' and 'filename' are defined before this fragment.
digraph = BigDiGraph(graphfile + '.big_digraph')

digraph.debug = True
digraph.input_debug_links = 200000
digraph.output_debug_nodes = 100

digraph.max_links_input = links
digraph.max_nodes_analysis = 10000

print('digraph.load_edgelist(open(filename)) ...')
digraph.load_edgelist(open(filename))

print('digraph.create_indices() ...')
digraph.create_indices()

graph = BigGraph(graphfile + '.disconnected')

print('digraph.add_only_symmetric_edgelist(graph) ...')
digraph.add_only_symmetric_edgelist(graph)

print('graph.create_indices() ...')
graph.create_indices()

# Size of every connected component as a fraction of all nodes.
number_of_nodes = graph.number_of_nodes()
comps = [
    len(comp) / float(number_of_nodes)
    for comp in graph.connected_components()
]

# An empty component list (e.g. an empty graph) is reported through the same
# error instead of crashing with an IndexError on comps[0].
# NOTE(review): comps[0] is treated as the biggest component; this assumes
# connected_components() yields the largest one first -- confirm.
if not comps or comps[0] < 0.5:
    print('ERROR: biggest connected componnet not found!')
Пример #15
0
import sys

# Export the cached 'degree' parameter of a BigGraph archive to a text file,
# one "<node>\t\t<value>" line per entry.
# Arguments (per the error messages): 1 = BigGraph archive, 2 = output file.

filename = None
if len(sys.argv) > 1 and sys.argv[1]:
    filename = sys.argv[1]

if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    # Abort here like the sibling scripts do; continuing would fail later on
    # "'opening BigGraph ' + filename" with filename set to None.
    exit(-1)

outname = None
if len(sys.argv) > 2 and sys.argv[2]:
    outname = sys.argv[2]

if not outname:
    print('Error: second argument missing, output filename!')
    exit(-1)

print('opening BigGraph ' + filename)
graph = BigGraph(filename)

# The with-block closes (and therefore flushes) the output file even when
# the export loop raises.
with open(outname, 'w') as out:

    total_nodes = graph.number_of_nodes()
    count = 0
    for node, degree in graph.get_parameter_cache_iter('degree'):

        # Progress line every 10000 entries, starting with the first one.
        if count % 10000 == 0:
            print('INFO: exporting degree %d nodes of %d total nodes' % (
                count, total_nodes))

        out.write('%s\t\t%f\n' % (node, degree))

        count += 1
Пример #16
0
import sys

# Export the cached 'triangles' parameter of a BigGraph archive to a text
# file, one "<node>\t\t<value>" line per entry.
# Arguments (per the error messages): 1 = BigGraph archive, 2 = output file.

filename = None
if len(sys.argv) > 1 and sys.argv[1]:
    filename = sys.argv[1]

if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    # Abort here like the sibling scripts do; continuing would fail later on
    # "'opening BigGraph ' + filename" with filename set to None.
    exit(-1)

outname = None
if len(sys.argv) > 2 and sys.argv[2]:
    outname = sys.argv[2]

if not outname:
    print('Error: second argument missing, output filename!')
    exit(-1)

print('opening BigGraph ' + filename)
graph = BigGraph(filename)
graph.debug = True

# The with-block closes (and therefore flushes) the output file even when
# the export loop raises.
with open(outname, 'w') as out:

    total_nodes = graph.number_of_nodes()
    count = 0
    for node, triangles in graph.get_parameter_cache_iter('triangles'):

        # Progress line every 10000 entries, starting with the first one.
        if count % 10000 == 0:
            print('INFO: exporting triangles %d nodes of %d total nodes' % (
                count, total_nodes))

        out.write('%s\t\t%f\n' % (node, triangles))

        count += 1
Пример #17
0
#!/usr/bin/python
'''
Created on May 23, 2010

@author: jose
'''

from cloudlight import BigGraph

import sys

# Take the BigGraph archive path from the first command-line argument;
# an absent or empty argument leaves filename as None.
filename = None
if len(sys.argv) > 1 and sys.argv[1]:
    filename = sys.argv[1]

# Without an archive path there is nothing to work on: report and abort.
if filename is None:
    print('Error: first argument missing, input filename with BigGraph archive!')
    exit(-1)

print('opening BigGraph %s' % filename)
graph = BigGraph(filename)

# NOTE(review): presumably this derives the 'unseen_triangles' parameter
# from the existing 'triangles' one -- confirm against BigGraph.
print('indexing with create_index_unseen_triangles()')
graph.index_parameter_from_parameter('triangles', 'unseen_triangles')

# Entry-point guard with an empty body (no-op).
if __name__ == '__main__':
    pass
Пример #18
0
#!/usr/bin/python
'''
Created on Oct 4, 2010

@author: jose
'''

from cloudlight import BigGraph

import sys

if __name__ == '__main__':
    # Dump a BigGraph archive through save_snowball_edgelist_iter().
    # Arguments: 1 = BigGraph archive to read, 2 = output edge-list file.

    print('Starting dump_snowball')

    filename = None
    if len(sys.argv) > 1 and sys.argv[1]:
        filename = sys.argv[1]

    if not filename:
        print('Error: first argument missing, input filename with BigGraph archive!')
        exit(-1)

    outname = None
    if len(sys.argv) > 2 and sys.argv[2]:
        outname = sys.argv[2]

    if not outname:
        # The output name is argv[2], so the message now names the second
        # argument (it used to say 'first').
        print('Error: second argument missing, output filename with edgelist archive!')
        exit(-1)

    g = BigGraph(filename)
    g.debug = True
    g.save_snowball_edgelist_iter(outname)
Пример #19
0
# 'outname' and 'filename' are set before this fragment.
# A missing output name is fatal.
if not outname:
    print('Error: second argument missing, output filename!')
    exit(-1)

# Third command-line argument: maximum number of links, kept as text until
# it has been checked for presence.
links = None
if len(sys.argv) > 3 and sys.argv[3]:
    links = sys.argv[3]

if links:
    links = int(links)
else:
    print('Error: third argument missing, max number of links!')
    exit(-1)

graph = BigGraph(outname)

# Debug and limit settings applied before loading.
graph.debug = True
graph.input_debug_links = 200000
graph.output_debug_nodes = 10000

graph.max_links_input = links
graph.max_nodes_analysis = 10000

# Feed the edge-list file to the graph, then build its indices.
graph.load_edgelist(open(filename))

graph.create_indices()

print('output Graph (possibly disconnected):')
print('nodes = %d' % graph.number_of_nodes())
print('edges = %d' % graph.number_of_edges())
Пример #20
0
# First command-line argument: input file name (None when absent or empty).
filename = None
if len(sys.argv) > 1 and sys.argv[1]:
    filename = sys.argv[1]

if filename is None:
    print('Error: first argument missing, input filename with space separated graph!')
    exit(-1)

# Second command-line argument: output file name (None when absent or empty).
outname = None
if len(sys.argv) > 2 and sys.argv[2]:
    outname = sys.argv[2]

if outname is None:
    print('Error: second argument missing, output filename!')
    exit(-1)

graphfile = filename

graph = BigGraph(graphfile)

graph.debug = True
graph.input_debug_links = 200000
graph.output_debug_nodes = 10000

# Report the graph size, then write it out with save_edgelist().
print('NODES:')
print(graph.number_of_nodes())
print('EDGES:')
print(graph.number_of_edges())
print('dumping Graph to edge list file ...')
graph.save_edgelist(outname)
print('done.')
Пример #21
0
# Echo the raw command line before doing anything else.
print(sys.argv)

# First command-line argument: source BigGraph (None when absent or empty).
filename = None
if len(sys.argv) > 1 and sys.argv[1]:
    filename = sys.argv[1]

if filename is None:
    print('Error: first argument missing, input filename with BigGraph!')
    exit(-1)

# Second command-line argument: destination BigGraph (None when absent or empty).
outname = None
if len(sys.argv) > 2 and sys.argv[2]:
    outname = sys.argv[2]

if outname is None:
    print('Error: second argument missing, output filename for BigGraph!')
    exit(-1)

graph = BigGraph(filename)

aux_graph = Graph()
connected_graph = BigGraph(outname)

# NOTE(review): presumably fills aux_graph with one randomly chosen
# component of graph -- confirm against BigGraph.add_random_component.
graph.add_random_component(aux_graph)

# Copy every edge of the auxiliary graph into the output BigGraph.
for edge in aux_graph.edges_iter():
    src, dst = edge
    connected_graph.add_edge(src, dst)
Пример #22
0
#!/usr/bin/python
'''
Created on Jul 27, 2010

@author: jose
'''

from cloudlight import BigGraph

import sys

# Drop the 'triangles' parameter cache from a BigGraph archive.
# Argument 1 (per the error message): path of the BigGraph archive.

filename = None
if len(sys.argv) > 1 and sys.argv[1]:
    filename = sys.argv[1]

if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    # Abort like the sibling scripts do; falling through would raise a
    # TypeError on "'opening BigGraph ' + filename" with filename = None.
    exit(-1)

print('opening BigGraph ' + filename)
graph = BigGraph(filename)

print('dropping index with remove_parameter_cache()')
graph.remove_parameter_cache('triangles')

# Entry-point guard with an empty body (no-op).
if __name__ == '__main__':
    pass
Пример #23
0
#!/usr/bin/python
'''
Created on May 23, 2010

@author: jose
'''
from cloudlight import BigGraph

import sys

# Take the BigGraph archive path from the first command-line argument;
# an absent or empty argument leaves filename as None.
filename = None
if len(sys.argv) > 1 and sys.argv[1]:
    filename = sys.argv[1]

# Without an archive path there is nothing to work on: report and abort.
if filename is None:
    print('Error: first argument missing, input filename with BigGraph archive!')
    exit(-1)

print('opening BigGraph %s' % filename)
graph = BigGraph(filename)

# Build the index through BigGraph.create_index_unseen_degree().
print('indexing with create_index_unseen_degree()')
graph.create_index_unseen_degree()

# Entry-point guard with an empty body (no-op).
if __name__ == '__main__':
    pass
Пример #24
0
# 'type' and 'filename' are set before this fragment.
# A missing sphere type is fatal.
if not type:
    print('Error: second argument missing, input sphere type (link or node all )!')
    exit(-1)

# Third command-line argument parsed as an integer.
# NOTE(review): a value of 0 ends up as None and is then reported as
# missing (unless type is 'all') -- confirm this is intended.
lookahead = None
if len(sys.argv) > 3:
    lookahead = int(sys.argv[3]) or None

# The lookahead is only optional for the 'all' type.
if type != 'all' and lookahead is None:
    print('Error: third argument missing, input sphere lookahead (radius minus one)! lookahead 0 (zero) equals degree...')
    exit(-1)

# cache size in 2KB pages (?) -- unit carried over unconfirmed
cache_size = 1 << 18

print('opening BigGraph %s' % filename)
graph = BigGraph(filename, cache_size)
graph.debug = True

if type == 'link':
    
    print 'indexing with create_index_linksphere( lookahead  = %d)' % lookahead
    graph.create_index_linksphere(lookahead)
    
elif type == 'node':
    
    print 'indexing with create_index_nodesphere( lookahead  = %d)' % lookahead
    graph.create_index_nodesphere(lookahead)
    
elif type == 'all':
    
    #for lookahead in [1,2,3]:
Пример #25
0
# Export a cached '<type>sphere<lookahead>' parameter of a BigGraph archive
# to a text file, one "<node>\t\t<value>" line per entry.
# 'type' and 'filename' are set before this fragment.

# Third command-line argument parsed as an integer.
# NOTE(review): a value of 0 ends up as None and is then reported as
# missing -- confirm this is intended.
lookahead = None
if len(sys.argv) > 3:
    lookahead = int(sys.argv[3]) or None

if not lookahead:
    print('Error: third argument missing, input sphere lookahead (radius minus one)! lookahead 0 (zero) equals degree...')
    exit(-1)

outname = None
if len(sys.argv) > 4 and sys.argv[4]:
    outname = sys.argv[4]

if not outname:
    # The output name is argv[4], so the message now names the fourth
    # argument (it used to say 'second').
    print('Error: fourth argument missing, output filename!')
    exit(-1)

print('opening BigGraph ' + filename)
graph = BigGraph(filename)

# Name of the parameter cache being exported, e.g. 'linksphere2'.
parameter = '%ssphere%d' % (type, lookahead)

# The with-block closes (and therefore flushes) the output file even when
# the export loop raises.
with open(outname, 'w') as out:

    total_nodes = graph.number_of_nodes()
    count = 0
    for node, clustering in graph.get_parameter_cache_iter(parameter):

        # Progress line every 10000 entries, starting with the first one.
        # It names the parameter actually exported (it used to say
        # 'clustering').
        if count % 10000 == 0:
            print('INFO: exporting %s %d nodes of %d total nodes' % (
                parameter, count, total_nodes))

        out.write('%s\t\t%f\n' % (node, clustering))

        count += 1
Пример #26
0
# 'type' and 'filename' are set before this fragment.
# A missing sphere type is fatal.
if not type:
    print('Error: second argument missing, input sphere type (link o node)!')
    exit(-1)

# Third command-line argument parsed as an integer.
# NOTE(review): a value of 0 ends up as None and is then reported as
# missing (unless type is 'all') -- confirm this is intended.
lookahead = None
if len(sys.argv) > 3:
    lookahead = int(sys.argv[3]) or None

# The lookahead is only optional for the 'all' type.
if type != 'all' and lookahead is None:
    print('Error: third argument missing, input sphere lookahead (radius minus one)! lookahead 0 (zero) equals degree...')
    exit(-1)

# cache size in 2KB pages (?) -- unit carried over unconfirmed
cache_size = 1 << 18

print('opening BigGraph %s' % filename)
graph = BigGraph(filename, cache_size)


if type in ('link', 'node'):

    # Drop the single '<type>sphere<lookahead>' cache that was asked for.
    print('dropping index with remove_parameter_cache() %s lookahead %d' % (type, lookahead))
    graph.remove_parameter_cache('%ssphere%d' % (type, lookahead))

elif type == 'all':

    # NOTE(review): only the 'node' spheres for lookahead 1 and 2 are
    # dropped here; 'link' spheres are left alone -- confirm intended.
    for lookahead in [1, 2]:

        type = 'node'
        print('dropping index with remove_parameter_cache() %s lookahead %d' % (type, lookahead))
        graph.remove_parameter_cache('%ssphere%d' % (type, lookahead))
Пример #27
0
#!/usr/bin/python
'''
Created on May 11, 2010

@author: jose
'''

#!/usr/bin/python
from cloudlight import BigGraph

import sys

# Take the BigGraph archive path from the first command-line argument;
# an absent or empty argument leaves filename as None.
filename = None
if len(sys.argv) > 1 and sys.argv[1]:
    filename = sys.argv[1]

# Nothing can be indexed without an archive path: report and abort.
if filename is None:
    print('Error: first argument missing, input filename with BigGraph archive!')
    exit(-1)

# cache size in 2KB pages (?) -- unit carried over unconfirmed
cache_size = 1 << 16

print('opening BigGraph %s' % filename)
graph = BigGraph(filename, cache_size)

# Build the index through BigGraph.create_index_clustering().
print('indexing with create_index_clustering()')
graph.create_index_clustering()
Пример #28
0
#!/usr/bin/python
'''
Created on Aug 1, 2010

@author: jose
'''
from cloudlight import BigGraph

import sys

# Drop the 'seen_degree' parameter cache from a BigGraph archive.
# Argument 1 (per the error message): path of the BigGraph archive.

filename = None
if len(sys.argv) > 1 and sys.argv[1]:
    filename = sys.argv[1]

if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    # Abort like the sibling scripts do; falling through would raise a
    # TypeError on "'opening BigGraph ' + filename" with filename = None.
    exit(-1)

print('opening BigGraph ' + filename)
graph = BigGraph(filename)

print('dropping index with remove_parameter_cache()')
graph.remove_parameter_cache('seen_degree')

# Entry-point guard with an empty body (no-op).
if __name__ == '__main__':
    pass
Пример #29
0
#!/usr/bin/python
'''
Created on May 27, 2010

@author: jose
'''

from cloudlight import BigGraph

import sys

# Print the node and edge counts of a BigGraph archive.
# Argument 1 (per the error message): path of the BigGraph archive.

filename = None
if len(sys.argv) > 1 and sys.argv[1]:
    filename = sys.argv[1]

if not filename:
    print('Error: first argument missing, input filename with BigGraph archive!')
    # Abort like the sibling scripts do; falling through would raise a
    # TypeError on "'opening BigGraph ' + filename" with filename = None.
    exit(-1)

print('opening BigGraph ' + filename)
graph = BigGraph(filename)

print('nodes: %d' % graph.number_of_nodes())
print('edges: %d' % graph.number_of_edges())