# then we'll mine the 37K vertex / 9M edge network for compression patterns. # # This will be the most time-consuming part of our exercise; it takes roughly 5.5 minutes # on my magnetic drive / 8 GB RAM / i7 laptop. # # In[40]: this_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) cache_dir = this_dir+'/.twitter_politics/' if os.path.exists(cache_dir+'parent'): gw = GraphWrapper.from_previous_reduction(cache_dir) else: v_path = 'http://static.smarttypes.org/static/graphreduce/test_data/TheDemocrats_GOP.vertex.csv.gz' e_path = 'http://static.smarttypes.org/static/graphreduce/test_data/TheDemocrats_GOP.edge.csv.gz' gw, mdls = GraphWrapper.reduce(v_path, e_path) if not os.path.exists(cache_dir): os.makedirs(cache_dir) gw.save(cache_dir) # ## Network community detection # # The topic of community detection is broad and deep. # The method here, # [the map equation](http://arxiv.org/abs/0906.1405), # uses information theory to quantify the compression of a random walk on our network. # [RelaxMap](http://uwescience.github.io/RelaxMap/) # is a parallel implementation of the map equation objective. # # The method also tags communities, giving us a quick idea of
# Imports used by this script: stdlib helpers, IPython HTML display,
# GraphLab, and the graphreduce wrapper.
import os, math, inspect
from IPython.display import display_html
from operator import mul
import graphlab as gl
from graphreduce.graph_wrapper import GraphWrapper

# Directory that contains this file, resolved through the current frame.
this_dir = os.path.dirname(
    os.path.abspath(inspect.getfile(inspect.currentframe())))
# Cache directory for the reduction, kept next to this file. The trailing
# slash matters: entry names are appended by plain string concatenation.
cache_dir = this_dir + '/.twitter_politics/'
if os.path.exists(cache_dir + 'parent'):
    # A 'parent' entry already exists in the cache directory: load the
    # previous reduction instead of recomputing it.
    gw = GraphWrapper.from_previous_reduction(cache_dir)
else:
    # NOTE(review): these are absolute paths into one user's home directory,
    # so this branch only works on that machine -- confirm whether they
    # should point at a shared location instead.
    v_path = '/home/kcavagnolo/downloads/TheDemocrats_GOP.vertex.csv.gz'
    e_path = '/home/kcavagnolo/downloads/TheDemocrats_GOP.edge.csv.gz'
    # NOTE: `mdls` is only bound on this branch, not when the cache is reused.
    gw, mdls = GraphWrapper.reduce(v_path, e_path)
    # Create the cache directory on first use, then persist the reduction.
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
    gw.save(cache_dir)


def display_table(rows):
    # Builds HTML table-cell markup from `rows`, an iterable of rows where
    # each row is an iterable of column values. The first row (index 0) is
    # rendered with <th> cells, every later row with <td> cells.
    #
    # NOTE(review): this definition appears to continue beyond the visible
    # portion of the file; in the lines shown here `table_template`,
    # `row_template` and `rows_html` are prepared but not yet used, and
    # nothing is returned or displayed.
    table_template = '<table>%s</table>'
    row_template = '<tr>%s</tr>'
    header_column_template = '<th>%s</th>'
    normal_column_template = '<td>%s</td>'
    rows_html = []
    for i, row in enumerate(rows):
        # Cell markup for the current row only.
        row_html = []
        for column in row:
            # Header cells for the first row, normal cells otherwise.
            col_template = header_column_template if i == 0 else normal_column_template
            row_html.append(col_template % column)
"""Command-line entry point that runs ``GraphWrapper.reduce``.

Usage::

    $ python run.py vertex_path edge_path output_dir

The three positional arguments are forwarded, in order, to
``GraphWrapper.reduce``.
"""
import sys

from graphreduce.graph_wrapper import GraphWrapper

if __name__ == "__main__":
    # Fail fast with the usage line instead of an IndexError traceback when
    # fewer than three arguments are supplied. Extra arguments are still
    # ignored, exactly as before.
    if len(sys.argv) < 4:
        sys.exit("usage: python run.py vertex_path edge_path output_dir")

    # Names stay at module level so an interactive session
    # (``python -i run.py ...``) can keep inspecting them.
    vertex_path, edge_path, output_dir = sys.argv[1:4]
    gw, mdls = GraphWrapper.reduce(vertex_path, edge_path, output_dir)
"""Smoke-run of ``GraphWrapper.reduce`` against the hosted test data set.

Running this file as a script passes the vertex and edge CSV locations
below to ``GraphWrapper.reduce``; importing it only defines the two paths.
"""
import inspect
import os
import unittest

from graphreduce.graph_wrapper import GraphWrapper

# Locations of the gzipped vertex and edge CSV files used for the run.
vertex_path = 'http://static.smarttypes.org/static/graphreduce/test_data/vertex.csv.gz'
edge_path = 'http://static.smarttypes.org/static/graphreduce/test_data/edge.csv.gz'

if __name__ == '__main__':
    # Keep both results bound at module level, as the original script did.
    gw, mdls = GraphWrapper.reduce(vertex_path, edge_path)