def analysis_2(work_a, work_b):
    """Build one friendship graph covering the members of two works.

    For each work the members are loaded with ``books.find_all_members``
    and their ``friends_list`` is passed through
    ``make_cleand_adjacancy_list``.  The adjacency list of ``work_b`` is
    merged into the one of ``work_a``, the result is filtered with
    ``remove_not_book_member`` against the de-duplicated members of both
    works, and a graph is created from what is left.

    Two progress figures are printed: the length of
    ``remove_duplicate(members_a)`` and the length of the filtered
    adjacency list.

    Returns the graph created from the filtered adjacency list.
    """
    members_a = books.find_all_members(work_a)
    print(len(remove_duplicate(members_a)))
    members_b = books.find_all_members(work_b)

    friends_a = make_cleand_adjacancy_list(friends_list(members_a), members_a)
    #G1 = create_graph(remove_not_book_member(friends_a, members_a))
    #plot_graph(G1, members_a, members_b)

    friends_b = make_cleand_adjacancy_list(friends_list(members_b), members_b)
    #G2 = create_graph(remove_not_book_member(friends_b, members_b))
    #plot_graph(G2, members_a, members_b)

    # Merge in place: entries of work_b overwrite same-keyed entries of work_a.
    friends_a.update(friends_b)
    combined = remove_not_book_member(
        friends_a, remove_duplicate(members_a + members_b))
    print(len(combined))

    G = create_graph(combined)
    #plot_graph(G, members_a, members_b)
    #save_edges(G)

    GC = connected_component_subgraphs(G)[0]  # Giant Component
    # ``center`` is only consumed by the commented-out layout call below.
    center = sort_dict(closeness_centrality(GC))[0]
    #center = sort_dict(eigenvector_centrality(GC))[0]
    #center = sort_dict(betweenness_centrality(GC))[0]
    #pos = graphviz_layout(G, prog="twopi", root=center)

    # draw and save
    #plot_graph(GC, members_a, members_b)
    #plot_hist(GC)
    return G
def analysis_1(work_a, work_b):
    """Build and save the friendship graph of the members of two works.

    For each work the members and the adjacency list (from
    ``find_adjacancy_list``) are loaded, the adjacency list is passed
    through ``make_cleand_adjacancy_list``, and every member missing from
    the cleaned list is reported through ``log`` with level ``'Error'``.
    The two adjacency lists are merged, turned into a graph and handed to
    ``save_edges``.  Finally the member counts of both works and the
    number of users returned by ``same_users_between`` are printed.

    Returns the graph created from the merged adjacency list.
    """
    def load(work):
        # Members plus cleaned adjacency list of a single work.
        work_members = books.find_all_members(work)
        cleaned = make_cleand_adjacancy_list(
            find_adjacancy_list(work), work_members)
        # make sure every member of the work is in the adjacency list
        for member in work_members:
            if member not in cleaned:
                log('%s removed from adjacancy list of %s' % (member, work),
                    'Error')
        return work_members, cleaned

    members_a, adj_list = load(work_a)
    members_b, adj_list_b = load(work_b)

    # one big adjacency list for the members of both books
    adj_list.update(adj_list_b)
    G = create_graph(adj_list)
    save_edges(G, True, 'all_friends')
    #print betweenness_centrality(G,normalized=False)

    for work, work_members in ((work_a, members_a), (work_b, members_b)):
        print('%s has %d members' % (work, len(remove_duplicate(work_members))))
    same = same_users_between(members_a, members_b)
    print('and %s of them are in both works' % len(same))

    #avg = average_shortest_path_between(G, members_a, members_b)
    #print 'average shortest path between %s and %s %f' % (work_a, work_b, avg)
    #avg = average_shortest_path_between(G, members_a, members_a)
    #print 'average shortest path between members of %s is %f' % (work_a, avg)
    #avg = average_shortest_path_between(G, members_b, members_b)
    #print 'average shortest path between members of %s is %f' % (work_b, avg)
    return G
def find_adjacancy_list(work):
    """(str)->dict

    Return the adjacency list (user name -> friends) of a work's members.

    The result is cached in ``./data/<work>_members.json``.  When that
    file can be opened it is read with ``load_local_friends``; otherwise
    the adjacency list is computed with ``get_adjacancy_list`` and every
    ``{user: friends}`` pair is passed to ``write_to_file`` as its own
    JSON record so later runs can reuse it.
    """
    file_path = './data/%s_members.json' % work
    try:
        # Opening the file is only a probe for "do we have a cache?".
        # Keep the try this narrow: an IOError raised while *reading* an
        # existing cache must not be mistaken for a missing cache, which
        # would recompute everything and write records to the file again.
        with open(file_path):
            pass
    except IOError:
        pass  # no usable cache yet; build it below
    else:
        return load_local_friends(file_path)

    # Note: this algorithm is very inefficient, that's why the result is saved.
    members = books.find_all_members(work)
    print(len(members))
    print(len(remove_duplicate(members)))
    adj_list = get_adjacancy_list(members)
    print(len(adj_list))
    for k, v in adj_list.items():
        write_to_file(json.dumps({k: v}), file_path)
    return adj_list
"""
Finds book catalogs for all members of a given work id and saves them in
html files: "./data/profile/catalog/*.html"

The work id is taken from the first command-line argument; when none is
given a default work id is used.
"""

import users
import books
import json
import sys
from helpers import *

try:
    work = sys.argv[1]
except IndexError:
    # No work id on the command line: fall back to the default work.
    work = '306947'

members = books.find_all_members(work)  # all members of given work
members = remove_duplicate(members)
print(len(members))

# Progress is reported 1-based: "<current> of <total>, <member>".
for i, member in enumerate(members, 1):
    print('%d of %d, %s' % (i, len(members), member))
    users.find_books(member)
#center = sort_dict(betweenness_centrality(GC))[0] #pos = graphviz_layout(G, prog="twopi", root=center) # draw and save #plot_graph(GC, members_a, members_b) #plot_hist(GC) return G def analysis_3(work_a, work_b): """""" adj_list = friends_list(members_a) adj_list = make_cleand_adjacancy_list(adj_list, members_a) adj_list_b = friends_list(members_b) adj_list_b = make_cleand_adjacancy_list(adj_list_b, members_b) adj_list.update(adj_list_b) all_members = remove_duplicate(members_a + members_b) adj_list = remove_not_book_member(adj_list, all_members) G = create_weighted_graph(adj_list) C = connected_component_subgraphs(G)[0] # Giant Component plot_graph(G, members_a, members_b, node_lable='True') if __name__ == '__main__': work_a = '306947' # The Holy Bible: King James Version (KJV) work_b = '1576656' # The Blind Watchmaker members_a = books.find_all_members(work_a) members_b = books.find_all_members(work_b) #G = analysis_1(work_a, work_b) #G = analysis_2(work_a, work_b) #G = analysis_3(work_a, work_b)