def recommend(self):
    close_users = BasicNetworkAnalyzer.compute_knearest_neighbors(
        self.usercircle, self.netdata.get_friends_nodes(self.usercircle),
        self.interact_type, self.K, data_type="learn")
    #print "Num close users", len(close_users), "Num friends", self.usercircle.get_num_friends()
    if len(close_users) < self.K:
        logging.warning("Cannot find k closest friends for recommend")
        return None
    self.rec_items = self.usercircle.compute_weighted_popular_recs(close_users, self.max_items)
    """
    if len(self.rec_items) == 0:
        print "oh"
        for sim, unode in close_users:
            print unode.length_train_ids
    """
    return self.rec_items
def recommend(self):
    close_users = BasicNetworkAnalyzer.compute_knearest_neighbors(
        self.usercircle, self.netdata.get_nonfriends_nodes(self.usercircle),
        self.interact_type, self.K, data_type="learn")
    #print "Num close users", len(close_users)
    if len(close_users) < self.K:
        logging.warning("Cannot find k closest global users for recommend")
        return None
    self.rec_items = self.usercircle.compute_weighted_popular_recs(close_users, self.max_items)
    """
    The list of recs can be empty because the close users have so few train_interactions.
    if len(self.rec_items) == 0:
        print "oh"
        for sim, unode in close_users:
            print unode.length_train_ids
    """
    return self.rec_items
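# --- Illustrative sketch (not part of the original code) -----------------------------
# The two recommend() variants above differ only in the candidate pool handed to
# compute_knearest_neighbors (friends vs. non-friends); both then delegate the ranking to
# compute_weighted_popular_recs. The helper below is a minimal, hypothetical sketch of
# that kind of similarity-weighted popularity ranking, assuming the neighbors arrive as
# (similarity, item_id_list) pairs; the real method operates on node objects instead.
def example_weighted_popular_recs(neighbors_items, max_items):
    from collections import defaultdict
    scores = defaultdict(float)
    for sim, item_ids in neighbors_items:
        for item_id in item_ids:
            # every neighbor votes for its items, weighted by its similarity to the ego user
            scores[item_id] += sim
    ranked = sorted(scores.iteritems(), key=lambda kv: kv[1], reverse=True)
    return [item_id for item_id, _score in ranked[:max_items]]

# Example: example_weighted_popular_recs([(0.8, [1, 2]), (0.5, [2, 3])], 2) -> [2, 1]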
def run_computation(data, computation_cmd, outf, interact_type, create_fake_prefs,
                    allow_duplicates, split_date_str, dataset_domain, dataset_path,
                    min_interacts_beforeaftersplit_per_user, max_interact_ratio_error,
                    max_sim_ratio_error, min_friends_match_ratio, traindata_fraction, M):
    net_analyzer = BasicNetworkAnalyzer(data)
    interaction_types = data.interact_types_dict
    filename_prefix = computation_cmd if computation_cmd is not None else ""

    if computation_cmd == "basic_stats" or computation_cmd is None:
        net_analyzer.show_basic_stats()
        ## use below if you want to write a new dataset (e.g. after filtering)
        data.store_ego_dataset("/home/amit/datasets/social_activity_data/lastfm_filtered_listen/",
                               write_maps=False)
        #data.compute_allpairs_sim(interact_type, data_type=ord("a"))
    elif computation_cmd == "random_similarity":
        for type_name, type_index in interaction_types.iteritems():
            circlesims, globalsims = net_analyzer.compare_circle_global_similarity(
                type_index, num_random_trials=5, cutoff_rating=cutoff_rating)
            #plotter.plotLinesYY(circlesims, globalsims, "Friends", "Global")
            outf.write("User_id\tcircle_sim\tnonfriend_sim\n")
            outf.write(type_name + '\n')
            for ind in range(len(circlesims)):
                outf.write("%s\t%f\t%f\n" % (circlesims[ind][0], circlesims[ind][1],
                                             globalsims[ind][1]))
            print "\n", type_name, ":"
            print "Circle Average", sum([v2 for v1, v2 in circlesims]) / float(len(circlesims))
            print "Global Average", sum([v2 for v1, v2 in globalsims]) / float(len(globalsims))
    elif computation_cmd == "knn_similarity":
        # Compute K-nearest similarity
        KLIMITS = [10]
        outf.write("User_id\tk\tcircle_sim\tnonfriend_sim\n")
        for type_name, type_index in interaction_types.iteritems():
            for curr_lim in KLIMITS:
                plot_circle, plot_external = net_analyzer.compare_circle_global_knnsimilarity(
                    type_index, klim=curr_lim, cutoff_rating=cutoff_rating)
                compare_sims(plot_circle, plot_external)
                outf.write(type_name + '\n')
                for ind in range(len(plot_circle)):
                    outf.write("%s\t%d\t%f\t%f\n" % (plot_circle[ind][0], curr_lim,
                                                     plot_circle[ind][1], plot_external[ind][1]))
                #plotter.plotLinesYY(plot_circle, plot_external, "Friends", "Global")
                print type_name, "K", curr_lim
                print "Circle Average", utils.mean_sd([v2 for v1, v2 in plot_circle]), len(plot_circle)
                print "Global Average", utils.mean_sd([v2 for v1, v2 in plot_external]), len(plot_external)
    elif computation_cmd == "knn_recommender":
        # Compute K-nearest recommender
        KLIMITS = [10]
        rec_analyzer = RecommenderAnalyzer(data, max_recs_shown=10, traintest_split=0.7,
                                           cutoff_rating=cutoff_rating)
        outf.write("User_id\tk\trun_index\tcircle_ndcg\tnonfriend_ndcg\n")
        for type_name, type_index in interaction_types.iteritems():
            for curr_lim in KLIMITS:
                local_avg = []
                global_avg = []
                Ntotal = 10
                for i in range(Ntotal):  # randomize because of training-test split.
                    plot_circle, plot_external = rec_analyzer.compare_knearest_recommenders(
                        type_index, klim=curr_lim, num_processes=2)
                    compare_sims(plot_circle, plot_external)
                    outf.write(type_name + "\n")
                    for ind in range(len(plot_circle)):
                        outf.write("%s\t%d\t%d\t%f\t%f\n" % (plot_circle[ind][0], curr_lim, i,
                                                             plot_circle[ind][1], plot_external[ind][1]))
                    print "\n", type_name, "K", curr_lim
                    #print plot_circle, plot_external
                    curr_avg_local = utils.mean_sd([v2 for v1, v2 in plot_circle])
                    curr_avg_global = utils.mean_sd([v2 for v1, v2 in plot_external])
                    print "Circle Average", curr_avg_local
                    print "Global Average", curr_avg_global
                    local_avg.append(curr_avg_local[0])
                    global_avg.append(curr_avg_global[0])
                    #plotLinesYY(plot_circle, plot_external, "Friends", "Global")
                print "Local", sum(local_avg) / float(Ntotal)
                print "Global", sum(global_avg) / float(Ntotal)
    elif computation_cmd == "circle_coverage":
        lim_friends = [(5, 10), (10, 20), (20, 50), (50, 100)]
        for fr_limit in lim_friends:
            locality_analyzer = LocalityAnalyzer(data)
            coverage_list = locality_analyzer.compare_circle_item_coverages(0, fr_limit[0], fr_limit[1])
            plotter.plotLineY(sorted(coverage_list), "User",
                              "Fraction of Items Covered with %d-%d friends" % (fr_limit[0], fr_limit[1]))
            print utils.mean_sd(coverage_list)
    elif computation_cmd == "items_edge_coverage":
        locality_analyzer = LocalityAnalyzer(data)
        items_cov_list, items_popularity, cov_ratio_list = locality_analyzer.compare_items_edge_coverage(
            1, minimum_interactions=1)
        print utils.mean_sd(items_cov_list)
        print utils.mean_sd(items_popularity)
        #plotter.plotHist(sorted([val for val in cov_ratio_list if val <= 1]), "Ratio of Edge coverage to total popularity", "Frequency", logyscale=True)
        #plotter.plotHist(sorted([val for val in cov_ratio_list]), "Ratio of Edge coverage to total popularity", "Frequency", logyscale=True)
        #plotter.plotHist(sorted(items_popularity), "Item", "total popularity")
        plotter.plotCumulativePopularity(items_popularity, labelx="Item percentile",
                                         labely="Cum. percent of number of likes")
    elif computation_cmd == "network_draw":
        net_visualizor = NetworkVisualizor(data)
        net_visualizor.draw_network()
    elif computation_cmd == "network_item_adopt":
        net_visualizor = NetworkVisualizor(data)
        pprint(net_visualizor.plot_item_adoption(1669118))
    elif computation_cmd == "node_details":
        for node_id in open('user_ids'):
            if node_id.strip('\n') != "User_id":
                net_analyzer.get_node_details(int(node_id.strip('\n')))
    elif computation_cmd == "store_dataset":
        user_interacts = net_analyzer.get_user_interacts(1, cutoff_rating)
        f = open(outf_path + 'user_interacts_' + dataset_domain + '.tsv', 'w')
        f.write("user_id\titem_id\ttimestamp\n")
        for user_id, item_id, timestamp in user_interacts:
            f.write("%s\t%s\t%s\n" % (user_id, item_id, timestamp))
        f.close()

        item_pop = net_analyzer.get_items_popularity(1, cutoff_rating)
        f = open(outf_path + 'items_' + dataset_domain + '.tsv', 'w')
        f.write("item_id\tpopularity\n")
        for item_id, pop in item_pop.iteritems():
            f.write("%s\t%s\n" % (item_id, pop))
        f.close()

        user_friends = net_analyzer.get_user_friends()
        f = open('user_friends_' + dataset_domain + '.tsv', 'w')
        f.write("user_id\tfriend_id\n")
        for user_id, friend_id in user_friends:
            f.write("%s\t%s\n" % (user_id, friend_id))
        f.close()
        print "Successfully stored tsv dataset"
    elif computation_cmd == "compare_interact_types":
        num_interacts_dict = net_analyzer.compare_interaction_types()
        interact_types = num_interacts_dict.keys()
        plotter.plotLinesYY(num_interacts_dict[interact_types[0]], num_interacts_dict[interact_types[1]],
                            interact_types[0], interact_types[1], display=True, logyscale=True)
        plotter.plotLinesYY(num_interacts_dict[interact_types[1]], num_interacts_dict[interact_types[2]],
                            interact_types[1], interact_types[2], display=True, logyscale=True)
        plotter.plotLinesYY(num_interacts_dict[interact_types[0]], num_interacts_dict[interact_types[2]],
                            interact_types[0], interact_types[2], display=True, logyscale=True)
    elif computation_cmd == "influence_test":
        #ta = TemporalAnalyzer(data)
        #interact_type = data.interact_types_dict["listen"]
        # time_scale can be 'w' (wallclock_time) or 'o' (ordinal_time)
        split_date_str = "2008/01/01"
        t_window = -1
        t_scale = ord('w')
        max_tries_val = 10000
        max_node_computes_val = 100
        max_interact_ratio_error = 0.1
        klim_val = 5
        split_timestamp = int(time.mktime(
            datetime.datetime.strptime(split_date_str, "%Y/%m/%d").timetuple()))
        # create training/test sets that will be used by fake preference generation
        data.create_training_test_bytime(interact_type, split_timestamp)
        if create_fake_prefs is not None:
            print data.get_nodes_list()[1].get_interactions(interact_type, cutoff_rating=-1)
            fake_data.generate_fake_preferences(
                data, interact_type, split_timestamp,
                min_interactions_beforeaftersplit_per_user=min_interacts_beforeaftersplit_per_user,
                time_window=t_window, time_scale=t_scale, method=create_fake_prefs)
            #fake_data.generate_random_preferences(data, interact_type, split_timestamp)
            print data.get_nodes_list()[1].get_interactions(interact_type, cutoff_rating=-1)
        # Need to generate again because fake data changes test data
        data.create_training_test_bytime(interact_type, split_timestamp)
        la = LocalityAnalyzer(data)
        inf_tuple = compute.test_influence(
            la, interact_type=interact_type, time_diff=t_window, time_scale=ord('w'),
            split_timestamp=split_timestamp,  #time_diff=100000, split_date_str="1970/06/23",
            control_divider=0.01,
            min_interactions_beforeaftersplit_per_user=min_interacts_beforeaftersplit_per_user,
            max_tries=max_tries_val, max_node_computes=max_node_computes_val,
            num_processes=4, max_interact_ratio_error=max_interact_ratio_error,
            klim=klim_val, method="influence")
        print "t-test results", ttest_rel(inf_tuple[2], inf_tuple[3])
        num_vals = len(inf_tuple[0])
        f = open("influence_test", "w")
        for i in range(num_vals):
            f.write("%f\t%f\t%f\t%f\n" % (inf_tuple[0][i], inf_tuple[1][i],
                                          inf_tuple[2][i], inf_tuple[3][i]))
        f.close()
    elif computation_cmd == "suscept_test":
        use_artists = "songs" if "songs" in dataset_path else "artists"
        interact_type_str = "listen" if interact_type == 0 else "love"
        #M = [50]#,20]#,30,40,50]
        t_scale = ord('o')  # ordinal scale, this is the default used in the paper.
        NUM_NODES_TO_COMPUTE = 4000000  # maximum number of nodes to compute
        num_threads = 4  # the number of threads to spawn
        max_tries_val = None  #30000  # should we stop after max_tries?
        max_node_computes_val = NUM_NODES_TO_COMPUTE / num_threads  # number of nodes to compute per thread
        #max_interact_ratio_error = 0.2  # these are errors (defaults are 0.1, 0.1)
        #max_sim_ratio_error = 0.2
        #min_friends_match_ratio = 0.5  # important to be 1 for simulation--because e.g. in influence, we use a person's all friends to compute his next like
        klim_val = None  # not used for influence test
        nonfr_match = "random"  # random, serial, kbest. Default is random.
        num_loop = 1  # number of times we calculate this. For averaging results over multiple runs.
        f = open("suscept_test_results/" + dataset_domain + dataset_path.split("/")[-2] +
                 interact_type_str + strftime("%Y-%m-%d_%H:%M:%S") + '.dat', 'w')
        f.write("# use_artists=%r\tallow_duplicates=%r\tmax_node_computes_val=%d\tcreate_fake_prefs=%r\tnum_loop=%d\n" % (
            use_artists, allow_duplicates, max_node_computes_val, create_fake_prefs, num_loop))
        f.write("# split_train_test_date=%s\ttime_scale=%d\tmin_interactions_beforeaftersplit_per_user=%d\tnum_threads=%d\n" % (
            split_date_str, t_scale, min_interacts_beforeaftersplit_per_user, num_threads))
        f.write("# max_interact_ratio_error=%f\tmax_sim_ratio_error=%f\tmin_friends_match_ratio=%f\n" % (
            max_interact_ratio_error, max_sim_ratio_error, min_friends_match_ratio))
        for t_window in M:
            for h in range(num_loop):
                f.write("\n\n################### ALERTINFO: STARTING ITERATION %d with M=%d\n" % (h, t_window))
                if split_date_str == "test":
                    split_timestamp = 2000
                else:
                    split_timestamp = int(time.mktime(
                        datetime.datetime.strptime(split_date_str, "%Y/%m/%d").timetuple()))
                #split_timestamp = 25000000
                if create_fake_prefs is not None:
                    data.create_training_test_bytime(interact_type, split_timestamp)
                    #print data.get_nodes_list()[1].get_interactions(interact_type, cutoff_rating=-1)
                    fake_data.generate_fake_preferences(
                        data, interact_type, split_timestamp,
                        min_interactions_beforeaftersplit_per_user=min_interacts_beforeaftersplit_per_user,
                        time_window=t_window, time_scale=t_scale, method=create_fake_prefs)
                    #print data.get_nodes_list()[1].get_interactions(interact_type, cutoff_rating=-1)
                # Need to generate again because fake data changes test data
                data.create_training_test_bytime(
                    interact_type, split_timestamp,
                    min_interactions_beforeaftersplit_per_user=min_interacts_beforeaftersplit_per_user)
                la = LocalityAnalyzer(data)
                inf_tuple = compute.test_influence(
                    la, interact_type=interact_type, time_diff=t_window, time_scale=t_scale,
                    split_timestamp=split_timestamp,  #time_diff=100000, split_date_str="1970/06/23",
                    control_divider=0.01,  # not used anymore
                    min_interactions_beforeaftersplit_per_user=min_interacts_beforeaftersplit_per_user,
                    max_tries=max_tries_val, max_node_computes=max_node_computes_val,
                    num_threads=num_threads,
                    max_interact_ratio_error=max_interact_ratio_error,
                    max_sim_ratio_error=max_sim_ratio_error,
                    min_friends_match_ratio=min_friends_match_ratio,
                    klim=klim_val, nonfr_match=nonfr_match, method="suscept",
                    allow_duplicates=allow_duplicates)
                print "t-test results", ttest_rel(inf_tuple[2], inf_tuple[3])
                num_vals = len(inf_tuple[0])
                f.write("TestSetSize\tFrSimilarity\tNonFrSimilarity\tFrOverlap\tNonFrOverlap\tRandom_run_no\tM\n")
                for i in range(num_vals):
                    f.write("%d\t%f\t%f\t%f\t%f\t%d\t%d\n" % (
                        inf_tuple[0][i], inf_tuple[1][i], inf_tuple[2][i],
                        inf_tuple[3][i], inf_tuple[4][i], h, t_window))
        f.close()
    elif computation_cmd == "gen_adopt_data":
        t_window = 100
        t_scale = ord('o')
        if split_date_str == "test":
            split_timestamp = 2000
        else:
            split_timestamp = int(time.mktime(
                datetime.datetime.strptime(split_date_str, "%Y/%m/%d").timetuple()))
        if create_fake_prefs is not None:
            data.create_training_test_bytime(interact_type, split_timestamp)
            #print data.get_nodes_list()[1].get_interactions(interact_type, cutoff_rating=-1)
            fake_data.generate_fake_preferences(
                data, interact_type, split_timestamp,
                min_interactions_beforeaftersplit_per_user=min_interacts_beforeaftersplit_per_user,
                time_window=t_window, time_scale=t_scale, method=create_fake_prefs)
        data.create_training_test_bytime(interact_type, split_timestamp)
        gen_adopt.generate_adoption_data(
            data, interact_type, split_timestamp,
            min_interactions_beforeaftersplit_per_user=min_interacts_beforeaftersplit_per_user,
            time_window=t_window, time_scale=t_scale)
    elif computation_cmd == "compute_split_date":
        ret_timestamp = compute.compute_cutoff_date(data, interact_type, traindata_fraction)
        print ret_timestamp
        print datetime.datetime.fromtimestamp(ret_timestamp * 86400).strftime("%Y-%m-%d")
def instantiate_networkdata_class(dataset_domain, dataset_path, impl_type, max_core_nodes,
                                  cutoff_rating, store_dataset, interact_type_val,
                                  min_interacts_beforeaftersplit_per_user):
    data = None
    #h = hpy()
    #h.setref()
    if dataset_domain == "twitter":
        data = HashtagDataPreparser(dataset_path, impl_type)
    elif dataset_domain == "lastfm":
        data = LastfmDataPreparserCSV(dataset_path, impl_type, cutoff_rating, max_core_nodes,
                                      store_dataset, use_artists=False)
    elif dataset_domain == "lastfm_simple":
        data = LastfmDataPreparserSimple(
            dataset_path, impl_type, cutoff_rating, max_core_nodes, store_dataset,
            use_artists=False, interact_type_val=interact_type_val,
            min_interactions_per_user=min_interacts_beforeaftersplit_per_user * 2)
    elif dataset_domain == "lastfm_lovelisten":
        data = LastfmDataPreparserLovelisten(
            dataset_path, impl_type, cutoff_rating, max_core_nodes, store_dataset,
            use_artists=False, interact_type_val=interact_type_val,
            min_interactions_per_user=min_interacts_beforeaftersplit_per_user * 2)
    elif dataset_domain == "goodreads":
        data = GoodreadsDataPreparser(
            dataset_path, impl_type, cutoff_rating, max_core_nodes, store_dataset,
            min_interactions_per_user=min_interacts_beforeaftersplit_per_user * 2)
    elif dataset_domain == "flixster":
        data = FlixsterDataPreparser(
            dataset_path, impl_type, cutoff_rating, max_core_nodes, store_dataset,
            min_interactions_per_user=min_interacts_beforeaftersplit_per_user * 2)
    elif dataset_domain == "flickr":
        data = FlickrDataPreparser(
            dataset_path, impl_type, cutoff_rating, max_core_nodes, store_dataset,
            min_interactions_per_user=min_interacts_beforeaftersplit_per_user * 2)

    try:
        data.get_all_data()
        BasicNetworkAnalyzer(data).show_basic_stats()
    except:
        raise
    return data
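# --- Illustrative usage (not in the original source) ---------------------------------
# A hypothetical driver showing how instantiate_networkdata_class and run_computation
# are assumed to fit together. Every concrete value below (paths, impl_type, thresholds,
# the "basic_stats" command) is a placeholder, not a default taken from this code.
def example_driver():
    data = instantiate_networkdata_class(
        dataset_domain="lastfm_simple",
        dataset_path="/path/to/dataset/",   # placeholder path
        impl_type="cython",                 # placeholder implementation flag
        max_core_nodes=None,
        cutoff_rating=-1,
        store_dataset=False,
        interact_type_val=0,
        min_interacts_beforeaftersplit_per_user=5)
    outf = open("results.tsv", "w")
    run_computation(data, "basic_stats", outf,
                    interact_type=0, create_fake_prefs=None, allow_duplicates=False,
                    split_date_str="2008/01/01", dataset_domain="lastfm_simple",
                    dataset_path="/path/to/dataset/",
                    min_interacts_beforeaftersplit_per_user=5,
                    max_interact_ratio_error=0.1, max_sim_ratio_error=0.1,
                    min_friends_match_ratio=1.0, traindata_fraction=0.7, M=[100])
    outf.close()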
def compute_susceptibility_randomselect(netdata, nodes_list, interact_type, cutoff_rating,
                                        control_divider, min_interactions_per_user,
                                        time_diff, time_scale, max_tries, max_node_computes,
                                        max_interact_ratio_error, nonfr_match, allow_duplicates):
    # Find similarity on training set
    max_sim_ratio_error = 0.1
    triplet_nodes = []
    counter = 0
    failed_counter = 0
    eligible_nodes_counter = 0
    count_success = 0
    edges_counter = 0
    total_tries_counter = 0
    time_saved_counter = 0
    if max_tries is None:
        max_tries = netdata.get_total_num_nodes()
    randomized_node_ids = random.sample(xrange(1, netdata.get_total_num_nodes() + 1), max_tries)
    data_type = "compare_train"
    data_type_code = ord(data_type[0])
    #sim_dict = {}
    for node in nodes_list:
        nonfr_ids = {}
        sim_dict = {}
        num_node_interacts = node.get_num_interactions(interact_type)  # all interactions, no check for duplicates
        #if not node.has_interactions(interact_type) or not node.has_friends():
        if (node.length_train_ids < min_interactions_per_user or
                node.length_test_ids < min_interactions_per_user or not node.has_friends()):
            #print "Node has no interactions. Skipping!"
            counter += 1
            continue
        eligible_nodes_counter += 1
        fnodes = netdata.get_friends_nodes(node)
        control_nonfr_nodes = []
        avg_fsim = 0
        avg_rsim = 0
        num_eligible_friends = 0
        selected_friends = []
        friend_ids = node.get_friend_ids()
        edges_counter += len(friend_ids)
        for fobj in fnodes:
            num_fobj_interacts = fobj.get_num_interactions(interact_type)
            if (fobj.length_train_ids >= min_interactions_per_user and
                    fobj.length_test_ids >= min_interactions_per_user):
                """
                fsim2 = node.compute_node_similarity(fobj, interact_type, cutoff_rating,
                        data_type_code, min_interactions_per_user,
                        time_diff=500000, time_scale=ord('w'))  #time_diff=-1, time_scale=time_scale
                """
                if (fobj.uid, node.uid) in sim_dict:
                    fsim = sim_dict[(fobj.uid, node.uid)]
                elif (node.uid, fobj.uid) in sim_dict:
                    fsim = sim_dict[(node.uid, fobj.uid)]
                else:
                    fsim = node.compute_node_similarity(fobj, interact_type, cutoff_rating,
                                                        data_type_code, min_interactions_per_user,
                                                        time_diff=-1, time_scale=time_scale)
                    sim_dict[(fobj.uid, node.uid)] = fsim
                #if fsim is None:
                #    print "Error: fsim cannot be None"
                #print fsim
                found = False
                if fsim is not None and fsim != -1:
                    num_eligible_friends += 1
                    total_tries_counter += 1
                    tries = 0
                    if nonfr_match == "random":
                        randomized_node_ids = random.sample(
                            xrange(1, netdata.get_total_num_nodes() + 1), max_tries)
                    elif nonfr_match == "kbest":
                        global_candidates = netdata.get_othernodes_iterable(
                            fobj, should_have_interactions=True)
                        globalk_neighbors = BasicNetworkAnalyzer.compute_knearest_neighbors(
                            fobj, global_candidates, interact_type, 1000, data_type=data_type,
                            cutoff_rating=-1, min_interactions_per_user=min_interactions_per_user,
                            time_diff=-1, time_scale=ord('w'))
                        randomized_node_ids = [heapq.heappop(globalk_neighbors)[1].uid
                                               for h in xrange(len(globalk_neighbors))]
                        randomized_node_ids.reverse()
                    elif nonfr_match == "serial":
                        randomized_node_ids = range(1, max_tries + 1)
                    else:
                        print "Error in parameter"
                        sys.exit(1)
                    r_index = 0
                    while not found and r_index < max_tries and r_index < len(randomized_node_ids):
                        rand_node_id = randomized_node_ids[r_index]
                        r_index += 1
                        if rand_node_id in nonfr_ids:
                            continue
                        rand_node = netdata.nodes[rand_node_id]
                        if (rand_node.length_train_ids >= min_interactions_per_user and
                                rand_node.length_test_ids >= min_interactions_per_user):
                            ratio_train = abs(rand_node.length_train_ids - fobj.length_train_ids) / float(fobj.length_train_ids)
                            if ratio_train <= max_interact_ratio_error:
                                if rand_node.uid not in friend_ids and rand_node.uid != node.uid:
                                    if (rand_node.uid, node.uid) in sim_dict:
                                        rsim = sim_dict[(rand_node.uid, node.uid)]
                                        time_saved_counter += 1
                                    elif (node.uid, rand_node.uid) in sim_dict:
                                        rsim = sim_dict[(node.uid, rand_node.uid)]
                                        time_saved_counter += 1
                                    else:
                                        rsim = node.compute_node_similarity(
                                            rand_node, interact_type, cutoff_rating,
                                            data_type_code, min_interactions_per_user,
                                            time_diff=-1, time_scale=time_scale)
                                        sim_dict[(rand_node.uid, node.uid)] = rsim
                                    """
                                    rsim2 = node.compute_node_similarity(rand_node, interact_type,
                                            cutoff_rating, data_type_code, min_interactions_per_user,
                                            time_diff=500000, time_scale=ord('w'))  #time_diff=-1, time_scale=time_scale
                                    """
                                    num_rnode_interacts = rand_node.get_num_interactions(interact_type)
                                    if rsim is not None and rsim != -1:
                                        sim_diff = abs(rsim - fsim)
                                        if (fsim == 0 and sim_diff <= 0.00001) or \
                                           (fsim > 0 and sim_diff / fsim <= max_sim_ratio_error):  # and (fsim2 > 0 and abs(rsim2-fsim2)/fsim2 <= max_sim_ratio_error)
                                            """
                                            fr_nonfr_sim = fobj.compute_node_similarity(rand_node,
                                                    interact_type, cutoff_rating, data_type_code,
                                                    min_interactions_per_user, time_diff=-1,
                                                    time_scale=time_scale)
                                            print fr_nonfr_sim, node.length_train_ids, fobj.length_train_ids, rand_node.length_train_ids, fsim, rsim, r_index, max_tries
                                            if fr_nonfr_sim > 2*fsim:
                                            """
                                            if True:
                                                found = True
                                                avg_fsim += fsim
                                                avg_rsim += rsim
                                                nonfr_ids[rand_node_id] = True
                                                control_nonfr_nodes.append(rand_node)
                                                selected_friends.append(fobj)
                        tries += 1
                    if not found:
                        #print "Could not get random non-friend with sim", fsim, "in %d tries" % tries
                        failed_counter += 1
        #print "SEE:", len(control_nonfr_nodes), num_eligible_friends
        if num_eligible_friends > 0 and len(control_nonfr_nodes) >= 1 * num_eligible_friends:
            avg_fsim = avg_fsim / float(len(control_nonfr_nodes))
            avg_rsim = avg_rsim / float(len(control_nonfr_nodes))
            #print num_eligible_friends, len(selected_friends)
            if len(selected_friends) != len(control_nonfr_nodes):
                print "ALERT: Something is wrong here!!"
                sys.exit(2)
            if len(control_nonfr_nodes) != num_eligible_friends:
                print "WARN: Cannot match all eligible friends", num_eligible_friends, len(control_nonfr_nodes)
            #print node.uid, [fr.uid for fr in selected_friends]
            triplet_nodes.append((node, selected_friends, control_nonfr_nodes, 0, 0, 0,
                                  avg_fsim, avg_rsim))
            count_success += 1
        if counter % 10 == 0:
            print "Done counter", counter
        if max_node_computes is not None:
            if counter > max_node_computes:
                print counter, max_node_computes
                break
        counter += 1

    print "\n--Number of nodes assigned to me (with interactions and friends):", len(nodes_list)
    print "--Eligible nodes (with interactions > %d):" % min_interactions_per_user, eligible_nodes_counter
    print "--Total edges from eligible nodes:", edges_counter
    #print "--Eligible friend-edges (with friend having interactions > %d):" % min_interactions_per_user, eligible_edges_counter
    print "--Number of tries (and successful caches) to find random non-friend:", total_tries_counter, time_saved_counter
    print "--Number of successful nodes (can find rnodes):", count_success
    print "--Successful triplets:", len(triplet_nodes)

    # Now compare influencer effect on test set
    data_type = "influence_effect"
    data_type_code = ord(data_type[0])
    influence_arr = compare_susceptibility_effect(triplet_nodes, interact_type, cutoff_rating,
                                                  min_interactions_per_user, time_diff,
                                                  time_scale, data_type_code, allow_duplicates)
    return influence_arr
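# --- Illustrative sketch (not part of the original code) -----------------------------
# compute_susceptibility_randomselect pairs each eligible friend with a random non-friend
# "control" whose training-set size and similarity to the ego closely match the friend's.
# The helper below restates that acceptance rule with flat arguments; the function name
# and argument list are hypothetical, but the two thresholds mirror the checks above.
def example_is_matched_control(fsim, rsim, fr_train_len, cand_train_len,
                               max_interact_ratio_error=0.1, max_sim_ratio_error=0.1):
    if rsim is None or rsim == -1:
        return False
    # candidate's training-set size must be close to the friend's
    ratio_train = abs(cand_train_len - fr_train_len) / float(fr_train_len)
    if ratio_train > max_interact_ratio_error:
        return False
    # candidate's similarity to the ego must be close to the friend's similarity
    sim_diff = abs(rsim - fsim)
    if fsim == 0:
        return sim_diff <= 0.00001
    return sim_diff / fsim <= max_sim_ratio_error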
def __init__(self, netdata):
    BasicNetworkAnalyzer.__init__(self, netdata)
    self.interactions_stream = []
    self.items_pop = defaultdict(int)
    self.num_users_with_interactions = 0
import sys  # needed for sys.argv below
import numpy as np
import operator

from network_analyzer_example import *
import compare_adopt_share
from compare_adopt_share import *
import socintpy.util.plotter as plotter
from socintpy.networkcompute.basic_network_analyzer import BasicNetworkAnalyzer

if __name__ == "__main__":
    if len(sys.argv) == 2:
        print sys.argv[1]
        # if True, then use raw_data csvs and store as ego_nets
        # note: bool(sys.argv[1]) is True for any non-empty string, including "False"
        data = get_data(bool(sys.argv[1]))
    else:
        data = get_data()
    net_analyzer = BasicNetworkAnalyzer(data)
    #net_analyzer.show_basic_stats()
    na = AdoptShareComparer(data)