def main(graph_name):
    """Parameter-landscape sweep for the infection (idea-spreading) model on the
    hospital shift network, using only the TRAINING segment of the real adoption
    curve (days 0..cutting_day).

    For every (prob_infection, prob_Immune) pair on a 0.1-spaced grid it runs
    Niter_training stochastic simulations, compares each simulated adoption
    curve against the real one, writes a landscape file plus per-pair histograms,
    and finally reports the optimum parameter pair under two criteria
    (smallest same-end distance, and smallest product of trajectory/end distances).

    Parameters
    ----------
    graph_name : str
        Path to a GML file; nodes are either shifts (type "shift") or doctors
        (type "A" attending / "F" fellow), linked when a doctor works a shift.

    Side effects: writes several files under ../Results/weight_shifts/ and
    prints progress to stdout.  (Python 2 code: print statements, print >> f.)
    """

    cutting_day = 175  # to separate training-testing

    G = nx.read_gml(graph_name)

    # T3 doesnt share fellows in the weekend (but they are the exception)
    list_id_weekends_T3 = look_for_T3_weekends(G)

    all_team = "NO"  # as adopters or not

    Nbins = 20  # for the histogram of sum of distances

    dir_real_data = '../Results/'
    dir = "../Results/weight_shifts/infection/"  # NOTE(review): shadows builtin `dir`; kept for compatibility

    # >= than + or - dr difference at the end of the evolution
    # (NO realization ends up closer than this!!!! if 2, i get and empty list!!!)
    delta_end = 3.

    Niter_training = 1000

    fixed_param = ""  # "FIXED_Pimm0_"  or ""  # for the Results file that contains the sorted list of best parameters

    # Landscape output file is truncated up front; each grid point appends one row later.
    output_file3 = "../Results/weight_shifts/Landscape_parameters_infection_train_test_" + str(
        Niter_training) + "iter.dat"
    file3 = open(output_file3, 'wt')
    file3.close()

    ######################################################################################
    #   I read the file of the actual evolution of the idea spreading in the hospital:  ##
    ######################################################################################

    if all_team == "YES":
        print "remember that now i use the file of adopters without fellows\n../Results/Actual_evolution_adopters_NO_fellows_only_attendings.dat"
        exit()
    else:
        filename_actual_evol = "../Results/Actual_evolution_adopters_NO_fellows_only_attendings.dat"

    file1 = open(filename_actual_evol, 'r')
    ## i read the file: list_dates_and_names_current_adopters.txt (created with: extract_real_evolution_number_adopters.py)

    list_lines_file = file1.readlines()

    # Second space-separated column of each row is the adopter count for that day.
    list_actual_evol = []
    for line in list_lines_file:  # [1:]:  # i exclude the first row
        num_adopters = float(line.split(" ")[1])
        list_actual_evol.append(num_adopters)

    list_actual_evol_training = list_actual_evol[:cutting_day]
    # list_actual_evol_testing=list_actual_evol[(cutting_day-1):]  # i dont use this

    ##################################################################

    #../Results/network_final_schedule_withTeam3/infection/Average_time_evolution_Infection_p0.9_Immune0.5_1000iter_2012.dat

    # Grid over infection probability and immunity probability, step 0.1 in [0, 1].
    prob_min = 0.0
    prob_max = 1.01
    delta_prob = 0.1

    prob_Immune_min = 0.00
    prob_Immune_max = 1.01
    delta_prob_Immune = 0.1

    # NOTE(review): this list is never reset inside the grid loops, so the
    # per-pair histogram written from it below accumulates realizations from
    # ALL parameter pairs visited so far — confirm whether that is intended
    # (the sibling *abs* list IS reset per pair).
    list_dist_at_ending_point_fixed_parameters = []

    # i will save the filename as key and the tot distance from that curve to the original one
    dict_filenames_tot_distance = {}
    dict_filenames_prod_distances = {}

    prob_Immune = prob_Immune_min
    while prob_Immune <= prob_Immune_max:
        print "prom Immune:", prob_Immune

        prob_infection = prob_min
        while prob_infection <= prob_max:
            print " p:", prob_infection

            # Filename only used as a dict key below; the averaged curve itself is not written here.
            output_file2 = dir + "Average_time_evolution_Infection_training_p" + str(
                prob_infection) + "_" + "Immune" + str(
                    prob_Immune) + "_" + str(
                        Niter_training) + "iter_2012_avg_ic_day" + str(
                            cutting_day) + ".dat"
            # file2 = open(output_file2,'wt')  I DONT NEED TO WRITE IT, COS I WILL USE THE WHOLE FILE FROM THE WHOLE FIT, WITH THE PARAMETER VALUES THAT THE TESTING-UP-TODAY-125 TELLS ME
            # file2.close()

            # i create the empty list of list for the Niter temporal evolutions
            num_shifts = 0
            num_Drs = 0.
            for n in G.nodes():
                G.node[n]["status"] = "S"
                if G.node[n]['type'] == "shift":
                    num_shifts += 1
                else:
                    num_Drs += 1.

            # list_final_I_values_fixed_p=[]  # i dont care about the final values right now, but about the whole time evol
            list_lists_t_evolutions = []

            list_dist_fixed_parameters = []
            list_dist_abs_at_ending_point_fixed_parameters = []
            list_final_num_infected = []

            for iter in range(Niter_training):
                # print "  iter:",iter

                list_I = []  # list infected doctors
                list_ordering = []
                list_s = []
                list_A = []
                list_F = []

                ########### set I.C.
                max_order = 0
                for n in G.nodes():
                    G.node[n]["status"] = "S"  # all nodes are Susceptible
                    if G.node[n]['type'] == "shift":
                        list_s.append(n)
                        # max_order tracks the last shift index (not used in the training loop itself)
                        if G.node[n]['order'] > max_order:
                            max_order = G.node[n]['order']
                    else:
                        # Weiss and Wunderink are the two seed adopters of every realization.
                        if G.node[n]['label'] == "Wunderink" or G.node[n][
                                "label"] == "Weiss":
                            G.node[n]["status"] = "I"
                            list_I.append(G.node[n]['label'])

                        if G.node[n]['type'] == "A":
                            list_A.append(n)
                        if G.node[n]['type'] == "F":
                            list_F.append(n)

                list_single_t_evolution = []
                list_single_t_evolution.append(
                    2.0)  # I always start with TWO infected doctors!!

                for n in G.nodes(
                ):  # i make some DOCTORs INMUNE (anyone except Weiss and Wunderink)
                    if (G.node[n]['type'] == "A") or (G.node[n]['type'] == "F"):
                        if G.node[n]['label'] != "Wunderink" and G.node[n][
                                "label"] != "Weiss":
                            rand = random.random()
                            if rand < prob_Immune:
                                G.node[n]["status"] = "Immune"

                # print max_order

                ################# the dynamics starts:
                t = 1
                while t < cutting_day:  # loop over shifts, in order just until cutting day (training segment)
                    for n in G.nodes():
                        if G.node[n]['type'] == "shift" and G.node[n][
                                'order'] == t:
                            shift_lenght = int(G.node[n]['shift_lenght'])
                            # because during weekends, the fellow does rounds one day with Att1
                            # and the other day with Att2. (weekend shifts for T3 are two day
                            # long, with no sharing fellows)
                            if shift_lenght == 2 and n not in list_id_weekends_T3:
                                shift_lenght = 1
                                # print "one-day weekend", G.node[n]['label'],G.node[n]['shift_lenght']

                            flag_possible_infection = 0
                            for doctor in G.neighbors(
                                    n):  # first i check if any doctor is infected in this shift
                                if G.node[doctor]["status"] == "I":
                                    flag_possible_infection = 1

                            if flag_possible_infection:
                                for doctor in G.neighbors(
                                        n):  # then the doctors in that shift, gets infected with prob_infection
                                    # one independent infection trial per day of the shift
                                    for i in range(shift_lenght):
                                        if G.node[doctor]["status"] == "S":
                                            rand = random.random()
                                            if rand < prob_infection:
                                                G.node[doctor]["status"] = "I"
                                                # only attendings are counted in the adoption curve
                                                if G.node[doctor][
                                                        "type"] == "A":
                                                    list_I.append(
                                                        G.node[doctor]
                                                        ["label"])

                    list_single_t_evolution.append(float(
                        len(list_I)))  #/(len(list_A)+len(list_F)))

                    t += 1
                ######## end t loop

                list_lists_t_evolutions.append(list_single_t_evolution)

                list_dist_fixed_parameters.append(
                    compare_real_evol_vs_simus_to_be_called.compare_two_curves(
                        list_actual_evol_training, list_single_t_evolution))

                # i save the distance at the ending point between the current simu and actual evol
                list_dist_abs_at_ending_point_fixed_parameters.append(
                    abs(list_single_t_evolution[-1] -
                        list_actual_evol_training[-1]))

                # print "actual:",len(list_actual_evol_training)," simu:",len(list_single_t_evolution)  # 125, 125

                list_final_num_infected.append(list_single_t_evolution[-1])

                # signed version (can be negative); accumulated across ALL parameter pairs — see NOTE above
                list_dist_at_ending_point_fixed_parameters.append(
                    list_single_t_evolution[-1] - list_actual_evol_training[-1]
                )  # i save the distance at the ending point between the current simu and actual evol

            ######## end loop Niter for the training fase

            # Summary triple for this parameter pair: [mean traject dist, std, mean |end dist|].
            list_pair_dist_std_delta_end = []

            list_pair_dist_std_delta_end.append(
                numpy.mean(list_dist_fixed_parameters)
            )  # average dist between the curves over Niter
            list_pair_dist_std_delta_end.append(
                numpy.std(list_dist_fixed_parameters))

            list_pair_dist_std_delta_end.append(
                numpy.mean(list_dist_abs_at_ending_point_fixed_parameters))

            file3 = open(output_file3, 'at')  # i print out the landscape
            print >> file3, prob_infection, prob_Immune, numpy.mean(
                list_dist_abs_at_ending_point_fixed_parameters
            ), numpy.mean(list_dist_fixed_parameters), numpy.mean(
                list_final_num_infected
            ), numpy.std(list_final_num_infected), numpy.std(
                list_final_num_infected) / numpy.mean(list_final_num_infected)
            file3.close()

            histogram_filename = "../Results/weight_shifts/histogr_raw_distances_ending_test_train_infection_p" + str(
                prob_infection) + "_Immune" + str(prob_Immune) + "_" + str(
                    Niter_training) + "iter_day" + str(cutting_day) + ".dat"
            histograma_gral_negv_posit.histograma(
                list_dist_at_ending_point_fixed_parameters, histogram_filename)

            histogram_filename2 = "../Results/weight_shifts/histogr_sum_dist_traject_infection_training_p" + str(
                prob_infection
            ) + "_" + "Immune" + str(prob_Immune) + "_" + str(
                Niter_training) + "iter_day" + str(cutting_day) + ".dat"
            histograma_bines_gral.histograma_bins(list_dist_fixed_parameters,
                                                  Nbins, histogram_filename2)

            print "written histogram file: ", histogram_filename
            print "written histogram file: ", histogram_filename2

            # if SD=0, it is a problem, because then that is the minimun value,
            # but not the optimum i am looking for!!
            value = numpy.mean(list_dist_fixed_parameters) * numpy.mean(
                list_dist_abs_at_ending_point_fixed_parameters)

            dict_filenames_prod_distances[output_file2] = value

            if (
                    numpy.mean(list_dist_abs_at_ending_point_fixed_parameters)
            ) <= delta_end:  # i only consider situations close enough at the ending point

                dict_filenames_tot_distance[
                    output_file2] = list_pair_dist_std_delta_end

                print numpy.mean(list_dist_abs_at_ending_point_fixed_parameters
                                 ), "added scenario:", output_file2

            # file2 = open(output_file2,'at')
            #for s in range(len(list_single_t_evolution)):
            #    list_fixed_t=[]
            #    for iter in range (Niter_training):
            #        list_fixed_t.append(list_lists_t_evolutions[iter][s])
            #print >> file2, s,numpy.mean(list_fixed_t)
            #file2.close()

            prob_infection += delta_prob

        prob_Immune += delta_prob_Immune

    # Criterion 1: smallest mean distance among pairs that ended within delta_end.
    list_order_dict = compare_real_evol_vs_simus_to_be_called.pick_minimum_same_end(
        dict_filenames_tot_distance, "Infection_training_weight", all_team,
        Niter_training, cutting_day)
    # it returns a list of tuples like this : ('../Results/network_final_schedule_withTeam3_local/infection/Average_time_evolution_Infection_training_p0.7_Immune0.0_2iter_2012.dat', [2540.0, 208.0, 1.0]) the best set of parameters being the fist one of the elements in the list.

    string_name = "infection_training_" + fixed_param + str(
        Niter_training) + "iter_day" + str(
            cutting_day
        ) + ".dat"  # for the "Results" file with the sorted list of files

    # Criterion 2: smallest product of trajectory distance and end-point distance.
    list_order_dict2 = compare_real_evol_vs_simus_to_be_called.pick_minimum_prod_distances(
        dict_filenames_prod_distances, string_name, all_team, Niter_training,
        cutting_day)

    # Parameters are parsed back out of the winning filename.
    optimum_filename = list_order_dict[0][0]
    prob_infection = float(list_order_dict[0][0].split("_p")[1].split("_")[0])
    prob_Immune = float(
        list_order_dict[0][0].split("_Immune")[1].split("_")[0])

    print "Optimum parameters (old method) at day", cutting_day, " are: p=", prob_infection, " and Pimmune=", prob_Immune

    # i already know the optimum, now i run the dynamics with those values, starting from the average state on the cutting point, and test:

    # Criterion-2 optimum overwrites the criterion-1 values and is the one reported/used.
    optimum_filename = list_order_dict2[0][0]
    prob_infection = float(list_order_dict2[0][0].split("_p")[1].split("_")[0])
    prob_Immune = float(
        list_order_dict2[0][0].split("_Immune")[1].split("_")[0])

    print "Optimum parameters (product of distances along_traject and at the end) at day", cutting_day, " are: p=", prob_infection, " and Pimmune=", prob_Immune

    print "Run that simulation with the optimum parameter set:", optimum_filename
    print "printed out landscape file:", output_file3

    output_file10 = "../Results/weight_shifts/Summary_results_training_segment_infection_p" + str(
        prob_infection) + "_" + "Immune" + str(prob_Immune) + "_" + str(
            Niter_training) + "iter_avg_ic_day" + str(cutting_day) + ".dat"
    file10 = open(output_file10, 'wt')

    print >> file10, "Summary results from train-testing persuasion with", Niter_training, "iter , using all the individual cutting points as IC, and with values for the parameters: prob_inf ", prob_infection, " prob immune: ", prob_Immune, "\n"
    print >> file10, "Look for the file (or run that simulation) with the optimum parameter set:", optimum_filename

    file10.close()
def main(): pupulation_age = "All" #"young" # or "adult" or "All" if pupulation_age == "young": min_age_threshold = 0 max_age_threshold = 15 elif pupulation_age == "adult": min_age_threshold = 16 max_age_threshold = 100 elif pupulation_age == "All": min_age_threshold = 0 max_age_threshold = 100 else: print "wrong age range" exit() R = 10 P = 5 ####### to select results only from given rounds (both ends included) min_round = 1 max_round = 18 ######### input file filename = "../Data/userdata.pickle" master_list = pickle.load(open( filename, 'rb')) # es una lista: un elemento por jugador (541) ######### ######### output files Nbins_fraction_coop = 15 name_h_fraction_coop = "../Results/histogram_fraction_coop_tot_users.dat" Nbins_tot_payoff = 20 name_h_tot_payoff = "../Results/histogram_tot_payoff_users.dat" Nbins_avg_payoff = 20 name_h_avg_payoff = "../Results/histogram_avg_payoff_users.dat" output_filename1 = "../Results/Scatter_plot_cooperation_tot_and_avg_payoff.dat" output1 = open(output_filename1, 'wt') ######### ### master_list tiene la forma: [{'guany_total': 110L, 'partida': 1L, 'genere': u'h', 'num_eleccions': 14, 'edat': 50L, 'rationality': 66.666666666666671, 'ambition': 100.0, 'rondes': [{'guany_oponent': 10L, 'ambition': None, 'seleccio': u'C', 'oponent': 7L, 'S': 6L, 'T': 5L, 'seleccio_oponent': u'C', 'numronda': 1L, 'guany': 10L, 'cuadrant': u'Harmony', 'rationality': 1.0}, {'guany_oponent': 6L, 'ambition': None, 'seleccio': u'D', 'oponent': 17L, 'S': 6L, 'T': 8L, 'seleccio_oponent': u'C', 'numronda': 2L, 'guany': 8L, 'cuadrant': u'Harmony', 'rationality': 0.0},...], 'nickname': u'Caesar', 'id': 2L}] #la llave key tiene a su vez como valor una lista de diccionarios (uno por ronda) # [{'guany_oponent': 10L, 'ambition': None, 'seleccio': u'C', 'oponent': 7L, 'S': 6L, 'T': 5L, 'seleccio_oponent': u'C', 'numronda': 1L, 'guany': 10L, 'cuadrant': u'Harmony', 'rationality': 1.0}, {'guany_oponent': 6L, 'ambition': None, 'seleccio': u'D', 'oponent': 17L, 'S': 
6L, 'T': 8L, 'seleccio_oponent': u'C', 'numronda': 2L, 'guany': 8L, 'cuadrant': u'Harmony', 'rationality': 0.0}, ...] num_valid_actions = 0. num_lower_H_actions = 0 num_higher_H_actions = 0 coop_actions_higher = 0 coop_actions_lower = 0 dict_user_id_list_coop = {} dict_user_id_list_tot_payoff = {} dict_user_id_gender = {} dict_user_list_actions_in_lower_Harmony = {} dict_user_avg_coop_in_lower_Harmony = {} dict_user_id_tot_numelections = {} dict_user_id_list_strat = {} num_users = float(len(master_list)) list_cooperators_in_lower_Harmony = [] list_cooperators_in_higher_Harmony = [] list_defectors_in_lower_Harmony = [] for dictionary in master_list: # cada elemento de la lista es a su vez un dict nickname = unidecode(dictionary['nickname']).replace(" ", "_") user_id = dictionary['id'] payoff_total = float(dictionary['guany_total'] ) # this is calculated only up to round #13 !! partida = dictionary['partida'] gender = dictionary['genere'] if gender == "h": gender = 1 elif gender == "d": gender = 0 dict_user_id_gender[user_id] = gender num_elecciones = int(dictionary['num_eleccions']) age = int(dictionary['edat']) avg_racionalidad = dictionary['rationality'] avg_ambicion = dictionary['ambition'] num_rondas = len(dictionary['rondes']) dict_user_id_tot_numelections[user_id] = num_elecciones if user_id not in dict_user_id_list_coop: dict_user_id_list_coop[user_id] = [] dict_user_id_list_tot_payoff[user_id] = [] dict_user_id_list_strat[user_id] = [] list_dict_rondas = dictionary['rondes'] for dict_ronda in list_dict_rondas: ## cada diccionario de ronda tiene: {'guany_oponent': 10L, 'ambition': None, 'seleccio': u'C', 'oponent': 7L, 'S': 6L, 'T': 5L, 'seleccio_oponent': u'C', 'numronda': 1L, 'guany': 10L, 'cuadrant': u'Harmony', 'rationality': 1.0} T = int(dict_ronda['T']) S = int(dict_ronda['S']) list_four_possible_values = [P, R, T, S] punto_TS = (T, S) try: payoff = float(dict_ronda['guany']) payoff_norm = float(dict_ronda['guany']) / float( 
max(list_four_possible_values)) except TypeError: payoff = dict_ronda['guany'] # if payoff is None payoff_oponent = dict_ronda['guany_oponent'] rationality = dict_ronda['rationality'] ambition = dict_ronda['ambition'] round_number = dict_ronda['numronda'] action = dict_ronda['seleccio'] if action == "C": action = 1. elif action == "D": action = 0. # si no ha elegido nada, es None if action != None: num_valid_actions += 1 if user_id not in dict_user_list_actions_in_lower_Harmony: dict_user_list_actions_in_lower_Harmony[user_id] = [] if action != None: dict_user_id_list_coop[user_id].append(action) if payoff != None: dict_user_id_list_tot_payoff[user_id].append(payoff) num_ronda = dict_ronda['numronda'] quadrant = dict_ronda['cuadrant'].replace(" ", "_").replace("'", "") action_oponent = dict_ronda['seleccio_oponent'] if action_oponent == "C": action_oponent = 1. elif action_oponent == "D": action_oponent = 0. # si no ha elegido nada, es None oponent_id = dict_ronda['oponent'] random_action = random.choice([0, 1]) # print "\nR:",R, " S:",S, " T:", T, " P:",P, " action:",action, " payoff:",payoff strat = None strat1 = None if action == 1: if R == max(R, S, T, P) or S == max(R, S, T, P): strat1 = "max_payoff" elif action == 0: if T == max(R, S, T, P) or T == max(R, S, T, P): strat1 = "max_payoff" max_diff = max(R - R, S - T, T - S, P - P) min_diff = min(R - R, S - T, T - S, P - P) strat2 = None if action == 1: if S - T == max_diff: strat2 = "max_diff" elif action == 0: if T - S == max_diff: strat2 = "max_diff" if strat1 != None and strat2 != None: if "payoff" in strat1: strat = strat1 + " " + strat2 else: strat = strat2 + " " + strat1 else: if strat1 == None: strat = strat2 else: strat = strat1 dict_user_id_list_strat[user_id].append(strat) if S >= 5 and S <= 10: if T >= 5 and T <= 10: if S <= T: # the lower triangle of the Harmony game: if action == 1: if user_id not in list_cooperators_in_lower_Harmony: list_cooperators_in_lower_Harmony.append( user_id) 
coop_actions_lower += 1 elif action == 0: if user_id not in list_defectors_in_lower_Harmony: list_defectors_in_lower_Harmony.append(user_id) if action != None: dict_user_list_actions_in_lower_Harmony[ user_id].append(action) num_lower_H_actions += 1 else: # the upper triangle of the Harmony game: if action == 1: if user_id not in list_cooperators_in_higher_Harmony: list_cooperators_in_higher_Harmony.append( user_id) coop_actions_higher += 1 num_higher_H_actions += 1 ###### end loop over lines in the main dict list_frac_coop_users = [] list_tot_payoff_users = [] list_avg_payoff_users = [] dict_common_strat_num_users = {} dict_strat_num_users = {} for user_id in dict_user_id_list_coop: fract_coop = numpy.mean(dict_user_id_list_coop[user_id]) list_frac_coop_users.append(fract_coop) tot_payoff = sum(dict_user_id_list_tot_payoff[user_id]) list_tot_payoff_users.append(tot_payoff) avg_payoff = numpy.mean(dict_user_id_list_tot_payoff[user_id]) list_avg_payoff_users.append(avg_payoff) common_strat = max(set(dict_user_id_list_strat[user_id]), key=dict_user_id_list_strat[user_id].count ) # most common element in the list print >> output1, fract_coop, tot_payoff, avg_payoff, dict_user_id_gender[ user_id], dict_user_id_tot_numelections[user_id] ##### counting most common strategy per user if common_strat not in dict_common_strat_num_users: dict_common_strat_num_users[common_strat] = 0 dict_common_strat_num_users[common_strat] += 1 ######## counting all strategies for strat in dict_user_id_list_strat[user_id]: if strat not in dict_strat_num_users: dict_strat_num_users[strat] = 0 dict_strat_num_users[strat] += 1 list_avg_cooperators_lower_H = [] for user_id in dict_user_list_actions_in_lower_Harmony: dict_user_avg_coop_in_lower_Harmony[user_id] = numpy.mean( dict_user_list_actions_in_lower_Harmony[user_id]) print user_id, dict_user_list_actions_in_lower_Harmony[ user_id], dict_user_avg_coop_in_lower_Harmony[user_id] if dict_user_avg_coop_in_lower_Harmony[user_id] > 0.5: 
list_avg_cooperators_lower_H.append(user_id) print len(dict_user_list_actions_in_lower_Harmony), len( list_avg_cooperators_lower_H) raw_input() histograma_bines_gral.histograma_bins( list_frac_coop_users, Nbins_fraction_coop, name_h_fraction_coop ) #x_position , norm_count, count, norm_cumulat_count, cumulat_count , float(hist[b])/float(len(lista)) histograma_bines_gral.histograma_bins(list_tot_payoff_users, Nbins_tot_payoff, name_h_tot_payoff) histograma_bines_gral.histograma_bins(list_avg_payoff_users, Nbins_avg_payoff, name_h_avg_payoff) output1.close() print "written output file:", output_filename1 print "common strategies within user:"******"\nall strategies:" for key in dict_strat_num_users: print key, dict_strat_num_users[ key], dict_strat_num_users[key] / num_valid_actions print "# items in the pickle:", len(master_list) print "\n# unique coop in lower Harmony:", len( list_cooperators_in_lower_Harmony ), " # actions in lower H:", num_lower_H_actions, " fract_coop:", coop_actions_lower / float( num_lower_H_actions), " # avg cooperatos (>0.5) in lower H:", len( list_avg_cooperators_lower_H) print "\n# unique defectors in lower Harmony:", len( list_defectors_in_lower_Harmony) print "\nintersection unique users cooperators and defectors in lower Harmony", len( list( set(list_cooperators_in_lower_Harmony) & set(list_defectors_in_lower_Harmony))) print "\n# unique coop in higher Harmony:", len( list_cooperators_in_higher_Harmony ), " # actions in higher H:", num_higher_H_actions, " fract_coop:", coop_actions_higher / float( num_higher_H_actions) print " tot # valid actions:", num_valid_actions, " tot # users:", num_users #output2= open("../Results/pickle_cooperators_lower_H.pickle",'wt') #pickle.dump(list_avg_cooperators_lower_H, output2) ####### i read the file again to compare levels of cooperations for some sets of users list_actions_all_users = [] list_actions_coop_in_lower_H = [] list_actions_NO_coop_in_lower_H = [] for dictionary in master_list: # cada 
elemento de la lista es a su vez un dict user_id = dictionary['id'] list_dict_rondas = dictionary['rondes'] for dict_ronda in list_dict_rondas: try: payoff = float(dict_ronda['guany']) payoff_norm = float(dict_ronda['guany']) / float( max(list_four_possible_values)) except TypeError: payoff = dict_ronda['guany'] # if payoff is None payoff_oponent = dict_ronda['guany_oponent'] action = dict_ronda['seleccio'] if action == "C": action = 1. elif action == "D": action = 0. # si no ha elegido nada, es None if action != None: list_actions_all_users.append(action) if user_id in list_cooperators_in_lower_Harmony: list_actions_coop_in_lower_H.append(action) else: list_actions_NO_coop_in_lower_H.append(action) print "avg coop all users:", numpy.mean(list_actions_all_users), len( list_actions_all_users), " avg coop special set:", numpy.mean( list_actions_coop_in_lower_H), len( list_actions_coop_in_lower_H ), " avg coop NO special set:", numpy.mean( list_actions_NO_coop_in_lower_H), len( list_actions_NO_coop_in_lower_H)
def main(graph_name): cutting_day = 125 # to separate training-testing G = nx.read_gml(graph_name) all_team = "NO" # as adopters or not dir_real_data = '../Results/' delta_end = 3 # >= than + or - dr difference at the end of the evolution (NO realization ends up closer than this!!!! if 2, i get and empty list!!!) Niter_training = 5 Niter_testing = 5 ###################################################################################### # I read the file of the actual evolution of the idea spreading in the hospital: ## ###################################################################################### if all_team == "YES": filename_actual_evol = dir_real_data + "HospitalModel_august1_adoption_counts_all_team_as_adopters_SIMPLER.csv" else: filename_actual_evol = dir_real_data + "HospitalModel_august1_adoption_counts_SIMPLER.csv" #ya no necesito CAMBIAR TB EL NOMBRE DEL ARCHIVO EN EL CODIGO PARA COMPARAR CURVAs list_actual_evol = [] result_actual_file = csv.reader(open(filename_actual_evol, 'rb'), delimiter=',') cont = 0 for row in result_actual_file: if cont > 0: # i ignore the first line with the headers num_adopters = row[3] list_actual_evol.append(float(num_adopters)) cont += 1 list_actual_evol_training = list_actual_evol[:cutting_day] list_actual_evol_testing = list_actual_evol[(cutting_day - 1):] ################################################################## #../Results/network_final_schedule_withTeam3/infection/Average_time_evolution_Infection_p0.9_Immune0.5_1000iter_2012.dat prob_min = 0.00 prob_max = 1.01 delta_prob = 0.1 prob_Immune_min = 0.00 prob_Immune_max = 1.01 delta_prob_Immune = 0.1 dir = "../Results/network_final_schedule_withTeam3_local/infection/" dict_filenames_tot_distance = { } # i will save the filename as key and the tot distance from that curve to the original one dict_filenames_list_dict_network_states = { } # i will save the filename as key and the list of networks at cutting day as value prob_Immune = prob_Immune_min while 
prob_Immune <= prob_Immune_max: print "prom Immune:", prob_Immune prob_infection = prob_min while prob_infection <= prob_max: print " p:", prob_infection output_file2 = dir + "Average_time_evolution_Infection_training_p" + str( prob_infection) + "_" + "Immune" + str( prob_Immune) + "_" + str(Niter_training) + "iter_2012.dat" file2 = open(output_file2, 'wt') file2.close() # i create the empty list of list for the Niter temporal evolutions num_shifts = 0 for n in G.nodes(): G.node[n]["status"] = "S" if G.node[n]['type'] == "shift": num_shifts += 1 # list_final_I_values_fixed_p=[] # i dont care about the final values right now, but about the whole time evol list_lists_t_evolutions = [] list_dist_fixed_parameters = [] list_dist_at_ending_point_fixed_parameters = [] list_dict_network_states = [] list_networks_at_cutting_day = [] for iter in range(Niter_training): print " iter:", iter dict_network_states = {} list_I = [] #list infected doctors list_ordering = [] list_s = [] list_A = [] list_F = [] ########### set I.C. max_order = 0 for n in G.nodes(): G.node[n]["status"] = "S" # all nodes are Susceptible if G.node[n]['type'] == "shift": list_s.append(n) if G.node[n]['order'] > max_order: max_order = G.node[n]['order'] else: if G.node[n]['label'] == "Wunderink" or G.node[n][ "label"] == "Weiss": G.node[n]["status"] = "I" list_I.append(G.node[n]['label']) ######################## WHAT ABOUT SMITH AND SPORN??? if G.node[n]['type'] == "A": list_A.append(n) if G.node[n]['type'] == "F": list_F.append(n) list_single_t_evolution = [] list_single_t_evolution.append( 2.0) # I always start with TWO infected doctors!! 
for n in G.nodes( ): # i make some DOCTORs INMUNE (anyone except Weiss and Wunderink) if (G.node[n]['type'] == "A") or (G.node[n]['type'] == "F"): if G.node[n]['label'] != "Wunderink" and G.node[n][ "label"] != "Weiss": rand = random.random() if rand < prob_Immune: G.node[n]["status"] = "Immune" # print max_order ################# the dynamics starts: t = 1 while t < cutting_day: # loop over shifts, in order just until cutting day (training segment) for n in G.nodes(): if G.node[n]['type'] == "shift" and G.node[n][ 'order'] == t: flag_possible_infection = 0 for doctor in G.neighbors( n ): #first i check if any doctor is infected in this shift if G.node[doctor]["status"] == "I": flag_possible_infection = 1 if flag_possible_infection: for doctor in G.neighbors( n ): # then the doctors in that shift, gets infected with prob_infection if G.node[doctor]["status"] == "S": rand = random.random() if rand < prob_infection: G.node[doctor]["status"] = "I" list_I.append( G.node[doctor]["label"]) list_single_t_evolution.append(float( len(list_I))) #/(len(list_A)+len(list_F))) t += 1 for n in G.nodes(): if G.node[n]['type'] != "shift": dict_network_states[G.node[n] ["label"]] = G.node[n]["status"] list_dict_network_states.append(dict_network_states) ######## end t loop list_lists_t_evolutions.append(list_single_t_evolution) list_dist_fixed_parameters.append( compare_real_evol_vs_simus_to_be_called.compare_two_curves( list_actual_evol_training, list_single_t_evolution)) list_dist_at_ending_point_fixed_parameters.append( abs(list_single_t_evolution[-1] - list_actual_evol_training[-1]) ) # i save the distance at the ending point between the current simu and actual evol ######## end loop Niter for the training fase list_pair_dist_std_delta_end = [] list_pair_dist_std_delta_end.append( numpy.mean(list_dist_fixed_parameters) ) # average dist between the curves over Niter list_pair_dist_std_delta_end.append( numpy.std(list_dist_fixed_parameters)) list_pair_dist_std_delta_end.append( 
numpy.mean(list_dist_at_ending_point_fixed_parameters)) if ( numpy.mean(list_dist_at_ending_point_fixed_parameters) ) <= delta_end: # i only consider situations close enough at the ending point dict_filenames_tot_distance[ output_file2] = list_pair_dist_std_delta_end dict_filenames_list_dict_network_states[ output_file2] = list_dict_network_states file2 = open(output_file2, 'at') for s in range(len(list_single_t_evolution)): list_fixed_t = [] for iter in range(Niter_training): list_fixed_t.append(list_lists_t_evolutions[iter][s]) print >> file2, s, numpy.mean(list_fixed_t) file2.close() prob_infection += delta_prob prob_Immune += delta_prob_Immune list_order_dict = compare_real_evol_vs_simus_to_be_called.pick_minimum_same_end( dict_filenames_tot_distance, "Infection_training", all_team, Niter_training) # it returns a list of tuples like this : ('../Results/network_final_schedule_withTeam3_local/infection/Average_time_evolution_Infection_training_p0.7_Immune0.0_2iter_2012.dat', [2540.0, 208.0, 1.0]) the best set of parameters being the fist one of the elements in the list. optimum_filename = list_order_dict[0][0] prob_infection = float(list_order_dict[0][0].split("_p")[1][0:3]) prob_Immune = float(list_order_dict[0][0].split("_Immune")[1][0:3]) # raw_input() print "starting testing fase with:" print "p=", prob_infection, " and Pimmune=", prob_Immune # i already know the optimum, now i run the dynamics with those values, starting from the average state on the cutting point, and test: list_dist_fixed_parameters = [] list_dist_at_ending_point_fixed_parameters = [] list_lists_t_evolutions = [] lista_num_infect = [] lista_I_drs = [] dict_tot_I_doctors = {} lista_num_imm = [] lista_Imm_drs = [] dict_tot_Imm_doctors = {} for dictionary in dict_filenames_list_dict_network_states[ optimum_filename]: # dictionary={Dr1:status, Dr2:status,} # one dict per iteration num_I = 0. num_Imm = 0. #raw_input() for key in dictionary: if dictionary[key] == "I": num_I += 1. 
if key not in lista_I_drs: lista_I_drs.append(key) dict_tot_I_doctors[key] = 1. else: dict_tot_I_doctors[key] += 1. elif dictionary[key] == "Immune": num_Imm += 1. if key not in lista_Imm_drs: lista_Imm_drs.append(key) dict_tot_Imm_doctors[key] = 1. else: dict_tot_Imm_doctors[key] += 1. lista_num_infect.append(num_I) lista_num_imm.append(num_Imm) avg_inf_drs = int( numpy.mean(lista_num_infect)) # i find out the average num I print "avg of inf:", numpy.mean(lista_num_infect), avg_inf_drs, numpy.std( lista_num_infect) if numpy.mean(lista_num_infect) - avg_inf_drs >= 0.5: avg_inf_drs += 1.0 # print avg_inf_drs avg_imm_drs = int( numpy.mean(lista_num_imm)) # i find out the average num Immune #print "avg of imm:", numpy.mean(lista_num_imm),avg_imm_drs,numpy.std(lista_num_imm) if numpy.mean(lista_num_imm) - avg_imm_drs >= 0.5: avg_imm_drs += 1.0 # print avg_imm_drs # i sort the list from more frequently infected to less list_sorted_dict = sorted(dict_tot_I_doctors.iteritems(), key=operator.itemgetter(1)) new_list_sorted_dict = list_sorted_dict new_list_sorted_dict.reverse() # print "I:",new_list_sorted_dict #list_sorted_dict=[(u'Weiss', 5.0), (u'Wunderink', 5.0), (u'Keller', 4.0), (u'Go', 3.0), (u'Cuttica', 3.0), (u'Rosario', 2.0), (u'Radigan', 2.0), (u'Smith', 2.0), (u'RosenbergN', 2.0), (u'Gillespie', 1.0), (u'Osher', 1.0), (u'Mutlu', 1.0), (u'Dematte', 1.0), (u'Hawkins', 1.0), (u'Gates', 1.0)] dict_infect_prob_being_so = {} for item in new_list_sorted_dict: dict_infect_prob_being_so[item[0]] = 0. tot_sum_inf_so_far = 0. 
for item in new_list_sorted_dict: dict_infect_prob_being_so[item[0]] = item[1] + tot_sum_inf_so_far tot_sum_inf_so_far += item[1] list_sorted_dict_infect_prob_being_so = sorted( dict_infect_prob_being_so.iteritems(), key=operator.itemgetter(1)) # new_list_sorted_dict_dict_infect_prob_being_so=list_sorted_dict_infect_prob_being_so # new_list_sorted_dict_dict_infect_prob_being_so.reverse() print list_sorted_dict_infect_prob_being_so #print tot_sum_inf_so_far # i sort the list from more frequently imm to less list_sorted_dict_imm = sorted(dict_tot_Imm_doctors.iteritems(), key=operator.itemgetter(1)) new_list_sorted_dict_imm = list_sorted_dict_imm new_list_sorted_dict_imm.reverse() # print "Immunes:",new_list_sorted_dict_imm dict_imm_prob_being_so = {} for item in new_list_sorted_dict_imm: dict_imm_prob_being_so[item[0]] = 0. tot_sum_imm_so_far = 0. for item in new_list_sorted_dict_imm: dict_imm_prob_being_so[item[0]] = item[1] + tot_sum_imm_so_far tot_sum_imm_so_far += item[1] list_sorted_dict_imm_prob_being_so = sorted( dict_imm_prob_being_so.iteritems(), key=operator.itemgetter(1)) # new_list_sorted_dict_dict_imm_prob_being_so=list_sorted_dict_imm_prob_being_so #new_list_sorted_dict_dict_imm_prob_being_so.reverse() print list_sorted_dict_imm_prob_being_so #new_list_sorted_dict_dict_imm_prob_being_so for iter in range(Niter_testing): # print " iter:",iter, len(list_I) raw_input() # i establish the initial conditions (as probabilistically, according to the cutting point distribution of status) dict_label_node = {} list_I = [] #list infected doctors list_Immune = [] #list infected doctors for node in G.nodes(): if G.node[node]['type'] != "shift": label = G.node[node]['label'] G.node[node]["status"] = "S" #by default, all are susceptible dict_label_node[label] = node ii = 0. 
while ii <= avg_inf_drs: rand = random.random() * tot_sum_inf_so_far for i in range(len(list_sorted_dict_infect_prob_being_so)): if rand <= list_sorted_dict_infect_prob_being_so[i][1]: label = list_sorted_dict_infect_prob_being_so[i][0] current_prob_value = list_sorted_dict_infect_prob_being_so[ i][1] node = dict_label_node[label] G.node[node]["status"] = "I" print label, "got infected" list_I.append(label) #update the prob of being infected: for jj in range(len(list_sorted_dict_infect_prob_being_so)): if list_sorted_dict_infect_prob_being_so[jj][ 1] >= current_prob_value: print list_sorted_dict_infect_prob_being_so[jj][1] list_sorted_dict_infect_prob_being_so[jj][ 1] -= current_prob_value list_sorted_dict_infect_prob_being_so[i][ 1] = 0. #so i don't pick it again ii += 1. ii = 0. while ii <= avg_imm_drs: rand = random.random() * tot_sum_imm_so_far for i in range(len(list_sorted_dict_imm_prob_being_so)): if rand <= list_sorted_dict_imm_prob_being_so[i][1]: label = list_sorted_dict_imm_prob_being_so[i][0] node = dict_label_node[label] G.node[node]["status"] = "Immune" print label, "got immune" list_Immune.append(label) #update the prob of being infected: for jj in range(len(list_sorted_dict_imm_prob_being_so)): if list_sorted_dict_imm_prob_being_so[jj][ 1] >= current_prob_value: list_sorted_dict_imm_prob_being_so[jj][ 1] -= current_prob_value list_sorted_dict_imm_prob_being_so[i][ 1] = 0. #so i don't pick it again ii += 1. 
list_single_t_evolution = [] list_single_t_evolution.append(len(list_I)) t = cutting_day while t <= max_order: # loop over shifts, in order just until cutting day (training segment) for n in G.nodes(): if G.node[n]['type'] == "shift" and G.node[n]['order'] == t: flag_possible_infection = 0 for doctor in G.neighbors( n ): #first i check if any doctor is infected in this shift if G.node[doctor]["status"] == "I": flag_possible_infection = 1 if flag_possible_infection: for doctor in G.neighbors( n ): # then the doctors in that shift, gets infected with prob_infection if G.node[doctor]["status"] == "S": rand = random.random() if rand < prob_infection: G.node[doctor]["status"] = "I" list_I.append(G.node[doctor]["label"]) list_single_t_evolution.append(float(len(list_I))) print t, len(list_I) t += 1 list_lists_t_evolutions.append(list_single_t_evolution) list_dist_fixed_parameters.append( compare_real_evol_vs_simus_to_be_called.compare_two_curves( list_actual_evol_testing, list_single_t_evolution)) print " dist:", list_dist_fixed_parameters[-1] list_dist_at_ending_point_fixed_parameters.append( abs(list_single_t_evolution[-1] - list_actual_evol_testing[-1]) ) # i save the distance at the ending point between the current simu and actual evol ############### end loop Niter for the testing num_valid_endings = 0. for item in list_dist_at_ending_point_fixed_parameters: if item <= delta_end: # i count how many realizations i get close enough at the ending point num_valid_endings += 1. 
print "average distance of the optimum in the testing segment:", numpy.mean( list_dist_fixed_parameters), numpy.std( list_dist_fixed_parameters), list_dist_fixed_parameters print "fraction of realizations that end within delta_doctor:", num_valid_endings / Niter_testing, list_dist_at_ending_point_fixed_parameters output_file5 = dir + "Average_time_evolution_Infection_testing_prob_ic_p" + str( prob_infection) + "_" + "Immune" + str(prob_Immune) + "_" + str( Niter_testing) + "iter_2012.dat" file5 = open(output_file5, 'wt') for s in range(len(list_single_t_evolution)): list_fixed_t = [] for iter in range(Niter_testing): list_fixed_t.append(list_lists_t_evolutions[iter][s]) print >> file5, s + cutting_day, numpy.mean(list_fixed_t) # print s+cutting_day,numpy.mean(list_fixed_t) file5.close() print "written:", output_file5 histograma_bines_gral.histograma_bins( list_dist_fixed_parameters, 150, "../Results/histogr_tot_distances_testing_avg_ic_segment") histograma_gral.histograma( list_dist_at_ending_point_fixed_parameters, "../Results/histogr_distances_ending_testing_avg_ic_segment")
def main(): #full_network_filename="./network_all_users/full_network_all_users.gml" # i CANT use this network, because the labels dont match the users id from the dB # G_full = nx.read_gml(full_network_filename) # list_A=[] #Testign out how KS works on a random sample # list_B=[] #for i in range (10000): # list_A.append(random.random()) # list_B.append(random.random()) #print "KS test listA against normal distrib:", stats.kstest(list_A, "norm" ) # print "KS test listB against normal distrib:", stats.kstest(list_B, "norm" ) #print "two-sided KS test listA vs listB:", stats.ks_2samp(list_A, list_B) unrealistic_weight_change = 70. database = "calorie_king_social_networking_2010" server = "tarraco.chem-eng.northwestern.edu" user = "******" passwd = "n1ckuDB!" db = Connection(server, database, user, passwd) GC_network_filename = "./network_all_users/GC_full_network_all_users_merged_small_comm_roles_diff_layers1_roles_diff_layers1.5.gml" G = nx.read_gml(GC_network_filename) output_filename = "./network_all_users/Results_comparison_histograms_percent_weight_change.txt" file_output = open(output_filename, 'wt') # print "num. nodes:",len(G.nodes()) list_of_lists = nx.connected_components(G) print "num. 
of components:", len(list_of_lists), "size GC:", len( list_of_lists[0]) list_weight_changes_GC = [] list_weight_changes_R6friends = [] for node in G.nodes(): label = G.node[node]["label"] percent_weight_change = G.node[node]["percentage_weight_change"] R6_overlap = G.node[node]["R6_overlap"] #print node, label, weight_change, R6_overlap if percent_weight_change > -unrealistic_weight_change and percent_weight_change < unrealistic_weight_change: # filter out unrealistic values list_weight_changes_GC.append(percent_weight_change) if R6_overlap > 0: list_weight_changes_R6friends.append(percent_weight_change) print >> file_output, "num GC users:", len( list_weight_changes_GC), "num users with R6 friends:", len( list_weight_changes_R6friends) histograma_bines_gral.histograma_bins( list_weight_changes_GC, 20, "./network_all_users/histogram_weight_change_GC_users.dat") histograma_bines_gral.histograma_bins( list_weight_changes_R6friends, 20, "./network_all_users/histogram_weight_change_users_with_R6friends.dat") print >> file_output, "KS test GC against normal distrib:", stats.kstest( list_weight_changes_GC, "norm") print >> file_output, "KS test users with R6 friends against normal distrib:", stats.kstest( list_weight_changes_R6friends, "norm") print >> file_output, "two-sided KS test GC vs users with R6 friends:", stats.ks_2samp( list_weight_changes_GC, list_weight_changes_R6friends) list_weight_changes_all = [] query1 = """SELECT * FROM users""" result1 = db.query(query1) # is a list of dicts. 
for r1 in result1: percent_weight_change = (float(r1['most_recent_weight']) - float( r1['initial_weight'])) / float(r1['initial_weight']) # if percent_weight_change > -unrealistic_weight_change and percent_weight_change < unrealistic_weight_change : # filter out unrealistic values list_weight_changes_all.append(percent_weight_change) histograma_bines_gral.histograma_bins( list_weight_changes_all, 200, "./network_all_users/histogram_weight_change_users_all_200bins.dat") print >> file_output, "tot. number users", len(list_weight_changes_all) print >> file_output, "KS test all against normal distrib:", stats.kstest( list_weight_changes_all, "norm") print >> file_output, "two-sided KS test all vs GC:", stats.ks_2samp( list_weight_changes_all, list_weight_changes_GC) print >> file_output, "two-sided KS test all vs users with R6 friends:", stats.ks_2samp( list_weight_changes_GC, list_weight_changes_R6friends) file_output.close() print "written file:", output_filename exit() query1 = """SELECT * FROM friends order by src asc""" result1 = db.query(query1) # is a list of dict. print "number links:", len(result1) list_friends = [] for r1 in result1: label_src = r1['src'] label_dest = r1['dest'] if label_src not in list_friends: list_friends.append(label_src) if label_dest not in list_friends: list_friends.append(label_dest) print "num networked users:", len(list_friends)
def main():
    """Average cooperation per (T,S) point of the game plane, by age band.

    Loads the per-player pickle, filters rounds by the selected age band,
    accumulates each round's action (1=C, 0=D) per (T,S) point, then writes
    mean and std of cooperation per point plus an age histogram.
    """

    # Age band selector; bounds are set in the if/elif chain below.
    pupulation_age="All" #"young" # or "adult" or All

    if pupulation_age== "young":
        min_age_threshold=0
        max_age_threshold=15
    elif pupulation_age== "adult":
        min_age_threshold=16
        max_age_threshold=100
    elif pupulation_age== "All":
        min_age_threshold=0
        max_age_threshold=100
    else:
        print "wrong age range"
        exit()

    filename="../Data/userdata.pickle"
    master_list=pickle.load(open(filename, 'rb'))   # a list: one element per player (541)

    ######### output files
    Nbins=15
    name_h="../Results/histogram_ages.dat"

    output_filename1="../Results/Cooperation_TSplane_"+str(pupulation_age)+"_ages.dat"
    output1= open(output_filename1,'wt')
    #########

    ### master_list has the form (one dict per player):
    # [{'guany_total': 110L, 'partida': 1L, 'genere': u'h', 'num_eleccions': 14, 'edat': 50L, 'rationality': 66.666666666666671, 'ambition': 100.0, 'rondes': [{'guany_oponent': 10L, 'ambition': None, 'seleccio': u'C', 'oponent': 7L, 'S': 6L, 'T': 5L, 'seleccio_oponent': u'C', 'numronda': 1L, 'guany': 10L, 'cuadrant': u'Harmony', 'rationality': 1.0}, {'guany_oponent': 6L, 'ambition': None, 'seleccio': u'D', 'oponent': 17L, 'S': 6L, 'T': 8L, 'seleccio_oponent': u'C', 'numronda': 2L, 'guany': 8L, 'cuadrant': u'Harmony', 'rationality': 0.0},...], 'nickname': u'Caesar', 'id': 2L}]
    # the 'rondes' key in turn holds a list of dicts (one per round):
    # [{'guany_oponent': 10L, 'ambition': None, 'seleccio': u'C', 'oponent': 7L, 'S': 6L, 'T': 5L, 'seleccio_oponent': u'C', 'numronda': 1L, 'guany': 10L, 'cuadrant': u'Harmony', 'rationality': 1.0}, {'guany_oponent': 6L, 'ambition': None, 'seleccio': u'D', 'oponent': 17L, 'S': 6L, 'T': 8L, 'seleccio_oponent': u'C', 'numronda': 2L, 'guany': 8L, 'cuadrant': u'Harmony', 'rationality': 0.0}, ...]

    dict_TSplane_list_actions={}   # (T,S) -> list of actions (1.=C, 0.=D)
    dict_TSplane_avg_coop={}       # (T,S) -> mean cooperation
    dict_TSplane_std_coop={}       # (T,S) -> std of cooperation

    list_ages=[]
    for dictionary in master_list:   # each element of the list is itself a dict (one player)
        payoff_total=dictionary['guany_total']
        partida=dictionary['partida']
        genero=dictionary['genere']
        if genero =="h":
            genero=1
        elif genero == "d":
            genero=0
        num_elecciones=dictionary['num_eleccions']
        age=int(dictionary['edat'])
        avg_racionalidad=dictionary['rationality']
        avg_ambicion=dictionary['ambition']
        num_rondas=len(dictionary['rondes'])
        nickname=unidecode(dictionary['nickname']).replace(" ", "_")
        user_id=dictionary['id']

        list_dict_rondas=dictionary['rondes']
        list_ages.append(age)
        for dict_ronda in list_dict_rondas:
            ## each round dict has: {'guany_oponent': 10L, 'ambition': None, 'seleccio': u'C', 'oponent': 7L, 'S': 6L, 'T': 5L, 'seleccio_oponent': u'C', 'numronda': 1L, 'guany': 10L, 'cuadrant': u'Harmony', 'rationality': 1.0}
            payoff=dict_ronda['guany']
            payoff_oponent=dict_ronda['guany_oponent']
            rationality=dict_ronda['rationality']
            ambition=dict_ronda['ambition']
            action=dict_ronda['seleccio']
            if action =="C":
                action=1.
            elif action=="D":
                action=0.
            # if no choice was made, action stays None
            num_ronda=dict_ronda['numronda']
            quadrant=dict_ronda['cuadrant'].replace(" ", "_").replace("'", "")
            action_oponent=dict_ronda['seleccio_oponent']
            if action_oponent =="C":
                action_oponent=1.
            elif action_oponent=="D":
                action_oponent=0.
            # if no choice was made, it stays None
            oponent_id=dict_ronda['oponent']
            T=int(dict_ronda['T'])
            S=int(dict_ronda['S'])
            punto_TS=(T,S)
            # Accumulate the action at this (T,S) point, but only for
            # players inside the selected age band and rounds where a
            # choice was actually made.
            if punto_TS in dict_TSplane_list_actions:
                if action !=None:
                    if age >= min_age_threshold and age <= max_age_threshold:
                        dict_TSplane_list_actions[punto_TS].append(action)   # 1:C, 0:D
            else:
                if action !=None:
                    if age >= min_age_threshold and age <= max_age_threshold:
                        dict_TSplane_list_actions[punto_TS]=[]
                        dict_TSplane_list_actions[punto_TS].append(action)

            # NOTE(review): rescaled to percent but never used afterwards
            # in this function -- looks like dead code; confirm.
            if rationality !=None:
                rationality=rationality*100
            if ambition !=None:
                ambition=ambition*100

    old_T=None   # tracks T so a blank line separates groups of rows with the same T
    ####### the avg cooperation per TS point
    for punto_TS in sorted(dict_TSplane_list_actions):
        dict_TSplane_avg_coop[punto_TS]=numpy.mean(dict_TSplane_list_actions[punto_TS])
        dict_TSplane_std_coop[punto_TS]=numpy.std(dict_TSplane_list_actions[punto_TS])
        if old_T != punto_TS[0]:
            print >> output1
            print
        print punto_TS[0],punto_TS[1], dict_TSplane_avg_coop[punto_TS], dict_TSplane_std_coop[punto_TS]
        print >> output1,punto_TS[0],punto_TS[1], dict_TSplane_avg_coop[punto_TS], dict_TSplane_std_coop[punto_TS]
        old_T=punto_TS[0]

    histograma_bines_gral.histograma_bins(list_ages,Nbins, name_h)   #x_position , norm_count, count, norm_cumulat_count, cumulat_count , float(hist[b])/float(len(lista))

    output1.close()
    print "written output datafile:", output_filename1
def main(graph_name):
    """Persuasion dynamics on the hospital shift network.

    Reads the shift graph from *graph_name* and replays the adopter
    persuasion model Niter times for every point of the parameter grid
    below (a single point in the current "testing fixed set" mode).
    Each averaged evolution is compared against the actual adoption
    curve; averaged files, distance histograms and a summary file are
    written.  When exploring the landscape (for_testing_fixed_set=="NO")
    a landscape file and the best parameter set are produced instead.
    """

    G = nx.read_gml(graph_name)

    list_id_weekends_T3 = look_for_T3_weekends(
        G
    )  # T3 doesnt share fellows in the weekend (but they are the exception)

    percent_envelope = 95.
    Niter = 1000
    cutting_day = 125  # day index separating training and testing segments

    Nbins = 200  # for the histogram of sum of distances

    for_testing_fixed_set = "YES"  # when YES, fixed values param and get all statistics on final distances etc

    envelopes = "NO"

    delta_end = 3.  # >= than + or - dr difference at the end of the evolution

    dir_real_data = '../Results/'

    all_team = "NO"  # as adopters or not NO now means i use the file without fellows, only attendings

    if for_testing_fixed_set == "NO":  # landscape mode: truncate the landscape file up front
        output_file3 = "../Results/weight_shifts/Landscape_parameters_persuasion_" + str(
            Niter) + "iter_A_F_inferred.dat"
        file3 = open(output_file3, 'wt')
        file3.close()

    ######################################################################################
    #  I read the file of the actual evolution of the idea spreading in the hospital:   ##
    ######################################################################################

    if all_team == "YES":
        print "remember that now i use the file of adopters without fellows\n../Results/Actual_evolution_adopters_NO_fellows_only_attendings.dat"
        exit()
    else:
        filename_actual_evol = "../Results/Actual_evolution_adopters_from_inference.dat"

    file1 = open(
        filename_actual_evol, 'r'
    )  ## i read the file: list_dates_and_names_current_adopters.txt (created with: extract_real_evolution_number_adopters.py)
    list_lines_file = file1.readlines()

    list_actual_evol = []
    for line in list_lines_file:  # [1:]: # i exclude the first row
        num_adopters = float(line.split("\t")[1])
        list_actual_evol.append(num_adopters)

    ##################################################################

    # Example files produced by earlier runs:
    #../Results/weight_shifts/persuasion/Time_evolutions_Persuasion_training_alpha0.2_damping0.0_mutual_encourg0.5_threshold0.7_unif_distr_1000iter_2012_seed31Oct_finalnetwork_day125.dat
    #../Results/weight_shifts/persuasion/Time_evolutions_Persuasion_training_alpha0.5_damping0.4_mutual_encourg0.5_threshold0.5_unif_distr_1000iter_2012_seed31Oct_finalnetwork_day125.dat
    # NOTE: always use TWO decimals, so values match the names of the sub-directories where results are saved
    alpha_F_min = 0.10  #0.15 # alpha=0: nobody changes their mind
    alpha_F_max = 0.101  #0.351
    delta_alpha_F = 0.10  #AVOID 1.0 OR THE DYNAMICS GETS TOTALLY STUCK AND IT IS NOT ABLE TO PREDICT SHIT!

    min_damping = 0.00  #0.0 #its harder to go back from YES to NO again. =1 means no effect, =0.5 half the movement from Y->N than the other way around, =0 never go back from Y to N
    max_damping = 0.001  #0.451
    delta_damping = 0.10

    min_mutual_encouragement = 0.000  #0.50 # when two Adopters meet, they convince each other even more
    max_mutual_encouragement = 0.001  # 0.51 # KEEP THIS FIXED VALUES FOR NOW
    delta_mutual_encouragement = 0.10

    threshold_min = 0.50  #0.50 # larger than, to be an Adopte
    threshold_max = 0.501  # 0.51 # KEEP THIS FIXED VALUES FOR NOW
    delta_threshold = 0.10  # AVOID 1.0 OR THE DYNAMICS GETS TOTALLY STUCK AND IT IS NOT ABLE TO PREDICT SHIT

    print "\n\nPersuasion process on network, with Niter:", Niter

    dict_filenames_tot_distance = {
    }  # i will save the filename as key and the tot distance from that curve to the original one

    # Four nested parameter sweeps: threshold > alpha_F > mutual_encouragement > damping.
    threshold = threshold_min
    while threshold <= threshold_max:
        print "thershold:", threshold
        alpha_F = alpha_F_min
        while alpha_F <= alpha_F_max:  # i explore all the parameter space, and create a file per each set of valuesllkl
            alpha_A = 1.0 * alpha_F  # attendings and fellows share the same alpha
            print " alpha_F:", alpha_F
            mutual_encouragement = min_mutual_encouragement
            while mutual_encouragement <= max_mutual_encouragement:
                print " mutual_encouragement:", mutual_encouragement
                damping = min_damping
                while damping <= max_damping:
                    print " damping:", damping

                    dir = "../Results/weight_shifts/persuasion/alpha%.2f_damping%.2f/" % (
                        alpha_F, damping)
                    # NOTE(review): both branches build the identical filename --
                    # presumably a leftover from when the two modes used
                    # different names; confirm before simplifying.
                    if for_testing_fixed_set == "YES":
                        output_file = dir + "Time_evol_Persuasion_alpha" + str(
                            alpha_F
                        ) + "_damping" + str(damping) + "_mutual" + str(
                            mutual_encouragement
                        ) + "_threshold" + str(threshold) + "_" + str(
                            Niter) + "iter_alphaA_eq_alphaF_A_F_inferred.dat"
                    else:
                        output_file = dir + "Time_evol_Persuasion_alpha" + str(
                            alpha_F
                        ) + "_damping" + str(damping) + "_mutual" + str(
                            mutual_encouragement
                        ) + "_threshold" + str(threshold) + "_" + str(
                            Niter) + "iter_alphaA_eq_alphaF_A_F_inferred.dat"
                    file = open(output_file, 'wt')
                    file.close()

                    time_evol_number_adopters_ITER = [
                    ]  # list of complete single realizations of the dynamics

                    list_dist_fixed_parameters = []
                    list_dist_fixed_parameters_testing_segment = []
                    list_dist_abs_at_ending_point_fixed_parameters = []
                    list_dist_at_ending_point_fixed_parameters = []
                    list_final_num_adopt = []
                    list_abs_dist_point_by_point_indiv_simus_to_actual = []
                    list_dist_point_by_point_indiv_simus_to_actual = []
                    #list_abs_dist_at_cutting_day=[]

                    for iter in range(Niter):
                        print "     ", iter

                        list_t = []
                        time_evol_number_adopters = [
                        ]  # for a single realization of the dynamics

                        num_adopters, seed_shift, max_shift = set_ic(
                            G, threshold
                        )  # i establish who is Adopter and NonAdopter initially, and count how many shifts i have total
                        time_evol_number_adopters.append(float(num_adopters))
                        # print "initial number of adopters:", num_adopters
                        list_t.append(0)

                        # NOTE: comment this out when sweeping the WHOLE parameter space
                        # file4 = open(output_file.split('.dat')[0]+"_indiv_iter"+str(iter)+".dat",'wt')
                        # file4.close()
                        ##########################################

                        # the dynamics starts:
                        t = int(seed_shift
                                ) + 1  # the first time step is just IC.???

                        while t <= max_shift:  # loop over shifts, in chronological order (the order is the day index since seeding_day)
                            # print 't:',t
                            list_t.append(t)
                            for n in G.nodes():
                                if G.node[n]['type'] == "shift" and G.node[n][
                                        'order'] == t:  # i look for the shift corresponding to that time step
                                    shift_length = int(
                                        G.node[n]['shift_length'])

                                    if shift_length == 2 and n not in list_id_weekends_T3:
                                        shift_length = 1  # because during weekends, the fellow does rounds one day with Att1 and the other day with Att2. (weekend shifts for T3 are two day long, with no sharing fellows)
                                        # print "one-day weekend", G.node[n]['label'],G.node[n]['shift_length']

                                    flag_possible_persuasion = 0
                                    for doctor in G.neighbors(n):
                                        if G.node[doctor][
                                                "status"] == "Adopter":  #first i check if any doctor is an adopter in this shift
                                            flag_possible_persuasion = 1
                                            break

                                    if flag_possible_persuasion == 1:
                                        list_doctors = []
                                        for doctor in G.neighbors(
                                                n):  # for all drs in that shift
                                            list_doctors.append(doctor)

                                        pairs = itertools.combinations(
                                            list_doctors, 2
                                        )  # cos the shift can be 2 but also 3 doctors

                                        for pair in pairs:
                                            doctor1 = pair[0]
                                            doctor2 = pair[1]

                                            if G.node[doctor1][
                                                    'status'] != G.node[doctor2][
                                                        'status']:  # if they think differently, there will be persuasion
                                                persuasion(
                                                    G, damping, doctor1,
                                                    doctor2, alpha_A, alpha_F,
                                                    threshold, shift_length
                                                )  # i move their values of opinion
                                                update_opinions(
                                                    G, threshold, doctor1,
                                                    doctor2
                                                )  # i update status and make sure the values of the vectors stay between [0,1]
                                            else:  # if two Adopters meet, they encourage each other (if two NonAdopters, nothing happens)
                                                mutual_reinforcement(
                                                    G, mutual_encouragement,
                                                    doctor1, doctor2,
                                                    shift_length)
                                    # else:
                                    # print "  no persuasion possible during shift (no adopters present)!"

                            list_Adopters = [
                            ]  #count how many i have at this time
                            for n in G.nodes():
                                try:
                                    if G.node[n]["status"] == "Adopter":
                                        if G.node[n][
                                                "label"] not in list_Adopters:  # and G.node[n]["type"]=="A":
                                            list_Adopters.append(
                                                G.node[n]["label"])
                                except:
                                    pass  # if the node is a shift, it doesnt have a 'status' attribute

                            # if for_testing_fixed_set=="YES":
                            #   if t==cutting_day:
                            #     list_abs_dist_at_cutting_day.append(abs(float(list_actual_evol[-1])-float(len(list_Adopters))))
                            #     print abs(float(list_actual_evol[-1])-float(len(list_Adopters))), float(list_actual_evol[t]),float(len(list_Adopters))

                            time_evol_number_adopters.append(
                                float(len(list_Adopters)))

                            t += 1
                        ############## end while loop over t

                        time_evol_number_adopters_ITER.append(
                            time_evol_number_adopters)

                        list_dist_fixed_parameters.append(
                            compare_real_evol_vs_simus_to_be_called.
                            compare_two_curves(list_actual_evol,
                                               time_evol_number_adopters))

                        list_dist_fixed_parameters_testing_segment.append(
                            compare_real_evol_vs_simus_to_be_called.
                            compare_two_curves_testing_segment(
                                list_actual_evol, time_evol_number_adopters,
                                cutting_day))

                        list_dist_abs_at_ending_point_fixed_parameters.append(
                            abs(time_evol_number_adopters[-1] -
                                list_actual_evol[-1]))
                        list_dist_at_ending_point_fixed_parameters.append(
                            time_evol_number_adopters[-1] -
                            list_actual_evol[-1])
                        list_final_num_adopt.append(
                            time_evol_number_adopters[-1])

                        # NOTE: comment this out when sweeping the WHOLE parameter space
                        # file4 = open(output_file.split('.dat')[0]+"_indiv_iter"+str(iter)+".dat",'at')
                        # for i in range(len(time_evol_number_adopters)): #time step by time step
                        #     print >> file4, i,time_evol_number_adopters[i], alpha_F,damping,mutual_encouragement
                        #file4.close()
                        ########################################################

                        for index in range(len(time_evol_number_adopters)):
                            list_abs_dist_point_by_point_indiv_simus_to_actual.append(
                                abs(time_evol_number_adopters[index] -
                                    list_actual_evol[index]))
                            list_dist_point_by_point_indiv_simus_to_actual.append(
                                time_evol_number_adopters[index] -
                                list_actual_evol[index])

                    #######################end loop over Niter

                    # Summary triple for this parameter set:
                    # [mean trajectory distance, its SD, mean |final-point gap|]
                    list_pair_dist_std_delta_end = []
                    list_pair_dist_std_delta_end.append(
                        numpy.mean(list_dist_fixed_parameters)
                    )  # average dist between the curves over Niter
                    list_pair_dist_std_delta_end.append(
                        numpy.std(list_dist_fixed_parameters))
                    list_pair_dist_std_delta_end.append(
                        numpy.mean(
                            list_dist_abs_at_ending_point_fixed_parameters))

                    if for_testing_fixed_set == "NO":
                        file3 = open(output_file3, 'at')  # i print out the landscape
                        print >> file3, alpha_F, damping, mutual_encouragement, threshold, numpy.mean(
                            list_dist_abs_at_ending_point_fixed_parameters
                        ), numpy.mean(list_dist_fixed_parameters), numpy.mean(
                            list_final_num_adopt), numpy.std(
                                list_final_num_adopt
                            ), numpy.std(list_final_num_adopt) / numpy.mean(
                                list_final_num_adopt)
                        file3.close()

                    if (
                            numpy.mean(
                                list_dist_abs_at_ending_point_fixed_parameters)
                    ) <= delta_end:  # i only consider situations close enough at the ending point
                        dict_filenames_tot_distance[
                            output_file] = list_pair_dist_std_delta_end

                    # Average the Niter realizations time step by time step
                    # and write the averaged evolution file.
                    file = open(output_file, 'wt')
                    for i in range(len(time_evol_number_adopters)
                                   ):  #time step by time step
                        list_fixed_t = []
                        for iteracion in range(
                                Niter
                        ):  #loop over all independent iter of the process
                            list_fixed_t.append(
                                time_evol_number_adopters_ITER[iteracion][i]
                            )  # i collect all values for the same t, different iter
                        print >> file, list_t[i], numpy.mean(
                            list_fixed_t), numpy.std(
                                list_fixed_t
                            ), alpha_F, damping, mutual_encouragement
                    file.close()
                    print "printed out: ", output_file

                    if envelopes == "YES":
                        calculate_envelope_set_curves.calculate_envelope(
                            time_evol_number_adopters_ITER, percent_envelope,
                            "Persuasion", [
                                alpha_F, damping, mutual_encouragement,
                                threshold
                            ])

                    if for_testing_fixed_set == "YES":
                        # Fixed-parameter mode: print statistics and dump
                        # all the distance histograms plus a summary file.
                        num_valid_endings = 0.
                        for item in list_dist_abs_at_ending_point_fixed_parameters:
                            if item <= delta_end:  # i count how many realizations i get close enough at the ending point
                                num_valid_endings += 1.

                        print "average distance of the optimum in the testing segment:", numpy.mean(
                            list_dist_fixed_parameters), numpy.std(
                                list_dist_fixed_parameters
                            ), list_dist_fixed_parameters, "\n"
                        print "fraction of realizations that end within delta_doctor:", num_valid_endings / Niter, "mean ending dist:", numpy.mean(
                            list_dist_at_ending_point_fixed_parameters
                        ), "SD final dist", numpy.std(
                            list_dist_at_ending_point_fixed_parameters
                        ), list_dist_at_ending_point_fixed_parameters

                        histogram_filename = "../Results/weight_shifts/histogr_raw_distances_ending_persuasion_alpha" + str(
                            alpha_F) + "_damping" + str(
                                damping) + "_mutual_encourg" + str(
                                    mutual_encouragement
                                ) + "_threshold" + str(threshold) + "_" + str(
                                    Niter) + "iter_alphaA_eq_alphaF_day" + str(
                                        cutting_day) + "_A_F_inferred.dat"
                        histograma_gral_negv_posit.histograma(
                            list_dist_at_ending_point_fixed_parameters,
                            histogram_filename)

                        histogram_filename2 = "../Results/weight_shifts/histogr_sum_dist_traject_persuasion_alpha" + str(
                            alpha_F) + "_damping" + str(
                                damping) + "_mutual_encourg" + str(
                                    mutual_encouragement
                                ) + "_threshold" + str(threshold) + "_" + str(
                                    Niter) + "iter_alphaA_eq_alphaF_day" + str(
                                        cutting_day) + "_A_F_inferred.dat"
                        histograma_bines_gral.histograma_bins(
                            list_dist_fixed_parameters, Nbins,
                            histogram_filename2)

                        histogram_filename3 = "../Results/weight_shifts/histogr_sum_dist_testing_segment_persuasion_alpha" + str(
                            alpha_F) + "_damping" + str(
                                damping) + "_mutual_encourg" + str(
                                    mutual_encouragement
                                ) + "_threshold" + str(threshold) + "_" + str(
                                    Niter) + "iter_alphaA_eq_alphaF_day" + str(
                                        cutting_day) + "_A_F_inferred.dat"
                        histograma_bines_gral.histograma_bins_zero(
                            list_dist_fixed_parameters_testing_segment, Nbins,
                            histogram_filename3)

                        histogram_filename4 = "../Results/weight_shifts/histogr_abs_dist_point_by_point_persuasion_alpha" + str(
                            alpha_F) + "_damping" + str(
                                damping) + "_mutual_encourg" + str(
                                    mutual_encouragement
                                ) + "_threshold" + str(threshold) + "_" + str(
                                    Niter) + "iter_alphaA_eq_alphaF_day" + str(
                                        cutting_day) + "_A_F_inferred.dat"
                        histograma_gral_negv_posit.histograma(
                            list_abs_dist_point_by_point_indiv_simus_to_actual,
                            histogram_filename4)

                        histogram_filename5 = "../Results/weight_shifts/histogr_dist_point_by_point_persuasion_alpha" + str(
                            alpha_F) + "_damping" + str(
                                damping) + "_mutual_encourg" + str(
                                    mutual_encouragement
                                ) + "_threshold" + str(threshold) + "_" + str(
                                    Niter) + "iter_alphaA_eq_alphaF_day" + str(
                                        cutting_day) + "_A_F_inferred.dat"
                        histograma_gral_negv_posit.histograma(
                            list_dist_point_by_point_indiv_simus_to_actual,
                            histogram_filename5)

                        output_file10 = "../Results/weight_shifts/Summary_results_persuasion_alpha" + str(
                            alpha_F) + "_damping" + str(
                                damping) + "_mutual_encourg" + str(
                                    mutual_encouragement
                                ) + "_threshold" + str(threshold) + "_" + str(
                                    Niter) + "iter_alphaA_eq_alphaF_day" + str(
                                        cutting_day) + "_A_F_inferred.dat"
                        file10 = open(output_file10, 'wt')

                        print >> file10, "Summary results from best fit persuasion with", Niter, "iter, and with values for the parameters: alpha ", alpha_F, " damping: ", damping, " mutual_encourg: ", mutual_encouragement, " threshold:", threshold

                        print >> file10, "average distance of the optimum in the testing segment:", numpy.mean(
                            list_dist_fixed_parameters), numpy.std(
                                list_dist_fixed_parameters
                            ), list_dist_fixed_parameters

                        print >> file10, "fraction of realizations that end within delta_doctor:", num_valid_endings / Niter, "mean ending dist:", numpy.mean(
                            list_dist_at_ending_point_fixed_parameters
                        ), "SD final dist", numpy.std(
                            list_dist_at_ending_point_fixed_parameters
                        ), list_dist_at_ending_point_fixed_parameters

                        print >> file10, "written optimum train_test evolution file:", output_file
                        print >> file10, "written histogram file: ", histogram_filename
                        print >> file10, "written histogram file: ", histogram_filename2

                        file10.close()

                        print "written optimum train_test evolution file:", output_file
                        print "written summary file: ", output_file10

                    damping += delta_damping
                mutual_encouragement += delta_mutual_encouragement
            alpha_F += delta_alpha_F
        threshold += delta_threshold

    if for_testing_fixed_set == "NO":  # only if i am exploring the whole landscape, i need to call this function, otherwise, i already know the optimum
        compare_real_evol_vs_simus_to_be_called.pick_minimum_same_end(
            dict_filenames_tot_distance, "Persuasion_weight", all_team, Niter,
            None)  #last argument, cutting day (it doesnt apply)

    if for_testing_fixed_set == "NO":
        print "written landscape file:", output_file3
def main(graph_name):
    """Train the memory (dose) infection model on the first cutting_day days.

    Sweeps (prob_infection, prob_Immune, dose), runs Niter stochastic
    realizations per parameter set on the shift network read from
    *graph_name*, compares each realization against the actual adoption
    curve (training segment only), writes distance histograms, and
    finally prints the optimum parameter sets according to two
    selection criteria.
    """

    cutting_day=175   # to separate training-testing

    G = nx.read_gml(graph_name)

    all_team="NO"   # as adopters or not

    list_id_weekends_T3=look_for_T3_weekends(G)   # T3 doesnt share fellows in the weekend (but they are the exception)

    dir_real_data='../Results/'

    Nbins=20   # for the histogram of sum of distances

    delta_end=3.   # >= than + or - dr difference at the end of the evolution (NO realization ends up closer than this!!!! if 2, i get and empty list!!!)

    Niter=1000

    fixed_param=""#FIXED_Pimm0_"  # or ""  # for the Results file that contains the sorted list of best parameters

    ######################################################################################
    #  I read the file of the actual evolution of the idea spreading in the hospital:   ##
    ######################################################################################

    filename_actual_evol="../Results/Actual_evolution_adopters_from_inference.dat"

    file1=open(filename_actual_evol,'r')   ## i read the file: list_dates_and_names_current_adopters.txt (created with: extract_real_evolution_number_adopters.py)
    list_lines_file=file1.readlines()

    list_actual_evol=[]
    for line in list_lines_file:   # [1:]: # i exclude the first row
        num_adopters= float(line.split("\t")[1])
        list_actual_evol.append(num_adopters)

    list_actual_evol_training=list_actual_evol[:cutting_day]
    # list_actual_evol_testing=list_actual_evol[(cutting_day-1):]  # i dont need this one

    ##################################################################

    # Parameter grid for the sweep.
    prob_min=0.10
    prob_max=1.01
    delta_prob=0.1

    prob_Immune_min=0.0
    prob_Immune_max=1.001
    delta_prob_Immune=0.1

    # threshold is not personal, and set randomly to a value (0,1)
    # of a single encounter with an infected (it cant be zero or it doesnt make sense!)
    dose_min=0.05   #infect_threshold_min
    dose_max=1.001   #######infect_threshold_min/10.
    delta_dose=0.05   ##infect_threshold_min/10.

    dir="../Results/weight_shifts/infection/"   # NOTE(review): shadows the builtin 'dir'

    dict_filenames_tot_distance={}   # i will save the filename as key and the tot distance from that curve to the original one
    dict_filenames_prod_distances={}

    # Three nested sweeps: prob_Immune > prob_infection > dose.
    prob_Immune=prob_Immune_min
    while prob_Immune<= prob_Immune_max:
        print "prom Immune:",prob_Immune
        prob_infection=prob_min
        while prob_infection<= prob_max:
            print " p:",prob_infection
            dose=dose_min
            while dose <= dose_max:
                print "  dose:",dose

                output_file2=dir+"Average_time_evolution_Infection_memory_training_p"+str(prob_infection)+"_"+"Immune"+str(prob_Immune)+"_threshold_from_distrib_dose"+str(dose)+"_"+str(Niter)+"iter_day"+str(cutting_day)+"_A_F_inferred.dat"

                # file2 = open(output_file2,'wt')
                # file2.close()

                list_lists_t_evolutions=[]   # i create the empty list of list for the Niter temporal evolutions

                list_dist_fixed_parameters=[]
                list_dist_at_ending_point_fixed_parameters=[]
                list_dist_abs_at_ending_point_fixed_parameters=[]

                for iter in range(Niter):
                    # print "     iter:",iter

                    ########### set I.C.
                    list_I=[]   #list infected doctors
                    max_order=0
                    for n in G.nodes():
                        G.node[n]["status"]="S"   # all nodes are Susceptible
                        G.node[n]["infec_value"]=0.
                        G.node[n]["personal_threshold"]=random.random()   # for a dr to become infected
                        if G.node[n]['type']=="shift":
                            if G.node[n]['order']>max_order:
                                max_order=G.node[n]['order']   # to get the last shift-order for the time loop
                        else:
                            # the two seed doctors start infected
                            if G.node[n]['label']=="Wunderink" or G.node[n]["label"]=="Weiss":
                                G.node[n]["status"]="I"
                                G.node[n]["infec_value"]=G.node[n]["personal_threshold"]+ 1.
                                list_I.append(G.node[n]['label'])

                    list_single_t_evolution=[]
                    list_single_t_evolution.append(2.0)   # I always start with TWO infected doctors!!

                    for n in G.nodes():   # i make some DOCTORs INMUNE (anyone except Weiss and Wunderink)
                        if (G.node[n]['type']=="A") or ( G.node[n]['type']=="F"):
                            if G.node[n]['label']!="Wunderink" and G.node[n]["label"]!="Weiss":
                                rand=random.random()
                                if rand< prob_Immune:
                                    G.node[n]["status"]="Immune"

                    ################# the dynamics starts:
                    t=1
                    while t< cutting_day:   # loop over shifts, in order (training segment only)
                        for n in G.nodes():
                            if G.node[n]['type']=="shift" and G.node[n]['order']==t:
                                shift_length=int(G.node[n]['shift_length'])

                                if shift_length==2 and n not in list_id_weekends_T3:
                                    shift_length=1   # because during weekends, the fellow does rounds one day with Att1 and the other day with Att2. (weekend shifts for T3 are two day long, with no sharing fellows)

                                flag_possible_infection=0
                                for doctor in G.neighbors(n):   #first i check if any doctor is infected in this shift
                                    if G.node[doctor]["status"]=="I":
                                        flag_possible_infection=1

                                if flag_possible_infection:
                                    for doctor in G.neighbors(n):   # then the doctors in that shift, gets infected with prob_infection
                                        for i in range(shift_length):   # i repeat the infection process several times, to acount for shift lenght
                                            if G.node[doctor]["status"]=="S":
                                                rand=random.random()
                                                if rand<prob_infection:  # with prob p the infection occurres
                                                    G.node[doctor]["infec_value"]+=dose   # and bumps the infection_value of that susceptible dr
                                                    if G.node[doctor]["infec_value"]>= G.node[doctor]["personal_threshold"]:   # the threshold for infection is personal
                                                        G.node[doctor]["status"]="I"
                                                        list_I.append(G.node[doctor]["label"])

                        list_single_t_evolution.append(float(len(list_I)))
                        t+=1
                    ######## end t loop

                    list_lists_t_evolutions.append(list_single_t_evolution)

                    #print "actual:",len(list_actual_evol_training)," simu:",len(list_single_t_evolution)
                    list_dist_fixed_parameters.append(compare_real_evol_vs_simus_to_be_called.compare_two_curves( list_actual_evol_training,list_single_t_evolution))

                    list_dist_abs_at_ending_point_fixed_parameters.append( abs(list_single_t_evolution[-1]-list_actual_evol_training[-1]) )   # i save the distance at the ending point between the current simu and actual evol
                    list_dist_at_ending_point_fixed_parameters.append( list_single_t_evolution[-1]-list_actual_evol_training[-1])   # i save the distance at the ending point between the current simu and actual evol

                ######## end loop Niter for the training fase

                # Summary triple: [mean trajectory distance, its SD, mean |final-point gap|]
                list_pair_dist_std_delta_end=[]

                list_pair_dist_std_delta_end.append(numpy.mean(list_dist_fixed_parameters) )   # average dist between the curves over Niter
                list_pair_dist_std_delta_end.append(numpy.std(list_dist_fixed_parameters) )

                list_pair_dist_std_delta_end.append(numpy.mean(list_dist_abs_at_ending_point_fixed_parameters))

                value=numpy.mean(list_dist_fixed_parameters) *numpy.mean(list_dist_abs_at_ending_point_fixed_parameters)  # if SD=0, it is a problem, because then that is the minimun value, but not the optimum i am looking for!!
                dict_filenames_prod_distances[output_file2]= value

                if (numpy.mean(list_dist_abs_at_ending_point_fixed_parameters)) <= delta_end:   # i only consider situations close enough at the ending point
                    dict_filenames_tot_distance[output_file2]=list_pair_dist_std_delta_end

                histogram_filename="../Results/weight_shifts/histogr_raw_distances_ending_infection_memory_training_p"+str(prob_infection)+"_"+"Immune"+str(prob_Immune)+"_threshold_from_distrib_dose"+str(dose)+"_"+str(Niter)+"iter_day"+str(cutting_day)+"_A_F_inferred.dat"
                histograma_gral_negv_posit.histograma(list_dist_at_ending_point_fixed_parameters,histogram_filename)

                histogram_filename2="../Results/weight_shifts/histogr_sum_dist_traject_infection_memory_training_p"+str(prob_infection)+"_"+"Immune"+str(prob_Immune)+"_threshold_from_distrib_dose"+str(dose)+"_"+str(Niter)+"iter_day"+str(cutting_day)+"_A_F_inferred.dat"
                histograma_bines_gral.histograma_bins(list_dist_fixed_parameters,Nbins,histogram_filename2)

                print "written histogram file: ",histogram_filename
                print "written histogram file: ",histogram_filename2

                dose+= delta_dose
            prob_infection+= delta_prob
        prob_Immune+= delta_prob_Immune

    string_name="infection_memory_training_"+fixed_param+str(Niter)+"iter_day"+str(cutting_day)+"_A_F_inferred.dat"   # for the "Results" file with the sorted list of files

    # Criterion 1: minimum total distance among runs close enough at the end.
    list_order_dict= compare_real_evol_vs_simus_to_be_called.pick_minimum_same_end(dict_filenames_tot_distance,string_name,Niter,cutting_day)
    # it returns a list of tuples like this: ('../Results/network_final_schedule_withTeam3_local/infection/Average_time_evolution_Infection_training_p0.7_Immune0.0_2iter_2012.dat', [2540.0, 208.0, 1.0]) the best set of parameters being the fist one of the elements in that list.

    # Criterion 2: minimum product of trajectory distance and final-point gap.
    list_order_dict2= compare_real_evol_vs_simus_to_be_called.pick_minimum_prod_distances(dict_filenames_prod_distances,string_name,Niter,cutting_day)

    # Parameter values are recovered by parsing the winning filename.
    prob_infection=float(list_order_dict[0][0].split("_p")[1].split("_")[0])
    prob_Immune=float(list_order_dict[0][0].split("_Immune")[1].split("_")[0])
    dose=float(list_order_dict[0][0].split("_dose")[1].split("_")[0])
    print "\nOptimum parameters (old method) at day",cutting_day," are: p=",prob_infection," Pimmune=",prob_Immune," infection threshold from distribution, and dose=",dose

    # optimum_filename=list_order_dict2[0][0]
    prob_infection=float(list_order_dict2[0][0].split("_p")[1].split("_")[0])
    prob_Immune=float(list_order_dict2[0][0].split("_Immune")[1].split("_")[0])
    dose=float(list_order_dict2[0][0].split("_dose")[1].split("_")[0])
    print "Optimum parameters (product of distances and SDs) at day",cutting_day," are: p=",prob_infection," Pimmune=",prob_Immune," infection threshold from distribution, and dose=",dose
def main():
    """Demographic analysis of the k-means behavioral clusters (k=5) of the
    dau2014 game data: per-cluster age/gender/earnings histograms and stats,
    a bootstrap z-score of each cluster's gender mix vs the whole population,
    and pairwise KS-tests on the age distributions.

    Reads the master pickle with one dict per user, plus the per-cluster and
    per-behavioral-type user-id pickles; writes histogram .dat files and prints
    the statistics to stdout.  Pauses with raw_input() after each cluster.
    """
    Nbins = 12
    Num_clusters = 5
    Niter = 200 # for bootstraping the gender distributions by cluster vs all

    #####Clusters from kmeans k=5, (iter109)
    #####Cluster1: cooperate only in up triangle H (Competitors)
    #####Cluster2: cooperate in left half of plane (Bounty hunters)
    #####Cluster3: oddballs
    #####Cluster4: cooperate everywhere (Cooperators)
    #####Cluster5: cooperate in top half of plane (Conservatives)
    dict_cluster_number_name = {
        1: "Competitive",
        2: "Greedy",
        3: "Clueless",
        4: "Altruists",
        5: "Conservative"
    }

    ######### input masterfile
    filename = "../Data/userdata.pickle"
    master_list = pickle.load( open(filename, 'rb') )
    #### master_list has the form (one dict per user; per-round records under 'rondes'):
    #### [{'guany_total': 110L, 'partida': 1L, 'genere': u'h', 'num_eleccions': 14, 'edat': 50L, 'rationality': 66.666666666666671, 'ambition': 100.0, 'rondes': [{'guany_oponent': 10L, 'ambition': None, 'seleccio': u'C', 'oponent': 7L, 'S': 6L, 'T': 5L, 'seleccio_oponent': u'C', 'numronda': 1L, 'guany': 10L, 'cuadrant': u'Harmony', 'rationality': 1.0}, ...], 'nickname': u'Caesar', 'id': 2L}]

    # threshold_flag= "_treshold0.8"   # "_treshold0.8" or ""
    # gral_filename="../Results/dau2014_partition"+threshold_flag+"_Carlos_"
    gral_filename = "../Results/list_"
    #../Results/dau2014_partition_Carlos_rationals.pickle
    #../Results/dau2014_partition_treshold0.8_Carlos_rationals.pickle
    #../Results/list_rationals.pickle  # mine

    # load the four behavioral-type user-id lists, in the order of list_names
    list_lists_def = []
    list_names = ["weirdos", "rationals", "mostly_def", "altruists"]

    file_weirdos = gral_filename + "weirdos.pickle"
    list_weirdos = pickle.load(open(file_weirdos, 'rb'))
    list_lists_def.append(list_weirdos)

    file_rationals = gral_filename + "rationals.pickle"
    list_rationals = pickle.load(open(file_rationals, 'rb'))
    list_lists_def.append(list_rationals)

    file_mostly_def = gral_filename + "mostly_def.pickle"
    list_mostly_def = pickle.load(open(file_mostly_def, 'rb'))
    list_lists_def.append(list_mostly_def)

    file_altruists = gral_filename + "altruists.pickle"
    list_altruists = pickle.load(open(file_altruists, 'rb'))
    list_lists_def.append(list_altruists)

    filename_all = "../Results/list_all_users.pickle"
    list_all_users = pickle.load(open(filename_all, 'rb'))

    # index the demographic info by user id for fast lookup below
    dict_user_id_info = {}
    for dictionary in master_list: # each element of the list is itself a dict
        partida = dictionary['partida']
        num_elecciones = int(dictionary['num_eleccions'])
        age = int(dictionary['edat'])
        num_rondas = len(dictionary['rondes'])
        tot_earnings = int(dictionary['guany_total'])
        nickname = unidecode(dictionary['nickname']).replace(" ", "_")
        earnings_by_round = tot_earnings / float(num_elecciones)
        print tot_earnings, float(num_elecciones), earnings_by_round

        # gender is recoded: 'h' (male) -> 1, 'd' (female) -> 0
        gender = dictionary['genere']
        if gender == 'h':
            gender = 1
        elif gender == 'd':
            gender = 0

        user_id = int(dictionary['id'])
        dict_user_id_info[user_id] = {}
        dict_user_id_info[user_id]['num_elecciones'] = num_elecciones
        dict_user_id_info[user_id]['age'] = age
        dict_user_id_info[user_id]['gender'] = gender
        dict_user_id_info[user_id]['tot_earnings'] = tot_earnings
        dict_user_id_info[user_id]['earnings_by_round'] = earnings_by_round
        # print user_id, dict_user_id_info[user_id]['age'], dict_user_id_info[user_id]['gender'], dict_user_id_info[user_id]['tot_earnings'], nickname

    dict_cluster_number_list_ages_in_clusters = { } # for the pairwise KS-test comparison
    list_cluster_numbers = []

    # whole-population accumulators (filled while looping over the clusters)
    list_age_all = []
    list_gender_all = []
    list_tot_earnings_all = []
    list_norm_earnings_by_round_all = []

    for cluster in range(Num_clusters):
        cluster += 1 # cluster files are numbered 1..Num_clusters
        print "\ncluster", cluster, dict_cluster_number_name[cluster]

        list_age_current_cluster = []
        list_gender_current_cluster = []
        list_tot_earnings_current_cluster = []
        list_norm_earnings_by_round_current_cluster = []

        file_cluster = "../Results/Niter_clustering/list_clusters_kmeans" + str( Num_clusters) + "-" + str(cluster) + "_109iter.pickle"
        # file_cluster="../Results/list_clusters_kmeans5_dist_notypes-1.pickle   # the MeV data
        list_current_cluster = pickle.load(open(file_cluster, 'rb'))

        for user_id in list_current_cluster:
            list_age_current_cluster.append(dict_user_id_info[user_id]['age'])
            list_gender_current_cluster.append( dict_user_id_info[user_id]['gender'])
            list_tot_earnings_current_cluster.append( dict_user_id_info[user_id]['tot_earnings'])
            list_norm_earnings_by_round_current_cluster.append( dict_user_id_info[user_id]['earnings_by_round'])

            list_age_all.append(dict_user_id_info[user_id]['age'])
            list_gender_all.append(dict_user_id_info[user_id]['gender'])
            list_tot_earnings_all.append( dict_user_id_info[user_id]['tot_earnings'])
            list_norm_earnings_by_round_all.append( dict_user_id_info[user_id]['earnings_by_round'])

        dict_cluster_number_list_ages_in_clusters[ cluster] = list_age_current_cluster

        ######## i calculate the z-score of a cluster's gender distribution vs the total pupulation
        # NOTE(review): list_gender_all only contains the clusters seen so far at
        # this point in the loop -- the "all" reference grows as the loop runs.
        list_avg_gender_in_synthetic_cluster = []
        for iter in range(Niter): # bootstrapping the age distribution vs all
            list_synth = sample_with_replacement( list_gender_all, len(list_gender_current_cluster))
            list_avg_gender_in_synthetic_cluster.append(numpy.mean(list_synth))

        z_score = (numpy.mean(list_gender_current_cluster) - numpy.mean(list_avg_gender_in_synthetic_cluster) ) / numpy.std(list_avg_gender_in_synthetic_cluster)
        print "z-score of gender distributions of cluster", dict_cluster_number_name[ cluster], "vs all:", z_score
        raw_input() # deliberate pause: press ENTER to continue to the histograms

        histograma_bines_gral.histograma_bins( list_age_current_cluster, Nbins, "../Results/Hist_age_cluster" + str(Num_clusters) + "_" + str(cluster) + ".dat")
        print "avg. age:", numpy.mean( list_age_current_cluster), "std:", numpy.std( list_age_current_cluster)

        histograma_bines_gral.histograma_bins( list_tot_earnings_current_cluster, Nbins, "../Results/Hist_tot_earnings_cluster" + str(Num_clusters) + "_" + str(cluster) + ".dat")
        print "avg. earnings:", numpy.mean( list_tot_earnings_current_cluster), "std:", numpy.std( list_tot_earnings_current_cluster)

        histograma_bines_gral.histograma_bins( list_norm_earnings_by_round_current_cluster, Nbins, "../Results/Hist_norm_earnings_by_round_cluster" + str(Num_clusters) + "_" + str(cluster) + ".dat")
        print "avg. earnings/num_rounds:", numpy.mean( list_norm_earnings_by_round_current_cluster), "std:", numpy.std( list_norm_earnings_by_round_current_cluster)

        print "avg. gender:", numpy.mean( list_gender_current_cluster), numpy.std( list_gender_current_cluster), " (1: male, 0: female)"
        print "cluster size:", len(list_current_cluster)

    # same per-group statistics for the four behavioral-type partitions
    for i in range(len(list_lists_def)):
        name_def = list_names[i]
        print name_def

        list_age_current_cluster = []
        list_gender_current_cluster = []
        list_tot_earnings_current_cluster = []
        list_norm_earnings_by_round_current_cluster = []

        list_current_cluster = list_lists_def[i]
        for user_id in list_current_cluster:
            list_age_current_cluster.append(dict_user_id_info[user_id]['age'])
            list_gender_current_cluster.append( dict_user_id_info[user_id]['gender'])
            list_tot_earnings_current_cluster.append( dict_user_id_info[user_id]['tot_earnings'])
            list_norm_earnings_by_round_current_cluster.append( dict_user_id_info[user_id]['earnings_by_round'])

        histograma_bines_gral.histograma_bins( list_age_current_cluster, Nbins, "../Results/Hist_age_cluster_" + str(name_def) + ".dat")
        print "avg. age:", numpy.mean( list_age_current_cluster), "std:", numpy.std( list_age_current_cluster)

        histograma_bines_gral.histograma_bins( list_tot_earnings_current_cluster, Nbins, "../Results/Hist_tot_earnings_cluster_" + str(name_def) + ".dat")
        print "avg. earnings:", numpy.mean( list_tot_earnings_current_cluster), "std:", numpy.std( list_tot_earnings_current_cluster)

        histograma_bines_gral.histograma_bins( list_norm_earnings_by_round_current_cluster, Nbins, "../Results/Hist_norm_earnings_by_round_cluster_" + str(name_def) + ".dat")
        print "avg. earnings/num_rounds:", numpy.mean( list_norm_earnings_by_round_current_cluster), "std:", numpy.std( list_norm_earnings_by_round_current_cluster)

        print "avg. gender:", numpy.mean( list_gender_current_cluster), numpy.std( list_gender_current_cluster), " (1: male, 0: female)"
        print "cluster size:", len(list_current_cluster)

    # whole-population statistics
    print
    histograma_bines_gral.histograma_bins(list_age_all, Nbins, "../Results/Hist_age_all.dat")
    print "general avg. age:", numpy.mean(list_age_all), "std:", numpy.std( list_age_all)

    histograma_bines_gral.histograma_bins( list_tot_earnings_all, Nbins, "../Results/Hist_tot_earnings_all.dat")
    print "general avg. earnings:", numpy.mean( list_tot_earnings_all), "std:", numpy.std(list_tot_earnings_all)

    histograma_bines_gral.histograma_bins( list_norm_earnings_by_round_all, Nbins, "../Results/Hist_norm_earnings_by_round_all.dat")
    print "avg. earnings/num_rounds:", numpy.mean( list_norm_earnings_by_round_all), "std:", numpy.std( list_norm_earnings_by_round_all)

    print "general avg. gender:", numpy.mean(list_gender_all), numpy.std( list_gender_all), " (1: male, 0: female)"
    print "tot population:", len(list_age_all)

    ######## KS test
    #This tests whether 2 samples are drawn from the same distribution. Note that, like in the case of the one-sample K-S test, the distribution is assumed to be continuous.
    #This is the two-sided test, one-sided tests are not implemented. The test uses the two-sided asymptotic Kolmogorov-Smirnov distribution.
    #If the K-S statistic is small or the p-value is high, then we cannot reject the hypothesis that the distributions of the two samples are the same.
    print "\nKS-tests for age distributions by cluster:"
    for item in itertools.combinations( dict_cluster_number_list_ages_in_clusters, 2):
        list_ages_cluster1 = dict_cluster_number_list_ages_in_clusters[item[0]]
        list_ages_cluster2 = dict_cluster_number_list_ages_in_clusters[item[1]]

        print "for clusters:", dict_cluster_number_name[ item[0]], dict_cluster_number_name[ item[1]], " (KS,p):", stats.ks_2samp(list_ages_cluster1, list_ages_cluster2)
        print " for cluster:", dict_cluster_number_name[ item[0]], "vs all (KS,p):", stats.ks_2samp( list_ages_cluster1, list_age_all)
        print " for cluster:", dict_cluster_number_name[ item[1]], "vs all (KS,p):", stats.ks_2samp( list_ages_cluster2, list_age_all)

    print "\n **If the K-S statistic is small or the p-value is high, then we cannot reject the hypothesis that the distributions of the two samples are the same."
def main(graph_name):
    """Run Niter infection simulations on the hospital shift network, build the
    average adoption curve, and estimate the probability that the ACTUAL
    adoption curve is a typical individual realization of the Infection model:
    the distance actual-vs-average is located inside the histogram of
    individual-run-vs-average distances and its upper-tail mass is integrated.

    Writes the distance histogram plus two small result files under ../Results/.
    """
    G = nx.read_gml(graph_name)

    # fixed model parameters for this run (no sweep here)
    prob_infection=0.9
    prob_Immune=0.5
    Niter=100000

    dir_real_data='../Results/'
    all_team="NO" # as adopters or not

    ######################################################################################
    # I read the file of the actual evolution of the idea spreading in the hospital:   ##
    ######################################################################################
    if all_team=="YES":
        filename_actual_evol=dir_real_data+"HospitalModel_august1_adoption_counts_all_team_as_adopters_SIMPLER.csv"
    else:
        filename_actual_evol=dir_real_data+"HospitalModel_august1_adoption_counts_SIMPLER.csv" # no longer need to ALSO change the file name in the comparison code

    list_actual_evol=[]
    result_actual_file= csv.reader(open(filename_actual_evol, 'rb'), delimiter=',')
    cont=0
    for row in result_actual_file:
        if cont>0: # i ignore the first line with the headers
            num_adopters= row[3]
            list_actual_evol.append(float(num_adopters))
        cont+=1

    ##################################################################

    # i create the empty list of list for the Niter temporal evolutions
    num_shifts=0
    for n in G.nodes():
        G.node[n]["status"]="S"
        if G.node[n]['type']=="shift":
            num_shifts+=1

    # list_final_I_values_fixed_p=[]   # i dont care about the final values right now, but about the whole time evol
    list_lists_t_evolutions=[]

    for iter in range(Niter):
        print " iter:",iter

        list_I=[] #list infected doctors
        list_ordering=[]
        list_s=[]
        list_A=[]
        list_F=[]

        ########### set I.C.
        max_order=0
        for n in G.nodes():
            G.node[n]["status"]="S" # all nodes are Susceptible
            if G.node[n]['type']=="shift":
                list_s.append(n)
                if G.node[n]['order']>max_order:
                    max_order=G.node[n]['order'] # last shift-order, bounds the time loop
            else:
                # seed: Weiss and Wunderink always start infected
                if G.node[n]['label']=="Wunderink" or G.node[n]["label"]=="Weiss":
                    G.node[n]["status"]="I"
                    list_I.append(G.node[n]['label'])
                ######################## WHAT ABOUT SMITH AND SPORN???
                if G.node[n]['type']=="A":
                    list_A.append(n)
                if G.node[n]['type']=="F":
                    list_F.append(n)

        list_single_t_evolution=[]
        list_single_t_evolution.append(2.0) # I always start with TWO infected doctors!!

        for n in G.nodes(): # i make some DOCTORs INMUNE (anyone except Weiss and Wunderink)
            if (G.node[n]['type']=="A") or ( G.node[n]['type']=="F"):
                if G.node[n]['label']!="Wunderink" and G.node[n]["label"]!="Weiss": # these particular two cant be immune
                    rand=random.random()
                    if rand< prob_Immune:
                        G.node[n]["status"]="Immune"
        # print max_order

        ################# the dynamics starts:
        t=1
        while t<= max_order: # loop over shifts, in order
            for n in G.nodes():
                if G.node[n]['type']=="shift" and G.node[n]['order']==t:
                    flag_possible_infection=0
                    for doctor in G.neighbors(n): #first i check if any doctor is infected in this shift
                        if G.node[doctor]["status"]=="I":
                            flag_possible_infection=1
                    if flag_possible_infection:
                        for doctor in G.neighbors(n): # then the doctors in that shift, gets infected with prob_infection
                            if G.node[doctor]["status"]=="S":
                                rand=random.random()
                                if rand<prob_infection:
                                    G.node[doctor]["status"]="I"
                                    list_I.append(G.node[doctor]["label"])
            list_single_t_evolution.append(float(len(list_I)))#/(len(list_A)+len(list_F)))
            t+=1

        list_lists_t_evolutions.append(list_single_t_evolution)
    ######## end Niter
    ##############end loop Niter

    # average curve: mean over the Niter runs, time step by time step
    average_t_evolution=[]
    for i in range(len(list_single_t_evolution)): #time step by time step
        list_fixed_t=[]
        for iteracion in range (Niter): #loop over all independent iter of the process
            list_fixed_t.append(list_lists_t_evolutions[iteracion][i]) # i collect all values for the same t, different iter
        average_t_evolution.append(numpy.mean(list_fixed_t)) # i create the mean time evolution

    # distance of every individual run to the average curve
    list_dist_fixed_parameters=[]
    for lista in list_lists_t_evolutions:
        list_dist_fixed_parameters.append(compare_real_evol_vs_simus_to_be_called.compare_two_curves( lista,average_t_evolution))

    lista_tuplas=histograma_bines_gral.histograma_bins(list_dist_fixed_parameters,75, "../Results/histogr_distances_indiv_infect_simus_to_the_average_curve_p"+str(prob_infection)+"_"+"Immune"+str(prob_Immune)+"_"+str(Niter)+"iter_2012.dat") # Nbins=50
    #print lista_tuplas

    starting_point=compare_real_evol_vs_simus_to_be_called.compare_two_curves( list_actual_evol,average_t_evolution) # distance between actual curve and the mean curve

    # upper-tail probability of observing a distance >= starting_point
    prob=calculate_numeric_integral.integral(lista_tuplas, starting_point)

    print "the probability of having a distance equal or larger than",starting_point, "between actual-average curve is:", prob, "(it is to say, the prob. of the actual evolution being an individual realization of the Infection Model)"

    if all_team=="YES":
        file = open("../Results/distance_actual_to_average_curve_infection_all_team_as_adopters.dat",'wt')
    else:
        file = open("../Results/distance_actual_to_average_curve_infection.dat",'wt')
    print >> file,starting_point, 0.
    print >> file,starting_point+0.1, 1.
    file.close()

    if all_team=="YES":
        file2 = open("../Results/Results_distance_actual_to_average_curve_infection_all_team_as_adopters.dat",'wt')
    else:
        file2 = open("../Results/Results_distance_actual_to_average_curve_infection.dat",'wt')
    print >> file2, "the probability of having a distance equal or larger than",starting_point, "between actual-average curve is:", prob, "(it is to say, the prob. of the actual evolution being an individual realization of the Infection Model)"
    file2.close()
def main(graph_name): G = nx.read_gml(graph_name) all_team = "NO" # as adopters or not Niter = 20 dir_real_data = '../Results/' delta_end = 300 # >= than + or - dr difference at the end of the evolution (NO realization ends up closer than this!!!! if 2, i get and empty list!!!) output_file3 = dir_real_data + "Landscape_parameters_infection_" + str( Niter) + "iter.dat" file3 = open(output_file3, 'wt') file3.close() ###################################################################################### # I read the file of the actual evolution of the idea spreading in the hospital: ## ###################################################################################### if all_team == "YES": filename_actual_evol = dir_real_data + "HospitalModel_august1_adoption_counts_all_team_as_adopters_SIMPLER.csv" else: filename_actual_evol = dir_real_data + "HospitalModel_august1_adoption_counts_SIMPLER.csv" #ya no necesito CAMBIAR TB EL NOMBRE DEL ARCHIVO EN EL CODIGO PARA COMPARAR CURVAs list_actual_evol = [] result_actual_file = csv.reader(open(filename_actual_evol, 'rb'), delimiter=',') cont = 0 for row in result_actual_file: if cont > 0: # i ignore the first line with the headers num_adopters = row[3] list_actual_evol.append(float(num_adopters)) cont += 1 ################################################################## #../Results/network_final_schedule_withTeam3/infection/Average_time_evolution_Infection_p0.9_Immune0.5_1000iter_2012.dat prob_min = 1.00 prob_max = 1.01 delta_prob = 0.1 prob_Immune_min = 0.50 prob_Immune_max = 0.51 delta_prob_Immune = 0.1 dir = "../Results/network_final_schedule_withTeam3_local/infection/" dict_filenames_tot_distance = { } # i will save the filename as key and the tot distance from that curve to the original one prob_Immune = prob_Immune_min while prob_Immune <= prob_Immune_max: print "prom Immune:", prob_Immune prob_infection = prob_min while prob_infection <= prob_max: print " p:", prob_infection output_file2 = dir + 
"Average_time_evolution_Infection_p" + str( prob_infection) + "_" + "Immune" + str( prob_Immune) + "_" + str(Niter) + "iter_2012.dat" file2 = open(output_file2, 'wt') file2.close() # i create the empty list of list for the Niter temporal evolutions num_shifts = 0 for n in G.nodes(): G.node[n]["status"] = "S" if G.node[n]['type'] == "shift": num_shifts += 1 # list_final_I_values_fixed_p=[] # i dont care about the final values right now, but about the whole time evol list_lists_t_evolutions = [] list_dist_fixed_parameters = [] list_dist_at_ending_point_fixed_parameters = [] list_final_num_infected = [] for iter in range(Niter): print " iter:", iter #######OJO~!!!!!!!!!! COMENTAR ESTO CUANDO ESTOY BARRIENDO TOOOOOOOOOODO EL ESPACIO DE PARAMETROS file_name_indiv_evol = output_file2.strip("Average_").split( '.dat')[0] + "_indiv_iter" + str(iter) + ".dat" file4 = open(file_name_indiv_evol, 'wt') file4.close() ########################################## list_I = [] #list infected doctors list_ordering = [] list_s = [] ########### set I.C. max_order = 0 for n in G.nodes(): G.node[n]["status"] = "S" # all nodes are Susceptible if G.node[n]['type'] == "shift": list_s.append(n) if G.node[n]['order'] > max_order: max_order = G.node[n]['order'] else: if G.node[n]['label'] == "Wunderink" or G.node[n][ "label"] == "Weiss": G.node[n]["status"] = "I" list_I.append(G.node[n]['label']) list_single_t_evolution = [] list_single_t_evolution.append( 2.0) # I always start with TWO infected doctors!! 
for n in G.nodes( ): # i make some DOCTORs INMUNE (anyone except Weiss and Wunderink) if (G.node[n]['type'] == "A") or (G.node[n]['type'] == "F"): if G.node[n]['label'] != "Wunderink" and G.node[n][ "label"] != "Weiss": rand = random.random() if rand < prob_Immune: G.node[n]["status"] = "Immune" # print max_order ################# the dynamics starts: t = 1 while t <= max_order: # loop over shifts, in order for n in G.nodes(): if G.node[n]['type'] == "shift" and G.node[n][ 'order'] == t: flag_possible_infection = 0 for doctor in G.neighbors( n ): #first i check if any doctor is infected in this shift if G.node[doctor]["status"] == "I": flag_possible_infection = 1 if flag_possible_infection: for doctor in G.neighbors( n ): # then the doctors in that shift, gets infected with prob_infection if G.node[doctor]["status"] == "S": rand = random.random() if rand < prob_infection: G.node[doctor]["status"] = "I" list_I.append( G.node[doctor]["label"]) list_single_t_evolution.append(float(len(list_I))) t += 1 ######## end t loop ########OJO~!!!!!!!!!! 
COMENTAR ESTO CUANDO ESTOY BARRIENDO TOOOOOOOOOODO EL ESPACIO DE PARAMETROS file4 = open(file_name_indiv_evol, 'at') for i in range( len(list_single_t_evolution)): #ime step by time step print >> file4, i, list_single_t_evolution[ i], prob_infection, prob_Immune file4.close() ######################################################## list_lists_t_evolutions.append(list_single_t_evolution) list_dist_fixed_parameters.append( compare_real_evol_vs_simus_to_be_called.compare_two_curves( list_actual_evol, list_single_t_evolution)) list_dist_at_ending_point_fixed_parameters.append( abs(list_single_t_evolution[-1] - list_actual_evol[-1]) ) # i save the distance at the ending point between the current simu and actual evol list_final_num_infected.append(time_evol_number_adopters[-1]) ######## end loop Niter list_pair_dist_std_delta_end = [] list_pair_dist_std_delta_end.append( numpy.mean(list_dist_fixed_parameters) ) # average dist between the curves over Niter list_pair_dist_std_delta_end.append( numpy.std(list_dist_fixed_parameters)) list_pair_dist_std_delta_end.append( numpy.mean(list_dist_at_ending_point_fixed_parameters)) file3 = open(output_file3, 'at') # i print out the landscape print >> file3, alpha_F, damping, mutual_encouragement, threshold, numpy.mean( list_dist_at_ending_point_fixed_parameters), numpy.mean( list_dist_fixed_parameters), numpy.mean( list_final_num_adopt), numpy.std(list_final_num_adopt) file3.close() if ( numpy.mean(list_dist_at_ending_point_fixed_parameters) ) <= delta_end: # i only consider situations close enough at the ending point dict_filenames_tot_distance[ output_file2] = list_pair_dist_std_delta_end file2 = open(output_file2, 'at') for s in range(len(list_single_t_evolution)): list_fixed_t = [] for iter in range(Niter): list_fixed_t.append(list_lists_t_evolutions[iter][s]) print >> file2, s, numpy.mean(list_fixed_t) file2.close() # file = open(output_file,'at') #print >> file, prob_infection, numpy.mean(list_final_I_values_fixed_p) 
#file.close() print list_dist_fixed_parameters histograma_bines_gral.histograma_bins( list_dist_fixed_parameters, 50, "../Results/histogr_distances_indiv_infect_simus_to_the_average_curve_p" + str(prob_infection) + "_" + "Immune" + str(prob_Immune) + "_" + str(Niter) + "iter.dat") # Nbins=100 prob_infection += delta_prob prob_Immune += delta_prob_Immune compare_real_evol_vs_simus_to_be_called.pick_minimum_same_end( dict_filenames_tot_distance, "Infection", all_team, Niter)
def main(): minimum_time = 180 min_num_weigh_ins = 2 impossible_weight_change = 80. # plus or minus, it is a mistake for_testing_max_num_queries = 1000 database = "calorie_king_social_networking_2010" server = "tarraco.chem-eng.northwestern.edu" user = "******" passwd = "n1ckuDB!" db = Connection(server, database, user, passwd) query1 = """select * from users""" result1 = db.query(query1) # is a list of dict. num_impossible_weight_changes = 0 # values larger than 100 or smaller than -100 tot_users = 0 tot_users_2weigh_ins = 0 tot_users_6months = 0 tot_users_2weigh_ins_6months = 0 list_weight_changes_before_6months = [] list_weight_changes_after_6months = [] list_days_before_6months = [] list_days_after_6months = [] contador = 0 for r1 in result1: # if contador <= for_testing_max_num_queries: contador += 1 ck_id = r1['ck_id'] tot_users += 1 list_before_6months = [] list_after_6months = [] query2 = "select * from weigh_in_history where (ck_id ='" + str( ck_id) + "') order by on_day" result2 = db.query(query2) # is a list of dicts. 
if len(result2) > min_num_weigh_ins: tot_users_2weigh_ins += 1 first_date = result2[0]['on_day'] last_date = result2[-1]['on_day'] time_system = (last_date - first_date).days + 1 if time_system >= minimum_time: tot_users_6months += 1 if len(result2) > min_num_weigh_ins: tot_users_2weigh_ins_6months += 1 print tot_users, tot_users_2weigh_ins_6months # print ck_id for r2 in result2: fecha = r2['on_day'] weight = r2['weight'] num_days = (fecha - first_date).days + 1 lista = [] lista.append(num_days) lista.append(weight) if num_days < minimum_time: list_before_6months.append(lista) else: list_after_6months.append(lista) # print fecha, num_days,weight weight_change_before = list_before_6months[-1][ 1] - list_before_6months[0][1] if weight_change_before < impossible_weight_change and weight_change_before > -impossible_weight_change: list_weight_changes_before_6months.append( weight_change_before) else: num_impossible_weight_changes += 1 weight_change_after = list_after_6months[-1][ 1] - list_after_6months[0][1] if weight_change_after < 100. 
and weight_change_after > -100.: list_weight_changes_after_6months.append( weight_change_after) else: num_impossible_weight_changes += 1 days_after = list_after_6months[-1][0] - list_after_6months[0][ 0] list_days_after_6months.append(days_after) days_before = list_before_6months[-1][0] - list_before_6months[ 0][0] list_days_before_6months.append(days_before) # print 'weight change before 6months:',weight_change_before,' over:',days_before,'days, counting',len(list_before_6months),'weigh_ins' #print 'and after:',weight_change_after, 'over:',days_after,'days, counting',len(list_after_6months),'weigh_ins\n' print "tot number of users:", len( result1 ), " || num users >= 2 weigh-ins:", tot_users_2weigh_ins, " || num users >= 6months:", tot_users_6months, " || num users >= 2 weigh-ins and >= 6months:", tot_users_2weigh_ins_6months, "\n" print "average weight change before 6months:", numpy.mean(list_weight_changes_before_6months), "SD:", numpy.std(list_weight_changes_before_6months), \ "over:", numpy.mean(list_days_before_6months), "days on average, SD:",numpy.std(list_days_before_6months) print "average weight change after 6months:", numpy.mean(list_weight_changes_after_6months), "SD:", numpy.std(list_weight_changes_after_6months), \ "over:", numpy.mean(list_days_after_6months), "days on average, SD:",numpy.std(list_days_after_6months),"\n" ks = stats.ks_2samp(list_weight_changes_before_6months, list_weight_changes_after_6months) print "KS test for list weight changes before vs after the 6months:", ks print "number of impossible weight changes:", num_impossible_weight_changes num_users_regain = 0. for item in list_weight_changes_after_6months: if item > 0.: num_users_regain += 1. print "fraction users who (re)gain weight from the 6month mark on:", num_users_regain / float( len(list_weight_changes_after_6months)), "(average over", len( list_weight_changes_after_6months), "users)" num_users_gain = 0. 
for item in list_weight_changes_before_6months: if item > 0.: num_users_gain += 1. print "fraction users who gain weight during the first 6months:", num_users_gain / float( len(list_weight_changes_after_6months)), "(average over", len( list_weight_changes_after_6months), "users)" histograma_bines_gral.histograma_bins(list_weight_changes_before_6months, 30, "weight_change_before_6months") histograma_bines_gral.histograma_bins(list_weight_changes_after_6months, 30, "weight_change_after_6months")
def main():
    """Compare two T-S-plane cooperation maps point by point.

    Reads two whitespace-separated files with columns ``T S <Coop> STD`` and
    writes, preserving the input's blank-line block structure:
      - a ratio file:          avg1 / avg2 for each (T, S) point
      - a relative-diff file:  (avg1 - avg2) / avg2 for each (T, S) point
    Finally it histograms all ratio and relative-diff values via the
    project helper ``histograma_bines_gral.histograma_bins``.

    NOTE(review): output names are derived from the input filenames; any
    pair not matching the gender/ages/rounds patterns aborts via exit().
    """
    ######## input files for the comparison
    # filename1="../Results/Cooperation_TSplane_gender_1.dat" #columns: T S <Coop> STD
    #filename2="../Results/Cooperation_TSplane_gender_0.dat"
    #filename1="../Results/Cooperation_TSplane_All_ages_rounds1_3.dat"
    filename1="../Results/Cooperation_TSplane_All_ages_rounds4_10.dat"
    filename2="../Results/Cooperation_TSplane_All_ages_rounds11_18.dat"
    #filename1="../Results/Cooperation_TSplane_young_ages.dat"
    #filename2="../Results/Cooperation_TSplane_adult_ages.dat"
    print "comparing files:"
    print " ", filename1
    print " ", filename2

    ####### output ratio file (name depends on what kind of comparison this is)
    if "gender" in filename1:
        filename_ratio="../Results/Ratio_Cooperation_TSplane_genders_1_div_0.dat"
        filename_diff_relat="../Results/Diff_relat_Cooperation_TSplane_genders_1_div_0.dat"
    elif "ages.dat" in filename1:
        filename_ratio="../Results/Ratio_Cooperation_TSplane_ages_young_div_adult.dat"
        filename_diff_relat="../Results/Diff_relat_Cooperation_TSplane_ages_young_div_adult.dat"
    elif "round" in filename1:
        # round ranges are extracted from the filenames themselves, e.g. "rounds4_10"
        filename_ratio="../Results/Ratio_Cooperation_TSplane_rounds_"+filename1.split("rounds")[1].split(".")[0]+"_div_"+filename2.split("rounds")[1].split(".")[0]+".dat"
        filename_diff_relat="../Results/Diff_relat_Cooperation_TSplane_rounds_"+filename1.split("rounds")[1].split(".")[0]+"_div_"+filename2.split("rounds")[1].split(".")[0]+".dat"
    else:
        exit()

    file_ratio=open(filename_ratio, 'wt')
    file_diff_relat=open(filename_diff_relat, 'wt')
    print filename_ratio
    print filename_diff_relat

    ####### read file1 and save it, remembering the (T, S) row order
    dict_file1_avg={}   # (T, S) -> avg cooperation (file1)
    dict_file1_std={}   # (T, S) -> std of cooperation (file1)
    list_order_tuplas=[]
    file1=open(filename1,'r')
    for line_aux in file1:  # read line by line instead of loading everything into memory first (instead of: for line in list_lines)!!
        try:
            line=line_aux.split(" ")
            T=line[0]
            S=line[1]
            tupla=(T, S)
            list_order_tuplas.append(tupla)
            avg_Coop=line[2]
            std_Coop=line[3]
            dict_file1_avg[tupla]=float(avg_Coop)
            dict_file1_std[tupla]=float(std_Coop)
        except IndexError:  # empty line: keep a " " placeholder so the block structure can be reproduced on output
            list_order_tuplas.append(" ")

    ####### read file2 and save it (order not needed, file1 drives the output)
    dict_file2_avg={}
    dict_file2_std={}
    file2=open(filename2,'r')
    for line_aux in file2:
        try:
            line=line_aux.split(" ")
            T=line[0]
            S=line[1]
            tupla=(T, S)
            avg_Coop=line[2]
            std_Coop=line[3]
            dict_file2_avg[tupla]=float(avg_Coop)
            dict_file2_std[tupla]=float(std_Coop)
        except IndexError:  # empty line
            pass

    list_ratios=[]
    ######### print out the ratio file with the same block structure
    for tupla in list_order_tuplas:
        if tupla != " ":  # a space was added artificially to separate blocks
            ratio=dict_file1_avg[tupla]/dict_file2_avg[tupla]
            diff_relat=(dict_file1_avg[tupla]-dict_file2_avg[tupla])/dict_file2_avg[tupla]
            # print tupla[0], tupla[1], dict_file1_avg[tupla], dict_file2_avg[tupla], " ratio:",ratio
            print >> file_ratio, tupla[0], tupla[1], ratio
            print >> file_diff_relat, tupla[0], tupla[1], diff_relat
            list_ratios.append(ratio)
            list_ratios.append(diff_relat)
        else:
            # blank separator line between T-blocks
            print >> file_ratio
            print >> file_diff_relat
    print "written file:",filename_ratio
    print "written file:",filename_diff_relat

    # histogram filename follows the same naming logic as above
    # NOTE(review): no else branch here — an unmatched filename would leave
    # name_h undefined (NameError); unreachable given the exit() above.
    if "gender" in filename1:
        name_h="../Results/histogram_Ratios_Cooperation_TSplane_genders_1_div_0.dat"
    elif "ages.dat" in filename1:
        name_h="../Results/histogram_Ratios_Cooperation_TSplane_ages_young_div_adult.dat"
    elif "round" in filename1:
        name_h="../Results/histogram_Ratios_Cooperation_TSplane_rounds_"+filename1.split("rounds")[1].split(".")[0]+"_div_"+filename2.split("rounds")[1].split(".")[0]+".dat"
    histograma_bines_gral.histograma_bins(list_ratios,20, name_h)
def main():
    """Build per-(T,S)-point statistics of the game experiment.

    Loads the per-player pickle (one dict per player, each with a 'rondes'
    list of per-round dicts) and accumulates, for every (T, S) point of the
    plane and subject to age/round/gender filters: cooperation (C=1, D=0),
    rationality, ambition, payoff and payoff normalized by the max of
    [P, R, T, S].  Writes avg/std/sem per point to five output files
    (blank line between T-blocks), plus matrix-format files and two
    histograms (ages, total payoffs) via project helpers.
    """
    pupulation_age = "All"  #"young" # or "adult" or "All"
    if pupulation_age == "young":
        min_age_threshold = 0
        max_age_threshold = 15
    elif pupulation_age == "adult":
        min_age_threshold = 16
        max_age_threshold = 100
    elif pupulation_age == "All":
        min_age_threshold = 0
        max_age_threshold = 100
    else:
        print "wrong age range"
        exit()

    gender_flag = "All"  #"All" or 1 for males or 0 for females
    # game payoffs: R (reward) and P (punishment) are fixed; T, S vary per round
    R = 10
    P = 5
    ####### to select results only from given rounds (both ends included)
    min_round = 1
    max_round = 18

    ######### input file
    filename = "../Data/userdata.pickle"
    master_list = pickle.load(open(
        filename, 'rb'))  # a list: one element per player (541)
    #########

    ######### output files
    Nbins_ages = 15
    name_h_ages = "../Results/histogram_ages_gender" + str(
        gender_flag) + ".dat"
    Nbins_payoffs = 20
    name_h_payoffs = "../Results/histogram_payoffs_" + str(
        pupulation_age) + "_ages_rounds" + str(min_round) + "_" + str(
            max_round) + "_gender" + str(gender_flag) + ".dat"
    output_filename1 = "../Results/Cooperation_TSplane_" + str(
        pupulation_age) + "_ages_rounds" + str(min_round) + "_" + str(
            max_round) + "_gender" + str(gender_flag) + ".dat"
    output1 = open(output_filename1, 'wt')
    output_filename2 = "../Results/Racionality_TSplane_" + str(
        pupulation_age) + "_ages_rounds" + str(min_round) + "_" + str(
            max_round) + "_gender" + str(gender_flag) + ".dat"
    output2 = open(output_filename2, 'wt')
    output_filename3 = "../Results/Ambition_TSplane_" + str(
        pupulation_age) + "_ages_rounds" + str(min_round) + "_" + str(
            max_round) + "_gender" + str(gender_flag) + ".dat"
    output3 = open(output_filename3, 'wt')
    output_filename4 = "../Results/Payoff_TSplane_" + str(
        pupulation_age) + "_ages_rounds" + str(min_round) + "_" + str(
            max_round) + "_gender" + str(gender_flag) + ".dat"
    output4 = open(output_filename4, 'wt')
    output_filename5 = "../Results/Payoff_norm_TSplane_" + str(
        pupulation_age) + "_ages_rounds" + str(min_round) + "_" + str(
            max_round) + "_gender" + str(gender_flag) + ".dat"
    output5 = open(output_filename5, 'wt')
    # filenames 6 and 7 are only passed to print_values_dict_for_matrix_plotting (not opened here)
    output_filename6 = "../Results/SEM_cooperation_TSplane_" + str(
        pupulation_age) + "_ages_rounds" + str(min_round) + "_" + str(
            max_round) + "_gender" + str(gender_flag) + ".dat"
    output_filename7 = "../Results/Numer_actions_TSplane_" + str(
        pupulation_age) + "_ages_rounds" + str(min_round) + "_" + str(
            max_round) + "_gender" + str(gender_flag) + ".dat"
    #########

    ### master_list has the form: [{'guany_total': 110L, 'partida': 1L, 'genere': u'h', 'num_eleccions': 14, 'edat': 50L, 'rationality': 66.66..., 'ambition': 100.0, 'rondes': [{'guany_oponent': 10L, 'ambition': None, 'seleccio': u'C', 'oponent': 7L, 'S': 6L, 'T': 5L, 'seleccio_oponent': u'C', 'numronda': 1L, 'guany': 10L, 'cuadrant': u'Harmony', 'rationality': 1.0}, ...], 'nickname': u'Caesar', 'id': 2L}]
    # i.e. each player dict holds, under 'rondes', a list of per-round dicts.

    # accumulators keyed by the (T, S) point of the plane
    dict_TSplane_list_actions = {}
    dict_TSplane_avg_coop = {}
    dict_TSplane_std_coop = {}
    dict_TSplane_sem_coop = {}  # error of the mean =std/ sqrt(num points)
    dict_TSplane_list_rationality = {}
    dict_TSplane_avg_rationality = {}
    dict_TSplane_std_rationality = {}
    dict_TSplane_list_ambition = {}
    dict_TSplane_avg_ambition = {}
    dict_TSplane_std_ambition = {}
    dict_TSplane_list_payoff = {}
    dict_TSplane_avg_payoff = {}
    dict_TSplane_std_payoff = {}
    dict_TSplane_sem_payoff = {}
    dict_TSplane_list_payoff_norm = {
    }  # normalized payoff by the maximun possible in that TS point
    dict_TSplane_avg_payoff_norm = {}
    dict_TSplane_std_payoff_norm = {}
    dict_TSplane_sem_payoff_norm = {}
    dict_TSplane_num_actions = {}

    list_ages = []
    list_payoff_tot = []  # calculated (by Jordi) up to round #13
    cont_diff = 0  # counts players whose #choices != #rounds

    for dictionary in master_list:  # each list element is itself a dict (one player)
        payoff_total = float(dictionary['guany_total']
                             )  # this is calculated only up to round #13 !!
        partida = dictionary['partida']
        list_payoff_tot.append(payoff_total)
        genero = dictionary['genere']
        if genero == "h":
            genero = 1
        elif genero == "d":
            genero = 0
        if gender_flag == "All" or gender_flag == genero:
            num_elecciones = dictionary['num_eleccions']
            age = int(dictionary['edat'])
            avg_racionalidad = dictionary['rationality']
            avg_ambicion = dictionary['ambition']
            num_rondas = len(dictionary['rondes'])
            nickname = unidecode(dictionary['nickname']).replace(" ", "_")
            user_id = dictionary['id']
            if num_elecciones != num_rondas:
                cont_diff += 1
            list_dict_rondas = dictionary['rondes']
            list_ages.append(age)
            for dict_ronda in list_dict_rondas:
                ## each round dict has: {'guany_oponent': 10L, 'ambition': None, 'seleccio': u'C', 'oponent': 7L, 'S': 6L, 'T': 5L, 'seleccio_oponent': u'C', 'numronda': 1L, 'guany': 10L, 'cuadrant': u'Harmony', 'rationality': 1.0}
                T = int(dict_ronda['T'])
                S = int(dict_ronda['S'])
                list_four_possible_values = [P, R, T, S]
                punto_TS = (T, S)
                try:
                    payoff = float(dict_ronda['guany'])
                    payoff_norm = float(dict_ronda['guany']) / float(
                        max(list_four_possible_values))
                except TypeError:
                    payoff = dict_ronda['guany']  # if payoff is None
                payoff_oponent = dict_ronda['guany_oponent']
                rationality = dict_ronda['rationality']
                ambition = dict_ronda['ambition']
                if rationality != None:
                    rationality = float(rationality) * 100.
                if ambition != None:
                    ambition = float(ambition) * 100.
                round_number = dict_ronda['numronda']
                action = dict_ronda['seleccio']
                if action == "C":
                    action = 1.
                elif action == "D":
                    action = 0.  # if nothing was chosen, it is None
                num_ronda = dict_ronda['numronda']
                quadrant = dict_ronda['cuadrant'].replace(" ", "_").replace(
                    "'", "")
                action_oponent = dict_ronda['seleccio_oponent']
                if action_oponent == "C":
                    action_oponent = 1.
                elif action_oponent == "D":
                    action_oponent = 0.  # if nothing was chosen, it is None
                oponent_id = dict_ronda['oponent']
                if punto_TS in dict_TSplane_list_actions:
                    # point already seen: append subject to action/age/round filters
                    if action != None:
                        if age >= min_age_threshold and age <= max_age_threshold:
                            if round_number <= max_round and round_number >= min_round:
                                dict_TSplane_list_actions[punto_TS].append(
                                    action)  # 1:C, 0:D
                                if rationality != None:
                                    dict_TSplane_list_rationality[
                                        punto_TS].append(rationality)
                                if ambition != None:
                                    dict_TSplane_list_ambition[
                                        punto_TS].append(ambition)
                                dict_TSplane_list_payoff[punto_TS].append(
                                    payoff)
                                dict_TSplane_list_payoff_norm[punto_TS].append(
                                    payoff_norm)
                else:
                    # first time this (T, S) point passes the filters: create the lists
                    if action != None:
                        if age >= min_age_threshold and age <= max_age_threshold:
                            if round_number <= max_round and round_number >= min_round:
                                dict_TSplane_list_actions[punto_TS] = []
                                dict_TSplane_list_actions[punto_TS].append(
                                    action)
                                if rationality != None:
                                    dict_TSplane_list_rationality[
                                        punto_TS] = []
                                    dict_TSplane_list_rationality[
                                        punto_TS].append(rationality)
                                if ambition != None:
                                    dict_TSplane_list_ambition[punto_TS] = []
                                    dict_TSplane_list_ambition[
                                        punto_TS].append(ambition)
                                dict_TSplane_list_payoff[punto_TS] = []
                                dict_TSplane_list_payoff[punto_TS].append(
                                    payoff)
                                dict_TSplane_list_payoff_norm[punto_TS] = []
                                dict_TSplane_list_payoff_norm[punto_TS].append(
                                    payoff_norm)
                                # print "payoff:",payoff,"list:", list_four_possible_values,"max:", max(list_four_possible_values), payoff_norm

    old_T = None
    ####### the avg cooperation per TS point
    # old_T tracks the previous T value so a blank line separates T-blocks in the output
    for punto_TS in sorted(dict_TSplane_list_actions):
        dict_TSplane_avg_coop[punto_TS] = numpy.mean(
            dict_TSplane_list_actions[punto_TS])
        dict_TSplane_std_coop[punto_TS] = numpy.std(
            dict_TSplane_list_actions[punto_TS])
        dict_TSplane_sem_coop[punto_TS] = stats.sem(
            dict_TSplane_list_actions[punto_TS]
        )  # standard error =std / sqrt(num points)
        dict_TSplane_avg_payoff[punto_TS] = numpy.mean(
            dict_TSplane_list_payoff[punto_TS])
        dict_TSplane_std_payoff[punto_TS] = numpy.std(
            dict_TSplane_list_payoff[punto_TS])
        dict_TSplane_sem_payoff[punto_TS] = stats.sem(
            dict_TSplane_list_payoff[punto_TS])
        dict_TSplane_avg_payoff_norm[punto_TS] = numpy.mean(
            dict_TSplane_list_payoff_norm[punto_TS])
        dict_TSplane_std_payoff_norm[punto_TS] = numpy.std(
            dict_TSplane_list_payoff_norm[punto_TS])
        dict_TSplane_sem_payoff_norm[punto_TS] = stats.sem(
            dict_TSplane_list_payoff_norm[punto_TS])
        dict_TSplane_num_actions[punto_TS] = len(
            dict_TSplane_list_actions[punto_TS])
        if old_T != punto_TS[0]:
            print >> output1
            print >> output4
            print >> output5
        print >> output1, punto_TS[0], punto_TS[1], dict_TSplane_avg_coop[
            punto_TS], dict_TSplane_std_coop[punto_TS], dict_TSplane_sem_coop[
                punto_TS]
        old_T = punto_TS[0]
        print >> output4, punto_TS[0], punto_TS[1], dict_TSplane_avg_payoff[
            punto_TS], dict_TSplane_std_payoff[
                punto_TS], dict_TSplane_sem_payoff[punto_TS]
        old_T = punto_TS[0]
        print >> output5, punto_TS[0], punto_TS[
            1], dict_TSplane_avg_payoff_norm[
                punto_TS], dict_TSplane_std_payoff_norm[
                    punto_TS], dict_TSplane_sem_payoff_norm[punto_TS]
        old_T = punto_TS[0]

    old_T = None
    # same pass for rationality (sem computed inline as std/sqrt(n))
    for punto_TS in sorted(dict_TSplane_list_rationality):
        dict_TSplane_avg_rationality[punto_TS] = numpy.mean(
            dict_TSplane_list_rationality[punto_TS])
        dict_TSplane_std_rationality[punto_TS] = numpy.std(
            dict_TSplane_list_rationality[punto_TS])
        if old_T != punto_TS[0]:
            print >> output2
        print >> output2, punto_TS[0], punto_TS[
            1], dict_TSplane_avg_rationality[
                punto_TS], dict_TSplane_std_rationality[
                    punto_TS], dict_TSplane_std_rationality[
                        punto_TS] / numpy.sqrt(
                            len(dict_TSplane_list_rationality[punto_TS]))
        old_T = punto_TS[0]

    old_T = None
    # same pass for ambition
    for punto_TS in sorted(dict_TSplane_list_ambition):
        dict_TSplane_avg_ambition[punto_TS] = numpy.mean(
            dict_TSplane_list_ambition[punto_TS])
        dict_TSplane_std_ambition[punto_TS] = numpy.std(
            dict_TSplane_list_ambition[punto_TS])
        if old_T != punto_TS[0]:
            print >> output3
        print >> output3, punto_TS[0], punto_TS[1], dict_TSplane_avg_ambition[
            punto_TS], dict_TSplane_std_ambition[
                punto_TS], dict_TSplane_std_ambition[punto_TS] / numpy.sqrt(
                    len(dict_TSplane_list_ambition[punto_TS]))
        old_T = punto_TS[0]
    old_T = punto_TS[0]  # NOTE(review): stray assignment after the loop; has no effect

    histograma_bines_gral.histograma_bins(
        list_ages, Nbins_ages, name_h_ages
    )  #x_position , norm_count, count, norm_cumulat_count, cumulat_count , float(hist[b])/float(len(lista))
    histograma_bines_gral.histograma_bins(list_payoff_tot, Nbins_payoffs,
                                          name_h_payoffs)

    print_values_dict_for_matrix_plotting(dict_TSplane_avg_coop,
                                          output_filename1)
    print_values_dict_for_matrix_plotting(dict_TSplane_sem_coop,
                                          output_filename6)
    print_values_dict_for_matrix_plotting(dict_TSplane_num_actions,
                                          output_filename7)

    output1.close()
    output2.close()
    output3.close()
    output4.close()
    output5.close()
    print "written output datafile:", output_filename1
    print "written output datafile:", output_filename2
    print "written output datafile:", output_filename3
    print "written output datafile:", output_filename4
    print "written output datafile:", output_filename5
    print "num. times that #rounds != # elections", cont_diff
    print "lenght master list.", len(master_list)
def main():
    """Classify players as cooperators/defectors inside a focus region of
    the T-S plane and test whether each set is special by bootstrapping.

    The focus region is picked by ``type_definition`` (Harmony, lower/higher
    Harmony triangles, PD, lower/higher PD triangles, SH, SD).  A player is
    an "avg cooperator" if their mean cooperation inside the region exceeds
    ``umbral_coop``.  The three resulting user-id lists (cooperators,
    defectors, all users) are pickled, and ``bootstrapping.zscore`` (project
    helper) compares each set's plane-wide cooperation against the global
    distribution.
    """
    pupulation_age = "All"  #"young" # or "adult" or "All"
    if pupulation_age == "young":
        min_age_threshold = 0
        max_age_threshold = 15
    elif pupulation_age == "adult":
        min_age_threshold = 16
        max_age_threshold = 100
    elif pupulation_age == "All":
        min_age_threshold = 0
        max_age_threshold = 100
    else:
        print "wrong age range"
        exit()

    # R=10
    # P=5
    ####### to select results only from given rounds (both ends included)
    min_round = 1
    max_round = 18

    umbral_coop = 0.75  # to saparate good people from bastards in the lower Harmony
    Niter = 5000  # for the bootstrapping
    type_definition = "Harmony"  # "lowerPD" "higherPD"# # #"higherHarmony" #"lowerHarmony" #"SD" # or "PD" "SH" "Harmony"
    print "Cooperation threshold for good people in", type_definition, umbral_coop
    print "Niter for bootstrapping:", Niter

    ######### input file
    filename = "../Data/userdata.pickle"
    master_list = pickle.load(open(
        filename, 'rb'))  # a list: one element per player (541)

    ######### output files
    pickle_file_good_people = "../Results/list_good_guys_" + str(
        type_definition) + "_threshold_coop" + str(umbral_coop) + ".pickle"
    pickle_file_bad_people = "../Results/list_bad_guys_" + str(
        type_definition) + "_threshold_coop" + str(umbral_coop) + ".pickle"
    pickle_file_all = "../Results/list_all_users.pickle"
    Nbins_avg_coop = 20
    name_h_avg_coop = "../Results/histogram_general_avg_coop.dat"
    #########

    ### master_list has the form: [{'guany_total': 110L, 'partida': 1L, 'genere': u'h', 'num_eleccions': 14, 'edat': 50L, 'rationality': 66.66..., 'ambition': 100.0, 'rondes': [{'guany_oponent': 10L, 'ambition': None, 'seleccio': u'C', 'oponent': 7L, 'S': 6L, 'T': 5L, 'seleccio_oponent': u'C', 'numronda': 1L, 'guany': 10L, 'cuadrant': u'Harmony', 'rationality': 1.0}, ...], 'nickname': u'Caesar', 'id': 2L}]
    # i.e. each player dict holds, under 'rondes', a list of per-round dicts.

    num_valid_actions = 0.
    num_actions_in_focus_region = 0
    num_coop_actions_in_focus_region = 0
    dict_user_list_actions_in_focus_region = {
    }  # that region is either the lower Harmony or the whole PD
    dict_user_avg_coop_in_focus_region = {}
    dict_user_list_actions = {}
    dict_user_avg_coop = {}
    num_users = float(len(master_list))
    list_cooperators_in_focus_region = [
    ]  # if the cooperate at least once in the region
    list_defectors_in_focus_region = []
    list_all_users = []

    ##### loop over different users
    for dictionary in master_list:  # each list element is itself a dict (one player)
        nickname = unidecode(dictionary['nickname']).replace(" ", "_")
        user_id = dictionary['id']
        payoff_total = float(dictionary['guany_total']
                             )  # this is calculated only up to round #13 !!
        partida = dictionary['partida']
        gender = dictionary['genere']
        if gender == "h":
            gender = 1
        elif gender == "d":
            gender = 0
        num_elecciones = int(dictionary['num_eleccions'])
        age = int(dictionary['edat'])
        avg_racionalidad = dictionary['rationality']
        avg_ambicion = dictionary['ambition']
        num_rondas = len(dictionary['rondes'])
        list_dict_rondas = dictionary['rondes']
        ######## list of rounds for a given user_id
        for dict_ronda in list_dict_rondas:
            ## each round dict has: {'guany_oponent': 10L, 'ambition': None, 'seleccio': u'C', 'oponent': 7L, 'S': 6L, 'T': 5L, 'seleccio_oponent': u'C', 'numronda': 1L, 'guany': 10L, 'cuadrant': u'Harmony', 'rationality': 1.0}
            T = int(dict_ronda['T'])
            S = int(dict_ronda['S'])
            punto_TS = (T, S)
            round_number = dict_ronda['numronda']
            action = dict_ronda['seleccio']
            if action == "C":
                action = 1.
            elif action == "D":
                action = 0.  # if nothing was chosen, it is None
            if action != None:
                num_valid_actions += 1
                if user_id not in list_all_users:
                    list_all_users.append(user_id)
            num_ronda = dict_ronda['numronda']
            quadrant = dict_ronda['cuadrant'].replace(" ", "_").replace("'",
                                                                        "")
            action_oponent = dict_ronda['seleccio_oponent']
            if action_oponent == "C":
                action_oponent = 1.
            elif action_oponent == "D":
                action_oponent = 0.  # if nothing was chosen, it is None

            #### for the general histogram of cooperation
            if user_id not in dict_user_list_actions:
                dict_user_list_actions[user_id] = []
            if action != None:
                dict_user_list_actions[user_id].append(action)

            # One branch per focus-region definition; all branches repeat the
            # same pattern: register the user, count coop/defect actions, and
            # append valid actions for that user inside the region.
            if type_definition == "Harmony":
                # whole Harmony quadrant: S in [5,10], T in [5,10]
                if S >= 5 and S <= 10:
                    if T >= 5 and T <= 10:
                        if user_id not in dict_user_list_actions_in_focus_region:
                            dict_user_list_actions_in_focus_region[
                                user_id] = []
                        if action == 1:
                            if user_id not in list_cooperators_in_focus_region:
                                list_cooperators_in_focus_region.append(
                                    user_id)
                            num_coop_actions_in_focus_region += 1
                        elif action == 0:
                            if user_id not in list_defectors_in_focus_region:
                                list_defectors_in_focus_region.append(user_id)
                        if action != None:
                            dict_user_list_actions_in_focus_region[
                                user_id].append(action)
                            num_actions_in_focus_region += 1
            elif type_definition == "lowerHarmony":
                if S >= 5 and S <= 10:
                    if T >= 5 and T <= 10:
                        if S <= T:  # the lower triangle of the Harmony game:
                            if user_id not in dict_user_list_actions_in_focus_region:
                                dict_user_list_actions_in_focus_region[
                                    user_id] = []
                            if action == 1:
                                if user_id not in list_cooperators_in_focus_region:
                                    list_cooperators_in_focus_region.append(
                                        user_id)
                                num_coop_actions_in_focus_region += 1
                            elif action == 0:
                                if user_id not in list_defectors_in_focus_region:
                                    list_defectors_in_focus_region.append(
                                        user_id)
                            if action != None:
                                dict_user_list_actions_in_focus_region[
                                    user_id].append(action)
                                num_actions_in_focus_region += 1
            elif type_definition == "higherHarmony":
                if S >= 5 and S <= 10:
                    if T >= 5 and T <= 10:
                        if S > T:  # the higher triangle of the Harmony game:
                            if user_id not in dict_user_list_actions_in_focus_region:
                                dict_user_list_actions_in_focus_region[
                                    user_id] = []
                            if action == 1:
                                if user_id not in list_cooperators_in_focus_region:
                                    list_cooperators_in_focus_region.append(
                                        user_id)
                                num_coop_actions_in_focus_region += 1
                            elif action == 0:
                                if user_id not in list_defectors_in_focus_region:
                                    list_defectors_in_focus_region.append(
                                        user_id)
                            if action != None:
                                dict_user_list_actions_in_focus_region[
                                    user_id].append(action)
                                num_actions_in_focus_region += 1
            elif type_definition == "PD":
                # whole Prisoner's Dilemma quadrant: S in [0,5], T in [10,15]
                if S >= 0 and S <= 5:
                    if T >= 10 and T <= 15:
                        if user_id not in dict_user_list_actions_in_focus_region:
                            dict_user_list_actions_in_focus_region[
                                user_id] = []
                        if action == 1:
                            if user_id not in list_cooperators_in_focus_region:
                                list_cooperators_in_focus_region.append(
                                    user_id)
                            num_coop_actions_in_focus_region += 1
                        elif action == 0:
                            if user_id not in list_defectors_in_focus_region:
                                list_defectors_in_focus_region.append(user_id)
                        if action != None:
                            dict_user_list_actions_in_focus_region[
                                user_id].append(action)
                            num_actions_in_focus_region += 1
            elif type_definition == "higherPD":
                if S >= 0 and S <= 5:
                    if T >= 10 and T <= 15:
                        if S >= -10 + T:  # upper triangle of the PD quadrant
                            if user_id not in dict_user_list_actions_in_focus_region:
                                dict_user_list_actions_in_focus_region[
                                    user_id] = []
                            if action == 1:
                                if user_id not in list_cooperators_in_focus_region:
                                    list_cooperators_in_focus_region.append(
                                        user_id)
                                num_coop_actions_in_focus_region += 1
                            elif action == 0:
                                if user_id not in list_defectors_in_focus_region:
                                    list_defectors_in_focus_region.append(
                                        user_id)
                            if action != None:
                                dict_user_list_actions_in_focus_region[
                                    user_id].append(action)
                                num_actions_in_focus_region += 1
            elif type_definition == "lowerPD":
                if S >= 0 and S <= 5:
                    if T >= 10 and T <= 15:
                        if S < -10 + T:  # lower triangle of the PD quadrant
                            if user_id not in dict_user_list_actions_in_focus_region:
                                dict_user_list_actions_in_focus_region[
                                    user_id] = []
                            if action == 1:
                                if user_id not in list_cooperators_in_focus_region:
                                    list_cooperators_in_focus_region.append(
                                        user_id)
                                num_coop_actions_in_focus_region += 1
                            elif action == 0:
                                if user_id not in list_defectors_in_focus_region:
                                    list_defectors_in_focus_region.append(
                                        user_id)
                            if action != None:
                                dict_user_list_actions_in_focus_region[
                                    user_id].append(action)
                                num_actions_in_focus_region += 1
            elif type_definition == "SH":
                # Stag Hunt quadrant: S in [0,5], T in [5,10]
                if S >= 0 and S <= 5:
                    if T >= 5 and T <= 10:
                        if user_id not in dict_user_list_actions_in_focus_region:
                            dict_user_list_actions_in_focus_region[
                                user_id] = []
                        if action == 1:
                            if user_id not in list_cooperators_in_focus_region:
                                list_cooperators_in_focus_region.append(
                                    user_id)
                            num_coop_actions_in_focus_region += 1
                        elif action == 0:
                            if user_id not in list_defectors_in_focus_region:
                                list_defectors_in_focus_region.append(user_id)
                        if action != None:
                            dict_user_list_actions_in_focus_region[
                                user_id].append(action)
                            num_actions_in_focus_region += 1
            elif type_definition == "SD":
                # Snowdrift quadrant: S in [5,10], T in [10,15]
                if S >= 5 and S <= 10:
                    if T >= 10 and T <= 15:
                        if user_id not in dict_user_list_actions_in_focus_region:
                            dict_user_list_actions_in_focus_region[
                                user_id] = []
                        if action == 1:
                            if user_id not in list_cooperators_in_focus_region:
                                list_cooperators_in_focus_region.append(
                                    user_id)
                            num_coop_actions_in_focus_region += 1
                        elif action == 0:
                            if user_id not in list_defectors_in_focus_region:
                                list_defectors_in_focus_region.append(user_id)
                        if action != None:
                            dict_user_list_actions_in_focus_region[
                                user_id].append(action)
                            num_actions_in_focus_region += 1
    ###### end loop over user_ids in the main dict

    ######## obtaining the subset of user_id who cooperated > umbral_coop in the focus region
    list_avg_defectors_in_focus_region = []
    list_avg_cooperators_in_focus_region = []
    for user_id in dict_user_list_actions_in_focus_region:  #over all user_ids who played in that region
        dict_user_avg_coop_in_focus_region[user_id] = numpy.mean(
            dict_user_list_actions_in_focus_region[user_id])
        if dict_user_avg_coop_in_focus_region[user_id] > umbral_coop:
            list_avg_cooperators_in_focus_region.append(user_id)
        else:
            list_avg_defectors_in_focus_region.append(user_id)

    ###### for the histogram of general cooperation
    list_avg_coop = []
    for user_id in dict_user_list_actions:
        list_avg_coop.append(numpy.mean(dict_user_list_actions[user_id]))
    histograma_bines_gral.histograma_bins(list_avg_coop, Nbins_avg_coop,
                                          name_h_avg_coop)
    # print "avg coop this group:", numpy.mean(list_avg_coop), "median:",numpy.median(list_avg_coop), min(list_avg_coop), max(list_avg_coop)

    print "# user_ids that play in", type_definition, len(
        dict_user_list_actions_in_focus_region
    ), " who cooperated >", umbral_coop * 100, "%:", len(
        list_avg_cooperators_in_focus_region)
    print "# items in the pickle (tot # users):", len(master_list)
    print "\n# unique defectors in", type_definition, "(defect at least once):", len(
        list_defectors_in_focus_region), " # avg-defectors:", len(
            list_avg_defectors_in_focus_region)
    print "\n# unique coop in", type_definition, "(cooperate at least once):", len(
        list_cooperators_in_focus_region
    ), " # actions in", type_definition, ":", num_actions_in_focus_region, " fract_coop:", num_coop_actions_in_focus_region / float(
        num_actions_in_focus_region
    ), " # avg cooperators (> coop_threshold) in", type_definition, ":", len(
        list_avg_cooperators_in_focus_region), " # avg-cooperators:", len(
            list_avg_cooperators_in_focus_region)
    # print "\nintersection unique users cooperators and defectors in lower Harmony", len(list(set(list_cooperators_in_focus_region) & set(list_defectors_in_focus_region)))
    print " tot # valid actions:", num_valid_actions, " tot # users:", num_users

    # persist the three user sets for downstream scripts
    pickle.dump(list_avg_cooperators_in_focus_region,
                open(pickle_file_good_people, 'wb'))
    print "written pickle:", pickle_file_good_people
    pickle.dump(list_avg_defectors_in_focus_region,
                open(pickle_file_bad_people, 'wb'))
    print "written pickle:", pickle_file_bad_people
    pickle.dump(list_all_users, open(pickle_file_all, 'wb'))
    print "written pickle:", pickle_file_all

    ####### i read the master dict again to compare levels of cooperations for some sets of users
    list_actions_all_users = []
    list_actions_coop_in_focus_region = []
    list_actions_NO_coop_in_focus_region = []
    for dictionary in master_list:  # each list element is itself a dict (one player)
        user_id = dictionary['id']
        list_dict_rondas = dictionary['rondes']
        for dict_ronda in list_dict_rondas:
            action = dict_ronda['seleccio']
            if action == "C":
                action = 1.
            elif action == "D":
                action = 0.
            if action != None:
                list_actions_all_users.append(action)
                if user_id in list_avg_cooperators_in_focus_region:
                    list_actions_coop_in_focus_region.append(action)
                elif user_id in list_avg_defectors_in_focus_region:
                    list_actions_NO_coop_in_focus_region.append(action)

    avg_real_coop_among_coop_in_focus_region = numpy.mean(
        list_actions_coop_in_focus_region)
    avg_real_coop_among_NONcoop_in_focus_region = numpy.mean(
        list_actions_NO_coop_in_focus_region)
    print "\nAvg coop all users all TS-plane:", numpy.mean(
        list_actions_all_users), " tot # actions:", len(
            list_actions_all_users)

    print "\n Bootstrapping..."
    ########## bootstrapping to see if the cooperators (def as coop > umbral_coop) in focus region are special
    print "\nCooperators in", type_definition, " vs all:"
    bootstrapping.zscore(list_actions_all_users,
                         len(list_actions_coop_in_focus_region), Niter,
                         avg_real_coop_among_coop_in_focus_region)
    print " # users in", type_definition, ":", len(
        list_avg_cooperators_in_focus_region)
    print "Avg coop this set users in all TS-plane:", avg_real_coop_among_coop_in_focus_region, " # actions:", len(
        list_actions_coop_in_focus_region)

    ########## bootstrapping to see if the defectors (def as coop <umbral_coop) in focus region are special
    print "\nDefectors in", type_definition, " vs all:"
    bootstrapping.zscore(list_actions_all_users,
                         len(list_actions_NO_coop_in_focus_region), Niter,
                         avg_real_coop_among_NONcoop_in_focus_region)
    print " # users in", type_definition, ":", len(
        list_avg_defectors_in_focus_region)
    print "Avg coop this set in all TS-plane:", avg_real_coop_among_NONcoop_in_focus_region, " # actions:", len(
        list_actions_NO_coop_in_focus_region)
def main(graph_name):
    """Train/test the dose-memory infection model of idea adoption on the
    hospital shift network read from `graph_name` (gml).

    For each point of the (degenerate, effectively single-point) parameter
    sweep -- infection probability, immunity probability, dose, with the
    infection threshold kept fixed -- it runs Niter stochastic spreading
    simulations, compares every simulated adoption curve against the actual
    adoption curve read from disk, and writes out average curves, distance
    histograms and a summary file.

    NOTE(review): recovered from a whitespace-mangled dump; the block
    structure (indentation) was reconstructed to the most plausible
    reading -- confirm against the original script.
    """

    G = nx.read_gml(graph_name)

    cutting_day = 100  # i use this only for the filenames

    for_testing_fixed_set = "YES"  # when YES, fixed values param, to get all statistics on final distances etc
    # change the range for the parameters accordingly

    envelopes = "YES"

    Niter = 1000  # 100 iter seems to be enough (no big diff. with respect to 1000it)

    percent_envelope = 95.

    list_id_weekends_T3 = look_for_T3_weekends(
        G)  # T3 doesnt share fellows in the weekend (but they are the exception)

    Nbins = 200  # for the histogram of sum of distances

    all_team = "NO"  # as adopters or not

    dir_real_data = '../Results/'
    dir = "../Results/weight_shifts/infection/"

    delta_end = 3.  # >= than + or - dr difference at the end of the evolution (NO realization ends up closer than this!!!! if 2, i get and empty list!!!)

    if for_testing_fixed_set == "NO":
        # landscape file: one line per parameter-space point (only when scanning)
        output_file3 = "../Results/weight_shifts/Landscape_parameters_infection_memory_" + str(
            Niter) + "iter_A_F_inferred_middle.dat"
        file3 = open(output_file3, 'wt')
        file3.close()

    ######################################################################################
    # I read the file of the actual evolution of the idea spreading in the hospital:   ##
    ######################################################################################

    if all_team == "YES":
        print "remember that now i use the file of adopters without fellows\n../Results/Actual_evolution_adopters_NO_fellows_only_attendings.dat"
        exit()
    else:
        filename_actual_evol = "../Results/Actual_evolution_adopters_from_inference.dat"

    file1 = open(
        filename_actual_evol, 'r'
    )  ## i read the file: list_dates_and_names_current_adopters.txt (created with: extract_real_evolution_number_adopters.py)
    list_lines_file = file1.readlines()

    list_actual_evol = []
    for line in list_lines_file:  # [1:]:   # i exclude the first row
        num_adopters = float(line.split("\t")[1])
        list_actual_evol.append(num_adopters)

    ##################################################################

    # degenerate "sweep" ranges: max barely above min, so each while loop
    # below runs exactly once with the min value
    prob_min = 0.7
    prob_max = 0.701
    delta_prob = 0.1

    prob_Immune_min = 0.00
    prob_Immune_max = 0.001
    delta_prob_Immune = 0.1

    ########## KEEP FIXED TO ONE
    infect_threshold_min = 1.00  # i can define the dose in units of the threshold
    infect_threshold_max = 1.001
    delta_infect_threshold = 0.1
    ############

    dose_min = 0.2  # of a single encounter with an infected (starting from zero doesnt make sense)
    dose_max = 0.201
    delta_dose = 0.01

    dict_filenames_tot_distance = {
    }  # i will save the filename as key and the tot distance from that curve to the original one

    prob_Immune = prob_Immune_min
    while prob_Immune <= prob_Immune_max:
        print "prom Immune:", prob_Immune

        prob_infection = prob_min
        while prob_infection <= prob_max:
            print " p:", prob_infection

            infect_threshold = infect_threshold_min  # kept fixed (no threshold loop)

            dose = dose_min
            while dose <= dose_max:
                print " dose:", dose

                output_file2 = dir + "Average_time_evolution_Infection_memory_p" + str(
                    prob_infection) + "_Immune" + str(
                        prob_Immune) + "_FIXED_threshold" + str(
                            infect_threshold) + "_dose" + str(dose) + "_" + str(
                                Niter) + "iter_A_F_inferred_middle.dat"
                file2 = open(output_file2, 'wt')
                file2.close()

                # list_final_I_values_fixed_p=[]  # i dont care about the final values right now, but about the whole time evol

                list_lists_t_evolutions = []  # one simulated adoption curve per iteration

                list_dist_fixed_parameters = []
                list_dist_fixed_parameters_testing_segment = []
                list_abs_dist_at_ending_point_fixed_parameters = []
                list_dist_at_ending_point_fixed_parameters = []
                list_final_num_infected = []
                list_abs_dist_point_by_point_indiv_simus_to_actual = []
                list_dist_point_by_point_indiv_simus_to_actual = []
                # list_abs_dist_at_cutting_day=[]

                for iter in range(Niter):
                    # print "     iter:",iter

                    ########### set I.C.
                    list_I = []  #list infected doctors
                    max_order = 0
                    for n in G.nodes():
                        G.node[n]["status"] = "S"  # all nodes are Susceptible
                        G.node[n]["infec_value"] = 0.
                        # when this value goes over the infect_threshold, the dr is infected
                        if G.node[n]['type'] == "shift":
                            if G.node[n]['order'] > max_order:
                                max_order = G.node[n]['order']  # to get the last shift-order for the time loop
                        else:
                            # Wunderink and Weiss are the two seed adopters
                            if G.node[n]['label'] == "Wunderink" or G.node[n]["label"] == "Weiss":
                                G.node[n]["infec_value"] = infect_threshold + 1.
                                G.node[n]["status"] = "I"
                                list_I.append(G.node[n]['label'])

                    list_single_t_evolution = []
                    list_single_t_evolution.append(
                        2.0)  # I always start with TWO infected doctors!!

                    old_num_adopters = 2

                    for n in G.nodes(
                    ):  # i make some DOCTORs INMUNE (anyone except Weiss and Wunderink)
                        if (G.node[n]['type'] == "A") or (G.node[n]['type'] == "F"):
                            if G.node[n]['label'] != "Wunderink" and G.node[n][
                                    "label"] != "Weiss":
                                rand = random.random()
                                if rand < prob_Immune:
                                    G.node[n]["status"] = "Immune"

                    ################# the dynamics starts:
                    shift_length = 5  #i know the first shift (order 0) is of length 5

                    t = 0
                    while t <= max_order:  # loop over shifts, in order
                        for n in G.nodes():
                            if G.node[n]['type'] == "shift" and G.node[n]['order'] == t:
                                shift_length = int(G.node[n]['shift_length'])

                                if shift_length == 2 and n not in list_id_weekends_T3:
                                    shift_length = 1  # because during weekends, the fellow does rounds one day with Att1 and the other day with Att2. (weekend shifts for T3 are two day long, with no sharing fellows)

                                flag_possible_infection = 0
                                for doctor in G.neighbors(
                                        n):  #first i check if any doctor is infected in this shift
                                    if G.node[doctor]["status"] == "I":
                                        flag_possible_infection = 1

                                if flag_possible_infection:
                                    for doctor in G.neighbors(
                                            n):  # then the doctors in that shift, gets infected with prob_infection
                                        for i in range(shift_length):  # one trial per day of the shift
                                            if G.node[doctor]["status"] == "S":
                                                rand = random.random()
                                                if rand < prob_infection:  # with prob p the infection occurres
                                                    G.node[doctor]["infec_value"] += dose  # and bumps the infection_value of that susceptible dr
                                                    if G.node[doctor]["infec_value"] >= infect_threshold:  # becomes infected
                                                        G.node[doctor]["status"] = "I"
                                                        # if G.node[doctor]["type"]=="A":   # fellows participate in the dynamics, but i only consider the attendings as real adopters
                                                        list_I.append(
                                                            G.node[doctor]["label"])

                        new_num_adopters = len(list_I)

                        # one curve sample per day; adoption is assumed to happen
                        # mid-shift, hence the old/new split below
                        if shift_length == 5:  # i estimate that adoption happens in the middle of the shift
                            if t + 5 < max_order:
                                list_single_t_evolution.append(old_num_adopters)
                            if t + 4 < max_order:
                                list_single_t_evolution.append(old_num_adopters)
                            if t + 3 < max_order:
                                list_single_t_evolution.append(new_num_adopters)
                            if t + 2 < max_order:
                                list_single_t_evolution.append(new_num_adopters)
                            if t + 1 < max_order:
                                list_single_t_evolution.append(new_num_adopters)
                            t += 5
                        elif shift_length == 4:
                            if t + 4 < max_order:
                                list_single_t_evolution.append(old_num_adopters)
                            if t + 3 < max_order:
                                list_single_t_evolution.append(old_num_adopters)
                            if t + 2 < max_order:
                                list_single_t_evolution.append(new_num_adopters)
                            if t + 1 < max_order:
                                list_single_t_evolution.append(new_num_adopters)
                            t += 4
                        elif shift_length == 3:
                            if t + 3 < max_order:
                                list_single_t_evolution.append(old_num_adopters)
                            if t + 2 < max_order:
                                list_single_t_evolution.append(new_num_adopters)
                            if t + 1 < max_order:
                                list_single_t_evolution.append(new_num_adopters)
                            t += 3
                        elif shift_length == 2:
                            if t + 2 < max_order:
                                list_single_t_evolution.append(old_num_adopters)
                            if t + 1 < max_order:
                                list_single_t_evolution.append(new_num_adopters)
                            t += 2
                        elif shift_length == 1:
                            if t + 1 < max_order:
                                list_single_t_evolution.append(new_num_adopters)
                            t += 1

                        old_num_adopters = new_num_adopters

                    ######## end t loop

                    list_lists_t_evolutions.append(list_single_t_evolution)

                    list_dist_fixed_parameters.append(
                        compare_real_evol_vs_simus_to_be_called.compare_two_curves(
                            list_actual_evol, list_single_t_evolution))

                    list_dist_fixed_parameters_testing_segment.append(
                        compare_real_evol_vs_simus_to_be_called.
                        compare_two_curves_testing_segment(
                            list_actual_evol, list_single_t_evolution, cutting_day))

                    list_abs_dist_at_ending_point_fixed_parameters.append(
                        abs(list_single_t_evolution[-1] - list_actual_evol[-1])
                    )  # i save the distance at the ending point between the current simu and actual evol

                    list_dist_at_ending_point_fixed_parameters.append(
                        list_single_t_evolution[-1] - list_actual_evol[-1]
                    )  # i save the distance at the ending point between the current simu and actual evol

                    list_final_num_infected.append(list_single_t_evolution[-1])

                    for index in range(len(list_single_t_evolution)):
                        list_abs_dist_point_by_point_indiv_simus_to_actual.append(
                            abs(list_single_t_evolution[index] -
                                list_actual_evol[index]))
                        list_dist_point_by_point_indiv_simus_to_actual.append(
                            list_single_t_evolution[index] - list_actual_evol[index])

                ######## end loop Niter

                list_pair_dist_std_delta_end = []

                list_pair_dist_std_delta_end.append(
                    numpy.mean(list_dist_fixed_parameters)
                )  # average dist between the curves over Niter
                list_pair_dist_std_delta_end.append(
                    numpy.std(list_dist_fixed_parameters))

                list_pair_dist_std_delta_end.append(
                    numpy.mean(list_abs_dist_at_ending_point_fixed_parameters))

                if for_testing_fixed_set == "NO":
                    file3 = open(output_file3, 'at')  # i print out the landscape
                    print >> file3, prob_infection, prob_Immune, numpy.mean(
                        list_abs_dist_at_ending_point_fixed_parameters
                    ), numpy.mean(list_dist_fixed_parameters), numpy.mean(
                        list_final_num_infected), numpy.std(list_final_num_infected)
                    file3.close()

                if (numpy.mean(list_abs_dist_at_ending_point_fixed_parameters)
                    ) <= delta_end:  # i only consider situations close enough at the ending point
                    dict_filenames_tot_distance[
                        output_file2] = list_pair_dist_std_delta_end

                # average simulated curve over the Niter realizations, one line per day
                file2 = open(output_file2, 'at')
                for s in range(len(list_single_t_evolution)):
                    list_fixed_t = []
                    for iter in range(Niter):
                        list_fixed_t.append(list_lists_t_evolutions[iter][s])
                    print >> file2, s, numpy.mean(list_fixed_t)
                file2.close()
                print "printed out: ", output_file2
                # raw_input()

                if envelopes == "YES":
                    calculate_envelope_set_curves.calculate_envelope(
                        list_lists_t_evolutions, percent_envelope, "Infection",
                        [prob_infection, prob_Immune])

                if for_testing_fixed_set == "YES":
                    # statistics of the final distances for this fixed parameter set
                    num_valid_endings = 0.
                    for item in list_abs_dist_at_ending_point_fixed_parameters:
                        if item <= delta_end:  # i count how many realizations i get close enough at the ending point
                            num_valid_endings += 1.

                    print "average distance of the optimum in the testing segment:", numpy.mean(
                        list_dist_fixed_parameters), numpy.std(
                            list_dist_fixed_parameters
                        ), list_dist_fixed_parameters, "\n"
                    print "fraction of realizations that end within delta_doctor:", num_valid_endings / Niter, "mean ending dist:", numpy.mean(
                        list_dist_at_ending_point_fixed_parameters
                    ), "SD final dist", numpy.std(
                        list_dist_at_ending_point_fixed_parameters
                    ), list_dist_at_ending_point_fixed_parameters, "\n"

                    histogram_filename = "../Results/weight_shifts/histogr_raw_distances_ending_infection_memory_p" + str(
                        prob_infection) + "_Immune" + str(
                            prob_Immune) + "_threshold" + str(
                                infect_threshold) + "_dose" + str(dose) + "_" + str(
                                    Niter) + "iter_day" + str(
                                        cutting_day) + "_A_F_inferred_middle.dat"
                    histograma_gral_negv_posit.histograma(
                        list_dist_at_ending_point_fixed_parameters,
                        histogram_filename)

                    histogram_filename2 = "../Results/weight_shifts/histogr_sum_dist_traject_infection_memory_p" + str(
                        prob_infection) + "_Immune" + str(
                            prob_Immune) + "_threshold" + str(
                                infect_threshold) + "_dose" + str(dose) + "_" + str(
                                    Niter) + "iter_day" + str(
                                        cutting_day) + "_A_F_inferred_middle.dat"
                    histograma_bines_gral.histograma_bins(
                        list_dist_fixed_parameters, Nbins, histogram_filename2)

                    histogram_filename3 = "../Results/weight_shifts/histogr_sum_dist_testing_segment_infection_memory_p" + str(
                        prob_infection) + "_Immune" + str(
                            prob_Immune) + "_threshold" + str(
                                infect_threshold) + "_dose" + str(dose) + "_" + str(
                                    Niter) + "iter_day" + str(
                                        cutting_day) + "_A_F_inferred_middle.dat"
                    #print list_dist_fixed_parameters_testing_segment
                    histograma_bines_gral.histograma_bins_zero(
                        list_dist_fixed_parameters_testing_segment, Nbins,
                        histogram_filename3)
                    print min(list_dist_fixed_parameters_testing_segment), max(
                        list_dist_fixed_parameters_testing_segment)

                    histogram_filename4 = "../Results/weight_shifts/histogr_abs_dist_point_by_point_infection_memory_p" + str(
                        prob_infection) + "_Immune" + str(
                            prob_Immune) + "_threshold" + str(
                                infect_threshold) + "_dose" + str(dose) + "_" + str(
                                    Niter) + "iter_day" + str(
                                        cutting_day) + "_A_F_inferred_middle.dat"
                    histograma_gral_negv_posit.histograma(
                        list_abs_dist_point_by_point_indiv_simus_to_actual,
                        histogram_filename4)

                    histogram_filename5 = "../Results/weight_shifts/histogr_dist_point_by_point_infection_memory_p" + str(
                        prob_infection) + "_Immune" + str(
                            prob_Immune) + "_threshold" + str(
                                infect_threshold) + "_dose" + str(dose) + "_" + str(
                                    Niter) + "iter_day" + str(
                                        cutting_day) + "_A_F_inferred_middle.dat"
                    histograma_gral_negv_posit.histograma(
                        list_dist_point_by_point_indiv_simus_to_actual,
                        histogram_filename5)

                    output_file10 = "../Results/weight_shifts/Summary_results_infection_memory_p" + str(
                        prob_infection) + "_Immune" + str(
                            prob_Immune) + "_threshold" + str(
                                infect_threshold) + "_dose" + str(dose) + "_" + str(
                                    Niter) + "iter_day" + str(
                                        cutting_day) + "_A_F_inferred_middle.dat"
                    file10 = open(output_file10, 'wt')

                    print >> file10, "Summary results from best fit infection _memory with", Niter, "iter, and with values for the parameters: prob_inf ", prob_infection, " prob immune: ", prob_Immune, "\n"
                    print >> file10, "average distance of the optimum in the testing segment:", numpy.mean(
                        list_dist_fixed_parameters), numpy.std(
                            list_dist_fixed_parameters
                        ), list_dist_fixed_parameters, "\n"
                    print >> file10, "fraction of realizations that end within delta_doctor:", num_valid_endings / Niter, "mean ending dist:", numpy.mean(
                        list_dist_at_ending_point_fixed_parameters
                    ), "SD final dist", numpy.std(
                        list_dist_at_ending_point_fixed_parameters
                    ), list_dist_at_ending_point_fixed_parameters, "\n"
                    print >> file10, "written optimum best fit evolution file:", output_file2
                    print >> file10, "written histogram file: ", histogram_filename
                    file10.close()

                    print "written Summary file: ", output_file10

                dose += delta_dose
            prob_infection += delta_prob
        prob_Immune += delta_prob_Immune
def main(): Niter = 10000 # for bootstrapping ####### input network files to collect the info from graph_name = "./network_all_users/GC_full_network_all_users_merged_small_comm_roles_diff_layers1_roles_diff_layers1.5.gml" ################## ######### i build the networks (remember label attribute matches id in users table) G = nx.read_gml(graph_name) G_GC = nx.connected_component_subgraphs(G)[0] # print "network size:", len(G.nodes()), "GC size:", len(G_GC.nodes()) # the network IS just the GC 1910 ################ csv_file = "analysis_time_bins_bmi_groups/master_users_file_weight_change_first6months_2w_ins.txt" ############# dict_label_ck_id = {} dict_ck_id_label = {} dict_id_label = {} dict_label_id = {} for node in G.nodes(): label = G.node[node]["label"] dict_id_label[node] = label dict_label_id[label] = node print "getting user's info from csv....." ################# getting info from csv file_csv_info = open(csv_file, 'r') list_lines_file_csv_info = file_csv_info.readlines() cont = 0 cont_2wins = 0 cont_small_clusters = 0 cont_networked = 0 cont_GC = 0 cont_one_weigh_in = 0 cont_non_networked = 0 list_percent_weight_changes_6months_all_2wins = [] list_percent_weight_changes_6months_networked = [] list_percent_weight_changes_6months_non_networked = [] list_percent_weight_changes_6months_GC = [] list_percent_weight_changes_6months_small_clusters = [] list_percent_weight_changes_6months_with_R6friends = [] list_percent_weight_changes_6months_0R6s = [] list_percent_weight_changes_6months_1R6s = [] list_percent_weight_changes_6months_2R6s = [] list_percent_weight_changes_6months_3R6s = [] list_percent_weight_changes_6months_4R6s = [] list_percent_weight_changes_6months_5R6s = [] list_percent_weight_changes_6months_6R6s = [] cont_fat_fingers = 0 list_users = [] for line in list_lines_file_csv_info: if cont > 0: list_elements_line = line.strip("\r\n").split(" ") ck_id = str(list_elements_line[0]) label = str(list_elements_line[1]) if ck_id not in list_users: 
list_users.append(ck_id) dict_label_ck_id[label] = ck_id dict_ck_id_label[ck_id] = label weigh_change_6months = float(list_elements_line[2]) percent_weight_change_6months = float(list_elements_line[3]) if percent_weight_change_6months < 100. and percent_weight_change_6months > -100.: number_weigh_ins_6months = int(list_elements_line[4]) activity_6months = int(list_elements_line[5]) degree = int(list_elements_line[20]) p_friends = int(list_elements_line[15]) if number_weigh_ins_6months >= 2: cont_2wins += 1 list_percent_weight_changes_6months_all_2wins.append( percent_weight_change_6months) if p_friends == 1: cont_networked += 1 list_percent_weight_changes_6months_networked.append( percent_weight_change_6months) if label in dict_label_id: #GC node = dict_label_id[label] list_percent_weight_changes_6months_GC.append( percent_weight_change_6months) cont_GC += 1 if G.node[node]["R6_overlap"] > 0: list_percent_weight_changes_6months_with_R6friends.append( percent_weight_change_6months) if G.node[node]["R6_overlap"] == 1: list_percent_weight_changes_6months_1R6s.append( percent_weight_change_6months) elif G.node[node]["R6_overlap"] == 2: list_percent_weight_changes_6months_2R6s.append( percent_weight_change_6months) elif G.node[node]["R6_overlap"] == 3: list_percent_weight_changes_6months_3R6s.append( percent_weight_change_6months) elif G.node[node]["R6_overlap"] == 4: list_percent_weight_changes_6months_4R6s.append( percent_weight_change_6months) elif G.node[node]["R6_overlap"] == 5: list_percent_weight_changes_6months_5R6s.append( percent_weight_change_6months) elif G.node[node]["R6_overlap"] >= 6: list_percent_weight_changes_6months_6R6s.append( percent_weight_change_6months) else: list_percent_weight_changes_6months_0R6s.append( percent_weight_change_6months) else: list_percent_weight_changes_6months_small_clusters.append( percent_weight_change_6months) cont_small_clusters += 1 else: list_percent_weight_changes_6months_non_networked.append( 
percent_weight_change_6months) cont_non_networked += 1 else: cont_one_weigh_in += 1 else: cont_fat_fingers += 1 cont += 1 ############## print "number of fat fingers:", cont_fat_fingers, "(excluded)" print "total sample size:", len( list_users), "\n with >=2 w-ins:", cont_2wins print "networked:", cont_networked, " non-networked:", cont_non_networked, "\nsize GC from csv:", cont_GC, "\nsmall clusters:", cont_small_clusters print "users with just one weigh in:", cont_one_weigh_in print "\navg. percent weight change:" print " all with two w-ins:", numpy.mean( list_percent_weight_changes_6months_all_2wins), "+/-", numpy.std( list_percent_weight_changes_6months_all_2wins) / numpy.sqrt( float(len(list_percent_weight_changes_6months_all_2wins) - 1.)), "set size:", len( list_percent_weight_changes_6months_all_2wins) print " non networked:", numpy.mean( list_percent_weight_changes_6months_non_networked ), "+/-", numpy.std( list_percent_weight_changes_6months_non_networked) / numpy.sqrt( float(len(list_percent_weight_changes_6months_non_networked) - 1.)), "set size:", len( list_percent_weight_changes_6months_non_networked) print " networked:", numpy.mean( list_percent_weight_changes_6months_networked), "+/-", numpy.std( list_percent_weight_changes_6months_networked) / numpy.sqrt( float(len(list_percent_weight_changes_6months_networked) - 1.)), "set size:", len( list_percent_weight_changes_6months_networked) print " small clusters:", numpy.mean( list_percent_weight_changes_6months_small_clusters), "+/-", numpy.std( list_percent_weight_changes_6months_small_clusters) / numpy.sqrt( float( len(list_percent_weight_changes_6months_small_clusters) - 1.)), "set size:", len( list_percent_weight_changes_6months_small_clusters) print " GC:", numpy.mean( list_percent_weight_changes_6months_GC ), "+/-", numpy.std(list_percent_weight_changes_6months_GC) / numpy.sqrt( float(len(list_percent_weight_changes_6months_GC) - 1.)), "set size:", len(list_percent_weight_changes_6months_GC) print " 
without R6s:", numpy.mean( list_percent_weight_changes_6months_0R6s ), "+/-", numpy.std(list_percent_weight_changes_6months_0R6s) / numpy.sqrt( float(len(list_percent_weight_changes_6months_0R6s) - 1.)), "set size:", len(list_percent_weight_changes_6months_0R6s) print " with R6s:", numpy.mean( list_percent_weight_changes_6months_with_R6friends), "+/-", numpy.std( list_percent_weight_changes_6months_with_R6friends) / numpy.sqrt( float( len(list_percent_weight_changes_6months_with_R6friends) - 1.)), "set size:", len( list_percent_weight_changes_6months_with_R6friends ), "\n" ########################################################## ############ Bootstrap for comparing the different sets: print "\n\nComparing Networked population vs. 2-weigh-in population" list_synthetic_averages_for_distribution = [] # 2 w-ins vs networked pop. for i in range(Niter): synthetic_mean = numpy.mean( sample_with_replacement( list_percent_weight_changes_6months_all_2wins, len(list_percent_weight_changes_6months_networked))) list_synthetic_averages_for_distribution.append(synthetic_mean) # print synthetic_mean print "average all synthetic values:", numpy.mean( list_synthetic_averages_for_distribution), "+/-", numpy.std( list_synthetic_averages_for_distribution) print "z-score synthetic networked population weight change vs. 2weigh-in pop.:", ( numpy.mean(list_synthetic_averages_for_distribution) - numpy.mean(list_percent_weight_changes_6months_networked) ) / numpy.std(list_synthetic_averages_for_distribution) histograma_bines_gral.histograma_bins( list_synthetic_averages_for_distribution, 50, "./analysis_time_bins_bmi_groups/histogram_synthetic_networked_from_2wins_weight_changes.dat" ) ##### print "\n\nComparing GC population vs. 2-weigh-in population" list_synthetic_averages_for_distribution = [] # 2 w-ins vs networked pop. 
for i in range(Niter): synthetic_mean = numpy.mean( sample_with_replacement( list_percent_weight_changes_6months_all_2wins, len(list_percent_weight_changes_6months_GC))) list_synthetic_averages_for_distribution.append(synthetic_mean) # print synthetic_mean print "average all synthetic values:", numpy.mean( list_synthetic_averages_for_distribution), "+/-", numpy.std( list_synthetic_averages_for_distribution) print "z-score synthetic GC population weight change vs. 2weigh-in population:", ( numpy.mean(list_synthetic_averages_for_distribution) - numpy.mean(list_percent_weight_changes_6months_GC) ) / numpy.std(list_synthetic_averages_for_distribution) histograma_bines_gral.histograma_bins( list_synthetic_averages_for_distribution, 50, "./analysis_time_bins_bmi_groups/histogram_synthetic_GC_from_2wins_weight_changes.dat" ) ##### print "\n\nComparing R6s friends population vs. 2-weigh-in population" list_synthetic_averages_for_distribution = [] # 2 w-ins vs networked pop. for i in range(Niter): synthetic_mean = numpy.mean( sample_with_replacement( list_percent_weight_changes_6months_all_2wins, len(list_percent_weight_changes_6months_with_R6friends))) list_synthetic_averages_for_distribution.append(synthetic_mean) # print synthetic_mean print "average all synthetic values:", numpy.mean( list_synthetic_averages_for_distribution), "+/-", numpy.std( list_synthetic_averages_for_distribution) print "z-score synthetic R6s friends population weight change vs. 2weigh-in population:", ( numpy.mean(list_synthetic_averages_for_distribution) - numpy.mean(list_percent_weight_changes_6months_with_R6friends) ) / numpy.std(list_synthetic_averages_for_distribution) histograma_bines_gral.histograma_bins( list_synthetic_averages_for_distribution, 50, "./analysis_time_bins_bmi_groups/histogram_synthetic_R6s_friends_from_2wins_weight_changes.dat" ) ##### print "\n\nComparing Small clusters population vs. 
2-weigh-in population" list_synthetic_averages_for_distribution = [] # 2 w-ins vs networked pop. for i in range(Niter): synthetic_mean = numpy.mean( sample_with_replacement( list_percent_weight_changes_6months_all_2wins, len(list_percent_weight_changes_6months_small_clusters))) list_synthetic_averages_for_distribution.append(synthetic_mean) # print synthetic_mean print "average all synthetic values:", numpy.mean( list_synthetic_averages_for_distribution), "+/-", numpy.std( list_synthetic_averages_for_distribution) print "z-score synthetic small clusters population weight change vs. 2weigh-in population:", ( numpy.mean(list_synthetic_averages_for_distribution) - numpy.mean(list_percent_weight_changes_6months_small_clusters) ) / numpy.std(list_synthetic_averages_for_distribution) histograma_bines_gral.histograma_bins( list_synthetic_averages_for_distribution, 50, "./analysis_time_bins_bmi_groups/histogram_synthetic_small_clusters_from_2wins_weight_changes.dat" ) ##### print "\n\nComparing Non-networked population vs. 2-weigh-in population" list_synthetic_averages_for_distribution = [] # 2 w-ins vs networked pop. for i in range(Niter): synthetic_mean = numpy.mean( sample_with_replacement( list_percent_weight_changes_6months_all_2wins, len(list_percent_weight_changes_6months_non_networked))) list_synthetic_averages_for_distribution.append(synthetic_mean) # print synthetic_mean print "average all synthetic values:", numpy.mean( list_synthetic_averages_for_distribution), "+/-", numpy.std( list_synthetic_averages_for_distribution) print "z-score synthetic Non networked population weight change vs. 
2weigh-in population:", ( numpy.mean(list_synthetic_averages_for_distribution) - numpy.mean(list_percent_weight_changes_6months_non_networked) ) / numpy.std(list_synthetic_averages_for_distribution) histograma_bines_gral.histograma_bins( list_synthetic_averages_for_distribution, 50, "./analysis_time_bins_bmi_groups/histogram_synthetic_non_networked_from_2wins_weight_changes.dat" ) ####### ######### print "\n\nComparing GC population vs Networked population" list_synthetic_averages_for_distribution = [] # 2 w-ins vs networked pop. for i in range(Niter): synthetic_mean = numpy.mean( sample_with_replacement( list_percent_weight_changes_6months_networked, len(list_percent_weight_changes_6months_GC))) list_synthetic_averages_for_distribution.append(synthetic_mean) # print synthetic_mean print "average all synthetic values:", numpy.mean( list_synthetic_averages_for_distribution), "+/-", numpy.std( list_synthetic_averages_for_distribution) print "z-score synthetic GC population weight change vs. Networked population:", ( numpy.mean(list_synthetic_averages_for_distribution) - numpy.mean(list_percent_weight_changes_6months_GC) ) / numpy.std(list_synthetic_averages_for_distribution) histograma_bines_gral.histograma_bins( list_synthetic_averages_for_distribution, 50, "./analysis_time_bins_bmi_groups/histogram_synthetic_GC_from_networked_weight_changes.dat" ) ##### print "\n\nComparing R6s friends population vs Networked population" list_synthetic_averages_for_distribution = [] # 2 w-ins vs networked pop. 
for i in range(Niter): synthetic_mean = numpy.mean( sample_with_replacement( list_percent_weight_changes_6months_networked, len(list_percent_weight_changes_6months_with_R6friends))) list_synthetic_averages_for_distribution.append(synthetic_mean) # print synthetic_mean print "average all synthetic values:", numpy.mean( list_synthetic_averages_for_distribution), "+/-", numpy.std( list_synthetic_averages_for_distribution) print "z-score synthetic R6s friends population weight change vs. Networked population:", ( numpy.mean(list_synthetic_averages_for_distribution) - numpy.mean(list_percent_weight_changes_6months_with_R6friends) ) / numpy.std(list_synthetic_averages_for_distribution) histograma_bines_gral.histograma_bins( list_synthetic_averages_for_distribution, 50, "./analysis_time_bins_bmi_groups/histogram_synthetic_with_R6friends_from_networked_weight_changes.dat" ) ##### print "\n\nComparing small clusters population vs Networked population" list_synthetic_averages_for_distribution = [] # 2 w-ins vs networked pop. for i in range(Niter): synthetic_mean = numpy.mean( sample_with_replacement( list_percent_weight_changes_6months_networked, len(list_percent_weight_changes_6months_small_clusters))) list_synthetic_averages_for_distribution.append(synthetic_mean) # print synthetic_mean print "average all synthetic values:", numpy.mean( list_synthetic_averages_for_distribution), "+/-", numpy.std( list_synthetic_averages_for_distribution) print "z-score synthetic small clusters population weight change vs. 
Networked population:", ( numpy.mean(list_synthetic_averages_for_distribution) - numpy.mean(list_percent_weight_changes_6months_small_clusters) ) / numpy.std(list_synthetic_averages_for_distribution) histograma_bines_gral.histograma_bins( list_synthetic_averages_for_distribution, 50, "./analysis_time_bins_bmi_groups/histogram_synthetic_small_clusters_from_networked_weight_changes.dat" ) ##### ##### print "\n\nComparing R6s friends population vs GC population" list_synthetic_averages_for_distribution = [] # 2 w-ins vs networked pop. for i in range(Niter): synthetic_mean = numpy.mean( sample_with_replacement( list_percent_weight_changes_6months_GC, len(list_percent_weight_changes_6months_with_R6friends))) list_synthetic_averages_for_distribution.append(synthetic_mean) # print synthetic_mean print "average all synthetic values:", numpy.mean( list_synthetic_averages_for_distribution), "+/-", numpy.std( list_synthetic_averages_for_distribution) print "z-score synthetic R6s friends population weight change vs. 
GC population:", ( numpy.mean(list_synthetic_averages_for_distribution) - numpy.mean(list_percent_weight_changes_6months_with_R6friends) ) / numpy.std(list_synthetic_averages_for_distribution) histograma_bines_gral.histograma_bins( list_synthetic_averages_for_distribution, 50, "./analysis_time_bins_bmi_groups/histogram_synthetic_with_R6friends_from_GC_weight_changes.dat" ) print "\n" #################### histograma_bines_gral.histograma_bins( list_percent_weight_changes_6months_all_2wins, 50, "./analysis_time_bins_bmi_groups/histogram_real_weigh_change_distrib_2weigh_ins.dat" ) histograma_bines_gral.histograma_bins( list_percent_weight_changes_6months_networked, 50, "./analysis_time_bins_bmi_groups/histogram_real_weigh_change_distrib_networked.dat" ) histograma_bines_gral.histograma_bins( list_percent_weight_changes_6months_non_networked, 50, "./analysis_time_bins_bmi_groups/histogram_real_weigh_change_distrib_non_networked.dat" ) histograma_bines_gral.histograma_bins( list_percent_weight_changes_6months_GC, 50, "./analysis_time_bins_bmi_groups/histogram_real_weigh_change_distrib_GC.dat" ) histograma_bines_gral.histograma_bins( list_percent_weight_changes_6months_small_clusters, 50, "./analysis_time_bins_bmi_groups/histogram_real_weigh_change_distrib_small_clusters.dat" ) histograma_bines_gral.histograma_bins( list_percent_weight_changes_6months_with_R6friends, 50, "./analysis_time_bins_bmi_groups/histogram_real_weigh_change_distrib_with_R6friends.dat" ) histograma_bines_gral.histograma_bins( list_percent_weight_changes_6months_0R6s, 50, "./analysis_time_bins_bmi_groups/histogram_real_weigh_change_distrib_without_0R6s_friends.dat" ) ################# print "\nRegarding weight change and having one or more R6s as friends:" print " 0 R6s:", numpy.mean( list_percent_weight_changes_6months_0R6s ), numpy.std(list_percent_weight_changes_6months_0R6s) / numpy.sqrt( float(len(list_percent_weight_changes_6months_0R6s) - 1.)), " size:", 
len(list_percent_weight_changes_6months_0R6s) print " 1 R6s:", numpy.mean( list_percent_weight_changes_6months_1R6s ), numpy.std(list_percent_weight_changes_6months_1R6s) / numpy.sqrt( float(len(list_percent_weight_changes_6months_1R6s) - 1.)), " size:", len(list_percent_weight_changes_6months_1R6s) print " 2 R6s:", numpy.mean( list_percent_weight_changes_6months_2R6s ), numpy.std(list_percent_weight_changes_6months_2R6s) / numpy.sqrt( float(len(list_percent_weight_changes_6months_2R6s) - 1.)), " size:", len(list_percent_weight_changes_6months_2R6s) print " 3 R6s:", numpy.mean( list_percent_weight_changes_6months_3R6s ), numpy.std(list_percent_weight_changes_6months_3R6s) / numpy.sqrt( float(len(list_percent_weight_changes_6months_3R6s) - 1.)), " size:", len(list_percent_weight_changes_6months_3R6s) print " 4 R6s:", numpy.mean( list_percent_weight_changes_6months_4R6s ), numpy.std(list_percent_weight_changes_6months_4R6s) / numpy.sqrt( float(len(list_percent_weight_changes_6months_4R6s) - 1.)), " size:", len(list_percent_weight_changes_6months_4R6s) print " 5 R6s:", numpy.mean( list_percent_weight_changes_6months_5R6s ), numpy.std(list_percent_weight_changes_6months_5R6s) / numpy.sqrt( float(len(list_percent_weight_changes_6months_5R6s) - 1.)), " size:", len(list_percent_weight_changes_6months_5R6s) print " >= 6 R6s:", numpy.mean( list_percent_weight_changes_6months_6R6s ), numpy.std(list_percent_weight_changes_6months_6R6s) / numpy.sqrt( float(len(list_percent_weight_changes_6months_5R6s) - 1.)), " size:", len(list_percent_weight_changes_6months_6R6s)
def main(graph_name_GC): H1 = nx.read_gml(graph_name_GC) # just GC, but with Role info H1 = nx.connected_component_subgraphs(H1)[0] print len(H1.nodes()) list_R6_labels = [] dicc_label_node = {} list_network_ids = [] for node in H1.nodes(): if (H1.node[node]['role'] == "special_R6"): H1.node[node]['role'] = "R6" list_network_ids.append( int(H1.node[node]['label']) ) # this actually corresponds to the id from the users table in the DB dicc_label_node[int(H1.node[node]['label'])] = node if (H1.node[node]['role'] == "R6"): list_R6_labels.append( int(H1.node[node]['label']) ) # this actually corresponds to the id from the users table in the DB #print "# R6s:",len(list_R6_labels) # print len(dicc_label_node) database = "calorie_king_social_networking_2010" server = "tarraco.chem-eng.northwestern.edu" user = "******" passwd = "n1ckuDB!" db = Connection(server, database, user, passwd) query1 = """select * from users""" result1 = db.query(query1) # is a list of dict. file1 = open("num_messg_to_friends_vs_Gini.dat", 'wt') file2 = open("num_messg_from_friends_vs_Gini.dat", 'wt') file3 = open("num_messg_friends_vs_Gini.dat", 'wt') file11 = open("num_messg_to_friends_vs_Gini_R6s.dat", 'wt') file12 = open("num_messg_from_friends_vs_Gini_R6s.dat", 'wt') file13 = open("num_messg_friends_vs_Gini_R6s.dat", 'wt') file111 = open("num_messg_to_friends_vs_Gini_R6overlap.dat", 'wt') file112 = open("num_messg_from_friends_vs_Gini_R6overlap.dat", 'wt') file113 = open("num_messg_friends_vs_Gini_R6overlap.dat", 'wt') file211 = open("num_blog_posts.dat", 'wt') file212 = open("num_home_page_posts.dat", 'wt') file213 = open("num_lesson_comments.dat", 'wt') file214 = open("num_forum_posts.dat", 'wt') file214 = open("num_tot_public_messages.dat", 'wt') dict_characteristics_users = {} dicc_ck_label = {} for r1 in result1: #first i build a dicc ck_id vs. 
label ck_id = r1['ck_id'] label = int( r1['id']) # this corresponds to the 'label' in the gml files dicc_ck_label[ck_id] = label try: node = dicc_label_node[label] H1.node[node]['ck_id'] = ck_id # print "\n",H1.node[node]['ck_id'], label except KeyError: pass print len(dicc_ck_label) list_sent_from_not_friends = [] list_sent_to_not_friends = [] list_tot_sent = [] list_tot_received = [] list_to_friends = [] list_from_friends = [] list_tot_messg_friends = [] list_GINI_weighted_to_friends = [] # one value per USER list_GINI_weighted_from_friends = [] list_GINI_weighted_tot_messg_friends = [] list_GINI_weighted_to_friends_R6s = [] # one value per USER list_GINI_weighted_from_friends_R6s = [] list_GINI_weighted_tot_messg_friends_R6s = [] list_GINI_weighted_to_friends_R6overlap = [] # one value per USER list_GINI_weighted_from_friends_R6overlap = [] list_GINI_weighted_tot_messg_friends_R6overlap = [] list_weights_friendships = [] list_weights_friendships_with_R6s = [] list_weights_friendships_to_R6s = [] list_weights_friendships_from_R6s = [] list_blog_posts = [] list_home_page = [] list_forum_posts = [] list_lesson_com = [] list_tot_public_mess = [] num_users = 0. for r1 in result1: #loop over users num_users += 1. 
print int(num_users) ck_id = r1['ck_id'] label = int( r1['id']) # this corresponds to the 'label' in the gml files try: node = dicc_label_node[label] except KeyError: pass query6 = "SELECT * FROM activity_combined where activity_flag != 'WI' and activity_flag != 'PM' and ck_id='" + str( ck_id) + "' " result6 = db.query(query6) tot_public_mess = len(result6) blog_posts = 0 home_page = 0 forum_posts = 0 lesson_com = 0 for r6 in result6: if r6['activity_flag'] == 'BP': blog_posts += 1 elif r6['activity_flag'] == 'HP': home_page += 1 elif r6['activity_flag'] == 'FP': forum_posts += 1 elif r6['activity_flag'] == 'LC': lesson_com += 1 list_blog_posts.append(blog_posts) list_home_page.append(home_page) list_forum_posts.append(forum_posts) list_lesson_com.append(lesson_com) list_tot_public_mess.append(tot_public_mess) print ck_id, tot_public_mess, blog_posts, home_page, forum_posts, lesson_com # if num_users <=5000: # JUST TO TEST THE CODE if label in list_network_ids: # if the user is in the network, i check how many messages they send each other to_not_friends = 0 from_not_friends = 0 print "\n\nnode label", label, ck_id, "has degree:", H1.degree( node) query2 = "select * from friends where (src ='" + str( ck_id) + "')or (dest ='" + str(ck_id) + "') " result2 = db.query(query2) degree = len(result2) query3 = "select * from private_messages where (src_id ='" + str( ck_id ) + "') " # there are messages sent by/to people not in the Users table, that is because they join the system prior 1-jan-2009, and are not part of the 47,000 users. 
result3 = db.query(query3) num_sent = float(len(result3)) list_tot_sent.append(num_sent) for r3 in result3: # i count how many messages are sent to friends and non-friends ck_friend = r3['dest_id'] if ck_friend in dicc_ck_label: # because some messages are NOT sent by users (join date prior jan.2009) label_friend = dicc_ck_label[ck_friend] if label_friend in dicc_label_node: node_friend = dicc_label_node[label_friend] flag_friend = 0 node_sender = dicc_label_node[ label] # the user i am currently studying for n in H1.neighbors(node_sender): if n == node_friend: flag_friend = 1 if flag_friend == 0: to_not_friends += 1 query4 = "select * from private_messages where (dest_id ='" + str( ck_id) + "') " result4 = db.query(query4) num_received = float(len(result4)) list_tot_received.append(float(num_received)) for r4 in result4: # i count how many messages are from friends and non-friends ck_friend = str(r4['src_id']) if ck_friend in dicc_ck_label: # i double check, because some messages are NOT sent by users...(join date prior jan.2009) label_friend = dicc_ck_label[ck_friend] if label_friend in dicc_label_node: node_friend = dicc_label_node[label_friend] flag_friend = 0 node_receiver = dicc_label_node[label] for n in H1.neighbors(node_receiver): if n == node_friend: flag_friend = 1 if flag_friend == 0: from_not_friends += 1 query5 = "select * from private_messages where (src_id ='" + str( ck_id) + "')or (dest_id ='" + str( ck_id) + "') " # all messages result5 = db.query(query5) num_tot_messg = float(len(result5)) num_messg_friends = 0. num_messg_to_friends = 0. num_messg_from_friends = 0. 
flag_sent = 0 flag_received = 0 list_weighted_to_friends = [ ] # one value per FRIEND of a given user list_weighted_from_friends = [] list_weighted_tot_messg_friends = [] list_weighted_to_friends_norm = [ ] # one value per FRIEND of a given user, normalized by the tot number of messages that user sent list_weighted_from_friends_norm = [] list_weighted_tot_messg_friends_norm = [] list_weighted_to_friends_R6s_norm = [ ] # one value per FRIEND of a given user, normalized by the tot number of messages that user sent list_weighted_from_friends_R6s_norm = [] list_weighted_tot_messg_friends_R6s_norm = [] for f in H1.neighbors(node): messg_to_one_friend = 0. #looking at a particular friend messg_from_one_friend = 0. messg_one_friend = 0. for r5 in result5: if r5['src_id'] == ck_id and r5['dest_id'] == H1.node[f][ 'ck_id']: num_messg_to_friends += 1. num_messg_friends += 1. flag_sent = 1 messg_to_one_friend += 1. messg_one_friend += 1. elif r5['dest_id'] == ck_id and r5['src_id'] == H1.node[f][ 'ck_id']: num_messg_from_friends += 1. num_messg_friends += 1. flag_received = 1 messg_from_one_friend += 1. messg_one_friend += 1. 
list_weighted_to_friends.append( messg_to_one_friend ) # weight of each friendship (not normalized) list_weighted_from_friends.append(messg_from_one_friend) list_weighted_tot_messg_friends.append(messg_one_friend) if H1.node[f]['role'] == 'R6': #if the friend is an R6s list_weights_friendships_to_R6s.append( messg_from_one_friend) list_weighted_to_friends_R6s_norm.append( messg_to_one_friend) if H1.node[node]['role'] == 'R6': list_weights_friendships_from_R6s.append( messg_to_one_friend) list_weighted_from_friends_R6s_norm.append( messg_from_one_friend) if H1.node[node]['role'] == 'R6' or H1.node[f]['role'] == 'R6': list_weights_friendships_with_R6s.append(messg_one_friend) list_weighted_tot_messg_friends_R6s_norm.append( messg_one_friend) for item in list_weighted_tot_messg_friends: if sum(list_weighted_tot_messg_friends) > 0: list_weighted_tot_messg_friends_norm.append( item / sum(list_weighted_tot_messg_friends)) for item in list_weighted_to_friends: if sum(list_weighted_to_friends) > 0: list_weighted_to_friends_norm.append( item / sum(list_weighted_to_friends)) for item in list_weighted_from_friends: if sum(list_weighted_from_friends) > 0: list_weighted_from_friends_norm.append( item / sum(list_weighted_from_friends)) for i in range( len(list_weighted_tot_messg_friends_R6s_norm) ): # how important is the communication with any R6 friend, compare to the tot # messag if sum(list_weighted_tot_messg_friends_R6s_norm) > 0: list_weighted_tot_messg_friends_R6s_norm[ i] = list_weighted_tot_messg_friends_R6s_norm[ i] / float( sum(list_weighted_tot_messg_friends_R6s_norm)) for i in range(len(list_weighted_to_friends_R6s_norm)): if sum(list_weighted_to_friends_R6s_norm) > 0: list_weighted_to_friends_R6s_norm[ i] = list_weighted_to_friends_R6s_norm[i] / float( sum(list_weighted_to_friends_R6s_norm)) for i in range(len(list_weighted_from_friends_R6s_norm)): if sum(list_weighted_from_friends_R6s_norm) > 0: list_weighted_from_friends_R6s_norm[ i] = 
list_weighted_from_friends_R6s_norm[i] / float( sum(list_weighted_from_friends_R6s_norm)) # no puedo normalizar over and over again los primero elementos muchas mas veces que los ultimos agnadidos!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! for i in range( len(list_weighted_tot_messg_friends) ): # how important is the communication with any R6 friend, compare to the tot # messag list_weights_friendships_with_R6s.append( list_weighted_tot_messg_friends[i]) for i in range(len(list_weighted_to_friends)): list_weights_friendships_to_R6s.append( list_weighted_to_friends[i]) for i in range(len(list_weighted_from_friends)): list_weights_friendships_from_R6s.append( list_weighted_from_friends[i]) list_to_friends.append(num_messg_to_friends) list_from_friends.append(num_messg_from_friends) list_tot_messg_friends.append(num_messg_friends) # print "norm list weighted tot friendships:",list_weighted_tot_messg_friends_norm,"with R6s:",list_weights_friendships_with_R6s # i calculate how skewed friendships for a given user are: if len(list_weighted_to_friends) > 0 and sum( list_weighted_to_friends) > 0: Gini_to_friends = GINI_coef.calculate_GINI( list_weighted_to_friends) list_GINI_weighted_to_friends.append( Gini_to_friends) # one value per USER # print ",to friends: ", list_weighted_to_friends, sum(list_weighted_to_friends),Gini_to_friends print >> file1, H1.degree( node), sum(list_weighted_to_friends ), sum(list_weighted_to_friends) / float( H1.degree(node)), Gini_to_friends if (H1.node[node]['role'] == "R6"): list_GINI_weighted_to_friends_R6s.append(Gini_to_friends) print >> file11, H1.degree( node), sum(list_weighted_to_friends ), sum(list_weighted_to_friends) / float( H1.degree(node)), Gini_to_friends if (H1.node[node]['R6_overlap'] > 0): list_GINI_weighted_to_friends_R6overlap.append( Gini_to_friends) print >> file111, H1.degree( node), sum(list_weighted_to_friends ), sum(list_weighted_to_friends) / float( H1.degree(node)), 
Gini_to_friends if len(list_weighted_from_friends) > 0 and sum( list_weighted_from_friends) > 0: Gini_from_friends = GINI_coef.calculate_GINI( list_weighted_from_friends) list_GINI_weighted_from_friends.append(Gini_from_friends) # print ",from friends: ", list_weighted_from_friends, sum(list_weighted_from_friends),Gini_from_friends print >> file2, H1.degree( node), sum(list_weighted_from_friends ), sum(list_weighted_from_friends) / float( H1.degree(node)), Gini_from_friends if (H1.node[node]['role'] == "R6"): list_GINI_weighted_from_friends_R6s.append( Gini_from_friends) print >> file12, H1.degree( node), sum(list_weighted_from_friends ), sum(list_weighted_from_friends) / float( H1.degree(node)), Gini_from_friends if (H1.node[node]['R6_overlap'] > 0): list_GINI_weighted_from_friends_R6overlap.append( Gini_from_friends) print >> file112, H1.degree( node), sum(list_weighted_from_friends ), sum(list_weighted_from_friends) / float( H1.degree(node)), Gini_from_friends if len(list_weighted_tot_messg_friends) > 0 and sum( list_weighted_from_friends) > 0: Gini_friends = GINI_coef.calculate_GINI( list_weighted_tot_messg_friends) list_GINI_weighted_tot_messg_friends.append(Gini_friends) # print ",tot: ",list_weighted_tot_messg_friends , sum(list_weighted_tot_messg_friends),Gini_friends print >> file3, H1.degree( node), sum(list_weighted_tot_messg_friends ), sum(list_weighted_tot_messg_friends) / float( H1.degree(node)), Gini_friends if (H1.node[node]['role'] == "R6"): list_GINI_weighted_tot_messg_friends_R6s.append( Gini_friends) print >> file13, H1.degree(node), sum( list_weighted_tot_messg_friends ), sum(list_weighted_tot_messg_friends) / float( H1.degree(node)), Gini_friends if (H1.node[node]['R6_overlap'] > 0): list_GINI_weighted_tot_messg_friends_R6overlap.append( Gini_friends) print >> file113, H1.degree(node), sum( list_weighted_tot_messg_friends ), sum(list_weighted_tot_messg_friends) / float( H1.degree(node)), Gini_friends if num_received != 0: 
list_sent_from_not_friends.append(float(from_not_friends)) if num_sent != 0: list_sent_to_not_friends.append(float(to_not_friends)) file1.close() file2.close() file3.close() print "average from_not_friends:", numpy.mean(list_sent_from_not_friends) print "average to_not_friends:", numpy.mean(list_sent_to_not_friends) print "average to_friends:", numpy.mean(list_to_friends) print "average from_friends:", numpy.mean(list_from_friends) print "average tot messg friends:", numpy.mean(list_tot_messg_friends) print "average tot sent:", numpy.mean(list_tot_sent) print "average tot received:", numpy.mean(list_tot_received) histograma_gral.histograma(list_sent_from_not_friends, "not_from_friends") histograma_gral.histograma(list_sent_to_not_friends, "not_to_friends") histograma_gral.histograma(list_tot_sent, "tot_sent") histograma_gral.histograma(list_tot_received, "tot_received") histograma_gral.histograma( list_to_friends, "to_friends") #data, string_for_output_file_name histograma_gral.histograma(list_from_friends, "from_friends") histograma_gral.histograma(list_tot_messg_friends, "tot_friends") histograma_bines_gral.histograma_bins(list_GINI_weighted_to_friends, 75, "Gini_weight_to_friends") histograma_bines_gral.histograma_bins(list_GINI_weighted_from_friends, 75, "Gini_weight_from_friends") histograma_bines_gral.histograma_bins(list_GINI_weighted_tot_messg_friends, 75, "Gini_weight_tot_friends") histograma_bines_gral.histograma_bins(list_GINI_weighted_to_friends_R6s, 75, "Gini_weight_to_friends_R6s") histograma_bines_gral.histograma_bins(list_GINI_weighted_from_friends_R6s, 75, "Gini_weight_from_friends_R6s") histograma_bines_gral.histograma_bins( list_GINI_weighted_tot_messg_friends_R6s, 75, "Gini_weight_tot_friends_R6s") histograma_bines_gral.histograma_bins( list_GINI_weighted_to_friends_R6overlap, 75, "Gini_weight_to_friends_R6overlap") histograma_bines_gral.histograma_bins( list_GINI_weighted_from_friends_R6overlap, 75, "Gini_weight_from_friends_R6overlap") 
histograma_bines_gral.histograma_bins( list_GINI_weighted_tot_messg_friends_R6overlap, 75, "Gini_weight_tot_friends_R6overlap") histograma_gral.histograma(list_blog_posts, "num_blog_posts") histograma_gral.histograma(list_home_page, "num_home_page_posts") histograma_gral.histograma(list_forum_posts, "num_forum_posts") histograma_gral.histograma(list_lesson_com, "num_lesson_com") histograma_gral.histograma(list_tot_public_mess, "num_tot_public_messages")
def main(): file2 = open("./Results/Scatter_plot_length_slope_lin.dat", 'wt') file3 = open("./Results/Scatter_plot_tau_deltaY_exp.dat", 'wt') file4 = open("./Results/Summary_results_cutting_time_series.dat", 'wt') database = "calorie_king_social_networking_2010" server = "tarraco.chem-eng.northwestern.edu" user = "******" passwd = "n1ckuDB!" db = Connection(server, database, user, passwd) query = """select * from weigh_in_cuts order by id, start_day""" result = db.query(query) # is a list of dict. list_distinct_users = [] list_num_segments_per_user = [] list_quality_values_lin = [] #DW score list_quality_values_con = [] #DW score list_quality_values_exp = [] #DW score list_pairs_tau_deltaW = [] list_pairs_slope_time_length = [] num_segments = 0 num_lin_segments = 0 num_con_segments = 0 num_exp_segments = 0 num_isolates = 0 num_segments_per_user = 0 for line in result: # each line is a dict, each line is a segment user = line['ck_id'] fit_type = str(line['fit_type']) start_day = int(line['start_day']) stop_day = int(line['stop_day']) start_weight = float(line['start_weight']) stop_weight = float(line['stop_weight']) if fit_type != "isolated": # isolated datapoint (with gaps at both sides) num_segments += 1 try: quality = float(line['quality']) except TypeError: print user raw_input( ) # pass # isolated points dont have quality #parameters for linear: 1:cte, 2:slope. for exponential: 1:cte, 2:multiplicative_cte, 3:multipli_cte_in_the_exp param1 = float(line['param1']) try: param2 = float(line['param2']) except TypeError: print "\nconstant segment!", user param2 = 0. try: param3 = float(line['param3']) except TypeError: pass # cos the linear segments dont have a param3 print "\n", user if user not in list_distinct_users: list_distinct_users.append(user) if num_segments_per_user != 0: list_num_segments_per_user.append( num_segments_per_user ) # i save the value from the previous user before starting the count for this one. 
num_segments_per_user = 1 else: num_segments_per_user += 1 if fit_type == "linear" or fit_type == "constant": if fit_type == "linear": num_lin_segments += 1 list_quality_values_lin.append(quality) elif fit_type == "constant": num_con_segments += 1 list_quality_values_con.append(quality) tupla = [] tupla.append(float(stop_day - start_day + 1.)) tupla.append(param2) list_pairs_slope_time_length.append(tupla) elif fit_type == "exponent": num_exp_segments += 1 list_quality_values_exp.append(quality) tupla = [] tupla.append(1. / param3) tupla.append( float(stop_weight - start_weight) ) # FAKE VALUES FOR NOW!!!! CAMBIAR ESTO POR LOS NOMBRES DE LOS CAMPOS QUE AUN NO EXISTEN: startY , stopY list_pairs_tau_deltaW.append(tupla) else: print fit_type, "nor lin nor exp!", type(fit_type), else: num_isolates += 1 histograma_gral.histograma( list_num_segments_per_user, "./Results/Distribution_num_segments_per_user.dat") for item in list_pairs_slope_time_length: print >> file2, item[0], item[1] file2.close() for item in list_pairs_tau_deltaW: print >> file3, item[0], item[1] file3.close() print >> file4, "Summary results cutting time series:\n\n" print >> file4, "Number of users:", len( list_distinct_users), "(with at least 20 weigh-ins)" print >> file4, "Number of segments:", num_lin_segments + num_con_segments + num_exp_segments #not including one-point segments print >> file4, "Average number of segments per individual:", num_segments / float( len(list_distinct_users)) print >> file4, "Number of one-point segments:", num_isolates print >> file4, "Number segments by type:" print >> file4, " Linear: ", num_lin_segments print >> file4, " Constant: ", num_con_segments print >> file4, " Exponential: ", num_exp_segments, "\n" print >> file4, "Regarding the goodness of the fits, DW average score:" print >> file4, " Linear: ", numpy.mean(list_quality_values_lin) print >> file4, " Constant: ", numpy.mean(list_quality_values_con) print >> file4, " Exponential: ", 
numpy.mean(list_quality_values_exp) file4.close() print len(list_quality_values_lin), list_quality_values_lin print len(list_quality_values_con), list_quality_values_con print len(list_quality_values_exp), list_quality_values_exp histograma_bines_gral.histograma_bins( list_quality_values_lin, 10, "./Results/Distribution_DW_scores_lin_segments.dat") histograma_bines_gral.histograma_bins( list_quality_values_con, 10, "./Results/Distribution_DW_scores_const_segments.dat") histograma_bines_gral.histograma_bins( list_quality_values_exp, 10, "./Results/Distribution_DW_scores_exp_segments.dat") print "\n done!"
def main(graph_name):
    """Simulate infection-style idea spreading on the doctor/shift network.

    Runs Niter stochastic realizations per (prob_infection, prob_Immune) pair,
    compares each simulated adoption curve against the actual adopter
    evolution read from file, and either sweeps the parameter landscape
    (for_testing_fixed_set == "NO") or evaluates one fixed parameter set with
    full end-point statistics and histograms (for_testing_fixed_set == "YES").
    """
    G = nx.read_gml(graph_name)
    # when YES: fixed param values, to get all statistics on final distances
    # etc; change the parameter ranges accordingly
    for_testing_fixed_set = "YES"
    envelopes = "YES"
    Niter = 1000  # 100 iter seems to be enough (no big diff. with respect to 1000it)
    percent_envelope = 95.
    # T3 doesnt share fellows in the weekend (but they are the exception)
    list_id_weekends_T3 = look_for_T3_weekends(G)
    Nbins = 20  # for the histogram of sum of distances
    cutting_day = 175  # i use this only for the filenames
    all_team = "NO"  # as adopters or not
    dir_real_data = '../Results/'
    dir = "../Results/weight_shifts/infection/"  # NOTE(review): shadows builtin `dir`
    # >= than + or - dr difference at the end of the evolution (NO realization
    # ends up closer than this!!!! if 2, i get an empty list!!!)
    delta_end = 3.
    if for_testing_fixed_set == "NO":
        # landscape output only needed when sweeping the parameter space
        output_file3 = "../Results/weight_shifts/Landscape_parameters_infection_" + str(
            Niter) + "iter.dat"
        file3 = open(output_file3, 'wt')
        file3.close()

    ######################################################################################
    # I read the file of the actual evolution of the idea spreading in the hospital:    ##
    ######################################################################################
    if all_team == "YES":
        print "remember that now i use the file of adopters without fellows\n../Results/Actual_evolution_adopters_NO_fellows_only_attendings.dat"
        exit()
    else:
        filename_actual_evol = "../Results/Actual_evolution_adopters_NO_fellows_only_attendings.dat"

    # i read the file: list_dates_and_names_current_adopters.txt (created with:
    # extract_real_evolution_number_adopters.py)
    file1 = open(filename_actual_evol, 'r')
    list_lines_file = file1.readlines()

    list_actual_evol = []
    for line in list_lines_file:  # [1:]:  # i exclude the first row
        num_adopters = float(line.split(" ")[1])
        list_actual_evol.append(num_adopters)

    ##################################################################

    #../Results/weight_shifts/infection/Average_time_evolution_Infection_training_p0.8_Immune0.3_1000iter_2012_avg_ic_day125.dat
    # (original note, translated): THESE VALUES ARE THE OPTIMUM FIT FOR THE 152 DAYS
    # ranges are collapsed to a single point: only p=0.1, Immune=0.0 is run
    prob_min = 0.1
    prob_max = 0.101
    delta_prob = 0.1

    prob_Immune_min = 0.00
    prob_Immune_max = 0.001
    delta_prob_Immune = 0.1

    # i will save the filename as key and the tot distance from that curve to
    # the original one
    dict_filenames_tot_distance = {}

    prob_Immune = prob_Immune_min
    while prob_Immune <= prob_Immune_max:
        print "prom Immune:", prob_Immune
        prob_infection = prob_min
        while prob_infection <= prob_max:
            print "  p:", prob_infection
            if for_testing_fixed_set == "YES":
                output_file2 = dir + "Average_time_evolution_Infection_train_test_p" + str(
                    prob_infection) + "_" + "Immune" + str(
                        prob_Immune) + "_" + str(Niter) + "iter_2012.dat"
            else:
                output_file2 = dir + "Average_time_evolution_Infection_p" + str(
                    prob_infection) + "_" + "Immune" + str(
                        prob_Immune) + "_" + str(Niter) + "iter_2012.dat"
            file2 = open(output_file2, 'wt')
            file2.close()

            # list_final_I_values_fixed_p=[]  # i dont care about the final
            # values right now, but about the whole time evol
            list_lists_t_evolutions = []  # one time-evolution list per realization
            list_dist_fixed_parameters = []
            list_abs_dist_at_ending_point_fixed_parameters = []
            list_dist_at_ending_point_fixed_parameters = []
            list_final_num_infected = []
            # list_abs_dist_at_cutting_day=[]

            for iter in range(Niter):
                # print "     iter:",iter

                ####### OJO~!!!!!!!!!! (translated): COMMENT THIS OUT WHEN
                ####### SWEEPING THE WHOLE PARAMETER SPACE
                # file_name_indiv_evol=output_file2.strip("Average_").split('.dat')[0]+"_indiv_iter"+str(iter)+".dat"
                # file4 = open(file_name_indiv_evol,'wt')
                # file4.close()
                ##########################################

                ########### set I.C.
                list_I = []  # list infected doctors
                max_order = 0
                for n in G.nodes():
                    G.node[n]["status"] = "S"  # all nodes are Susceptible
                    if G.node[n]['type'] == "shift":
                        if G.node[n]['order'] > max_order:
                            # to get the last shift-order for the time loop
                            max_order = G.node[n]['order']
                    else:
                        # Wunderink and Weiss are the two seed adopters
                        if G.node[n]['label'] == "Wunderink" or G.node[n]["label"] == "Weiss":
                            G.node[n]["status"] = "I"
                            list_I.append(G.node[n]['label'])

                list_single_t_evolution = []
                list_single_t_evolution.append(2.0)  # I always start with TWO infected doctors!!

                # i make some DOCTORs IMMUNE (anyone except Weiss and Wunderink)
                for n in G.nodes():
                    if (G.node[n]['type'] == "A") or (G.node[n]['type'] == "F"):
                        if G.node[n]['label'] != "Wunderink" and G.node[n]["label"] != "Weiss":
                            rand = random.random()
                            if rand < prob_Immune:
                                G.node[n]["status"] = "Immune"

                ################# the dynamics starts:
                t = 1
                while t <= max_order:  # loop over shifts, in order
                    for n in G.nodes():
                        if G.node[n]['type'] == "shift" and G.node[n]['order'] == t:
                            shift_lenght = int(G.node[n]['shift_lenght'])
                            if shift_lenght == 2 and n not in list_id_weekends_T3:
                                # because during weekends, the fellow does rounds
                                # one day with Att1 and the other day with Att2
                                # (weekend shifts for T3 are two days long, with
                                # no sharing fellows)
                                shift_lenght = 1
                                # print "one-day weekend", G.node[n]['label'],G.node[n]['shift_lenght']

                            flag_possible_infection = 0
                            # first i check if any doctor is infected in this shift
                            for doctor in G.neighbors(n):
                                if G.node[doctor]["status"] == "I":
                                    flag_possible_infection = 1

                            if flag_possible_infection:
                                # then the doctors in that shift get infected
                                # with prob_infection
                                for doctor in G.neighbors(n):
                                    # i repeat the infection process several
                                    # times, to account for shift length
                                    for i in range(shift_lenght):
                                        if G.node[doctor]["status"] == "S":
                                            rand = random.random()
                                            if rand < prob_infection:
                                                G.node[doctor]["status"] = "I"
                                                # fellows participate in the
                                                # dynamics, but i only consider
                                                # the attendings as real adopters
                                                if G.node[doctor]["type"] == "A":
                                                    list_I.append(G.node[doctor]["label"])

                    # if for_testing_fixed_set=="YES":
                    #     if t==cutting_day:
                    #         list_abs_dist_at_cutting_day.append(abs(float(list_actual_evol[-1])-float(len(list_I))))
                    #         print abs(float(list_actual_evol[-1])-float(len(list_I))), float(list_actual_evol[t]),float(len(list_I))

                    list_single_t_evolution.append(float(len(list_I)))
                    t += 1
                    ######## end t loop

                ######## OJO~!!!!!!!!!! (translated): COMMENT THIS OUT WHEN
                ######## SWEEPING THE WHOLE PARAMETER SPACE
                # file4 = open(file_name_indiv_evol,'at')
                # for i in range(len(list_single_t_evolution)):  # time step by time step
                #     print >> file4, i,list_single_t_evolution[i], prob_infection, prob_Immune
                # file4.close()
                ########################################################

                list_lists_t_evolutions.append(list_single_t_evolution)
                list_dist_fixed_parameters.append(
                    compare_real_evol_vs_simus_to_be_called.compare_two_curves(
                        list_actual_evol, list_single_t_evolution))
                # i save the distance at the ending point between the current
                # simu and actual evol (absolute and signed)
                list_abs_dist_at_ending_point_fixed_parameters.append(
                    abs(list_single_t_evolution[-1] - list_actual_evol[-1]))
                list_dist_at_ending_point_fixed_parameters.append(
                    list_single_t_evolution[-1] - list_actual_evol[-1])
                list_final_num_infected.append(list_single_t_evolution[-1])
                ######## end loop Niter

            # summary triple: [mean traj. distance, its SD, mean |end-point distance|]
            list_pair_dist_std_delta_end = []
            list_pair_dist_std_delta_end.append(
                numpy.mean(list_dist_fixed_parameters))  # average dist between the curves over Niter
            list_pair_dist_std_delta_end.append(
                numpy.std(list_dist_fixed_parameters))
            list_pair_dist_std_delta_end.append(
                numpy.mean(list_abs_dist_at_ending_point_fixed_parameters))

            if for_testing_fixed_set == "NO":
                file3 = open(output_file3, 'at')  # i print out the landscape
                print >> file3, prob_infection, prob_Immune, numpy.mean(
                    list_abs_dist_at_ending_point_fixed_parameters
                ), numpy.mean(list_dist_fixed_parameters), numpy.mean(
                    list_final_num_infected), numpy.std(list_final_num_infected)
                file3.close()

            # i only consider situations close enough at the ending point
            if (numpy.mean(list_abs_dist_at_ending_point_fixed_parameters)) <= delta_end:
                dict_filenames_tot_distance[output_file2] = list_pair_dist_std_delta_end

            # average the Niter realizations time step by time step
            file2 = open(output_file2, 'at')
            for s in range(len(list_single_t_evolution)):
                list_fixed_t = []
                for iter in range(Niter):
                    list_fixed_t.append(list_lists_t_evolutions[iter][s])
                print >> file2, s, numpy.mean(list_fixed_t)
            file2.close()
            print "printed out: ", output_file2
            # raw_input()

            if envelopes == "YES":
                calculate_envelope_set_curves.calculate_envelope(
                    list_lists_t_evolutions, percent_envelope, "Infection",
                    [prob_infection, prob_Immune])

            if for_testing_fixed_set == "YES":
                # count how many realizations get close enough at the ending point
                num_valid_endings = 0.
                for item in list_abs_dist_at_ending_point_fixed_parameters:
                    if item <= delta_end:
                        num_valid_endings += 1.

                print "average distance of the optimum in the testing segment:", numpy.mean(
                    list_dist_fixed_parameters), numpy.std(
                        list_dist_fixed_parameters), list_dist_fixed_parameters, "\n"
                print "fraction of realizations that end within delta_doctor:", num_valid_endings / Niter, "mean ending dist:", numpy.mean(
                    list_dist_at_ending_point_fixed_parameters
                ), "SD final dist", numpy.std(
                    list_dist_at_ending_point_fixed_parameters
                ), list_dist_at_ending_point_fixed_parameters, "\n"

                histogram_filename = "../Results/weight_shifts/histogr_raw_distances_ending_infection_p" + str(
                    prob_infection) + "_" + "Immune" + str(
                        prob_Immune) + "_" + str(Niter) + "iter_day" + str(
                            cutting_day) + ".dat"
                histograma_gral_negv_posit.histograma(
                    list_dist_at_ending_point_fixed_parameters, histogram_filename)

                histogram_filename2 = "../Results/weight_shifts/histogr_sum_dist_traject_infection_p" + str(
                    prob_infection) + "_" + "Immune" + str(
                        prob_Immune) + "_" + str(Niter) + "iter_day" + str(
                            cutting_day) + ".dat"
                histograma_bines_gral.histograma_bins(
                    list_dist_fixed_parameters, Nbins, histogram_filename2)

                output_file10 = "../Results/weight_shifts/Summary_results_training_segment_infection_p" + str(
                    prob_infection) + "_" + "Immune" + str(
                        prob_Immune) + "_" + str(Niter) + "iter_day" + str(
                            cutting_day) + ".dat"
                file10 = open(output_file10, 'wt')

                print >> file10, "Summary results from train-testing infection with", Niter, "iter, and with values for the parameters:  prob_inf ", prob_infection, " prob immune: ", prob_Immune, "\n"
                print >> file10, "average distance of the optimum in the testing segment:", numpy.mean(
                    list_dist_fixed_parameters), numpy.std(
                        list_dist_fixed_parameters), list_dist_fixed_parameters, "\n"
                print >> file10, "fraction of realizations that end within delta_doctor:", num_valid_endings / Niter, "mean ending dist:", numpy.mean(
                    list_dist_at_ending_point_fixed_parameters
                ), "SD final dist", numpy.std(
                    list_dist_at_ending_point_fixed_parameters
                ), list_dist_at_ending_point_fixed_parameters, "\n"
                print >> file10, "written optimum train_test evolution file:", output_file2
                print >> file10, "written histogram file: ", histogram_filename
                file10.close()

                print "written Summary file: ", output_file10
                print "written histogram file: ", histogram_filename
                print "written histogram file: ", histogram_filename2

            prob_infection += delta_prob
        prob_Immune += delta_prob_Immune

    if for_testing_fixed_set == "NO":
        # only if i am exploring the whole landscape i need to call this
        # function; otherwise i already know the optimum
        compare_real_evol_vs_simus_to_be_called.pick_minimum_same_end(
            dict_filenames_tot_distance, "Infection_weight", all_team, Niter,
            None)  # last argument doesnt apply (cutting day)

    if for_testing_fixed_set == "NO":
        print "written landscape file:", output_file3
def main(graph_name): G = nx.read_gml(graph_name) list_id_weekends_T3=look_for_T3_weekends(G) # T3 doesnt share fellows in the weekend (but they are the exception) cutting_day=175 # to separate training-testing Niter_training=1000 delta_end=3 # >= than + or - dr difference at the end of the evolution dir_real_data='../Results/' dir="../Results/weight_shifts/persuasion/" all_team="NO" # as adopters or not Nbins=20 # for the histogram of sum of distances fixed_param="FIXED_mutual0.5_damping.5_" # or "" # for the Results file that contains the sorted list of best parameters # fixed_parameters="mutual_encoug0.5_threshold0.5" # for the Landscape text file CHANGE PARAMETERS ACCORDINGLY!!! #output_file3="../Results/weight_shifts/Landscape_parameters_persuasion_train_test_"+str(fixed_parameters)+"_"+str(Niter_training)+"iter.dat" output_file3="../Results/weight_shifts/Landscape_parameters_persuasion_train_FIXED_damping0.1_threshold0.7_"+str(Niter_training)+"iter_alphaA_eq_alphaF.dat" file3 = open(output_file3,'wt') file3.close() ###################################################################################### # I read the file of the actual evolution of the idea spreading in the hospital: ## ###################################################################################### if all_team=="YES": print "remember that now i use the file of adopters without fellows\n../Results/Actual_evolution_adopters_NO_fellows_only_attendings.dat" exit() else: filename_actual_evol="../Results/Actual_evolution_adopters_NO_fellows_only_attendings.dat" file1=open(filename_actual_evol,'r') ## i read the file: list_dates_and_names_current_adopters.txt (created with: extract_real_evolution_number_adopters.py) list_lines_file=file1.readlines() list_actual_evol=[] for line in list_lines_file: # [1:]: # i exclude the first row num_adopters= float(line.split(" ")[1]) list_actual_evol.append(num_adopters) list_actual_evol_training=list_actual_evol[:cutting_day] 
################################################################## #../Results/network_final_schedule_withTeam3/Time_evolutions_Persuasion_alpha0.2_damping0.0_mutual_encourg0.7_threshold0.4_unif_distr_50iter_2012_seed31Oct_finalnetwork.dat alpha_F_min=0.10 # # alpha=0: nobody changes their mind alpha_F_max=0.9 delta_alpha_F=0.10 #AVOID 1.0 OR THE DYNAMICS GETS TOTALLY STUCK AND IT IS NOT ABLE TO PREDICT SHIT! min_damping=0.500 #0.0 #its harder to go back from YES to NO again. =1 means no effect, =0.5 half the movement from Y->N than the other way around, =0 never go back from Y to N max_damping=0.501 #0.451 delta_damping=0.10 min_mutual_encouragement=0.50 # # when two Adopters meet, they convince each other even more max_mutual_encouragement=0.501 delta_mutual_encouragement=0.10 threshold_min=0.10 # # larger than, to be an Adopter threshold_max=0.901 delta_threshold=0.10 # AVOID 1.0 OR THE DYNAMICS GETS TOTALLY STUCK AND IT IS NOT ABLE TO PREDICT SHIT print "\n\nPersuasion process on network, with Niter:",Niter_training dict_filenames_tot_distance={} # i will save the filename as key and the tot distance from that curve to the original one dict_filenames_prod_distances={} threshold=threshold_min while threshold<= threshold_max: print "thershold:",threshold alpha_F=alpha_F_min while alpha_F<= alpha_F_max: # i explore all the parameter space, and create a file per each set of values alpha_A=1.0*alpha_F print " alpha_F:",alpha_F mutual_encouragement=min_mutual_encouragement while mutual_encouragement <= max_mutual_encouragement: print " mutual_encouragement:",mutual_encouragement damping=min_damping while damping <= max_damping: print " damping:",damping # dir="../Results/weight_shifts/persuasion/alpha%.2f_damping%.2f/" % (alpha_F, damping ) 
output_file=dir+"Time_evolutions_Persuasion_training_alpha"+str(alpha_F)+"_damping"+str(damping)+"_mutual_encourg"+str(mutual_encouragement)+"_threshold"+str(threshold)+"_unif_distr_"+str(Niter_training)+"iter_alphaA_eq_alphaF"+"_"+str(cutting_day)+".dat" # file = open(output_file,'wt') # i am not saving the train file, because i will just want to know #file.close() # the optimum parameter set and go look for the whole-250-day file time_evol_number_adopters_ITER=[] # list of complete single realizations of the dynamics list_dist_fixed_parameters=[] list_dist_at_ending_point_fixed_parameters=[] list_dist_abs_at_ending_point_fixed_parameters=[] list_networks_at_cutting_day=[] list_final_num_adopt=[] for iter in range(Niter_training): # print " ",iter list_t=[] time_evol_number_adopters=[] # for a single realization of the dynamics num_adopters , seed_shift ,max_shift= set_ic(G,threshold) # i establish who is Adopter and NonAdopter initially, and count how many shifts i have total time_evol_number_adopters.append(float(num_adopters)) list_t.append(0) ########### the dynamics starts: t=int(seed_shift)+1 # the first time step is just IC.??? while t< cutting_day: # loop over shifts, in chronological order (the order is the day index since seeding_day) list_t.append(t) for n in G.nodes(): if G.node[n]['type']=="shift" and G.node[n]['order']==t: # i look for the shift corresponding to that time step shift_lenght=int(G.node[n]['shift_lenght']) if shift_lenght==2 and n not in list_id_weekends_T3: shift_lenght=1 # because during weekends, the fellow does rounds one day with Att1 and the other day with Att2. 
(weekend shifts for T3 are two day long, with no sharing fellows) flag_possible_persuasion=0 for doctor in G.neighbors(n): if G.node[doctor]["status"]=="Adopter": #first i check if any doctor is an adopter in this shift flag_possible_persuasion=1 break if flag_possible_persuasion==1: list_doctors=[] for doctor in G.neighbors(n): # for all drs in that shift list_doctors.append(doctor) pairs=itertools.combinations(list_doctors,2) # cos the shift can be 2 but also 3 doctors for pair in pairs: doctor1=pair[0] doctor2=pair[1] if G.node[doctor1]['status'] != G.node[doctor2]['status']: # if they think differently, # there will be persuasion persuasion(G,damping,doctor1,doctor2,alpha_A,alpha_F,threshold,shift_lenght) # i move their values of opinion update_opinions(G,threshold,doctor1,doctor2) # i update status and make sure the values of the vectors stay between [0,1] else: # if two Adopters meet, they encourage each other (if two NonAdopters, nothing happens) mutual_reinforcement(G,mutual_encouragement,doctor1,doctor2,shift_lenght) list_all_Adopters=[] #including fellows list_Adopters=[] #NOT including fellows for n in G.nodes(): try: if G.node[n]["status"]=="Adopter": if G.node[n]["label"] not in list_Adopters and G.node[n]["type"]=="A": list_Adopters.append(G.node[n]["label"]) except: pass # if the node is a shift, it doesnt have a 'status' attribute time_evol_number_adopters.append(float(len(list_Adopters))) t+=1 ############## end while loop over t time_evol_number_adopters_ITER.append(time_evol_number_adopters) list_final_num_adopt.append(time_evol_number_adopters[-1]) list_dist_fixed_parameters.append(compare_real_evol_vs_simus_to_be_called.compare_two_curves( list_actual_evol_training,time_evol_number_adopters)) list_dist_abs_at_ending_point_fixed_parameters.append( abs(time_evol_number_adopters[-1]-list_actual_evol_training[-1]) ) list_dist_at_ending_point_fixed_parameters.append( time_evol_number_adopters[-1]-list_actual_evol_training[-1]) 
####################### end loop Niter for the training fase list_pair_dist_std_delta_end=[] list_pair_dist_std_delta_end.append(numpy.mean(list_dist_fixed_parameters) ) # average dist between the curves over Niter list_pair_dist_std_delta_end.append(numpy.std(list_dist_fixed_parameters) ) list_pair_dist_std_delta_end.append(numpy.mean(list_dist_abs_at_ending_point_fixed_parameters)) value=numpy.mean(list_dist_fixed_parameters) *numpy.mean(list_dist_abs_at_ending_point_fixed_parameters) # if SD=0, it is a problem, because then that is the minimun value, but not the optimum i am looking for!! dict_filenames_prod_distances[output_file]= value file3 = open(output_file3,'at') # i print out the landscape print >> file3, alpha_F, damping, mutual_encouragement, threshold,numpy.mean(list_dist_abs_at_ending_point_fixed_parameters), numpy.mean(list_dist_fixed_parameters), numpy.mean(list_final_num_adopt),numpy.std(list_final_num_adopt), numpy.std(list_final_num_adopt)/numpy.mean(list_final_num_adopt) file3.close() histogram_filename="../Results/weight_shifts/histogr_raw_distances_ending_test_train_alpha"+str(alpha_F)+"_damping"+str(damping)+"_mutual_encourg"+str(mutual_encouragement)+"_threshold"+str(threshold)+"_unif_distr_"+str(Niter_training)+"iter_alphaA_eq_alphaF"+"_"+str(cutting_day)+".dat" histograma_gral_negv_posit.histograma(list_dist_at_ending_point_fixed_parameters,histogram_filename) histogram_filename2="../Results/weight_shifts/histogr_sum_dist_traject_infection_training_alpha"+str(alpha_F)+"_damping"+str(damping)+"_mutual_encourg"+str(mutual_encouragement)+"_threshold"+str(threshold)+"_unif_distr_"+str(Niter_training)+"iter_alphaA_eq_alphaF"+"_"+str(cutting_day)+".dat" histograma_bines_gral.histograma_bins(list_dist_fixed_parameters,Nbins,histogram_filename2) print "written histogram file: ",histogram_filename print "written histogram file: ",histogram_filename2 if (numpy.mean(list_dist_abs_at_ending_point_fixed_parameters)) <= delta_end: # i only consider 
situations close enough at the ending point dict_filenames_tot_distance[output_file]=list_pair_dist_std_delta_end # file = open(output_file,'wt') # for i in range(len(time_evol_number_adopters)): #time step by time step # list_fixed_t=[] # for iteracion in range (Niter_training): #loop over all independent iter of the process # list_fixed_t.append(time_evol_number_adopters_ITER[iteracion][i]) # i collect all values for the same t, different iter # print >> file, list_t[i],numpy.mean(list_fixed_t),numpy.std(list_fixed_t), alpha_F,damping,mutual_encouragement #file.close() damping += delta_damping mutual_encouragement += delta_mutual_encouragement alpha_F += delta_alpha_F threshold += delta_threshold list_order_dict= compare_real_evol_vs_simus_to_be_called.pick_minimum_same_end(dict_filenames_tot_distance,"Persuasion_training_land_weight",all_team,Niter_training,cutting_day) string_name="_persuasion_training_"+fixed_param+str(Niter_training)+"iter_"+str(cutting_day)+".dat" # for the "Results" file with the sorted list of files list_order_dict2= compare_real_evol_vs_simus_to_be_called.pick_minimum_prod_distances(dict_filenames_prod_distances,string_name,all_team,Niter_training,cutting_day) #./Results/network_final_schedule_withTeam3_local/Time_evolutions_Persuasion_alpha0.4_damping0.4_mutual_encourg0.6_threshold0.5_unif_distr_2iter_2012_seed31Oct_finalnetwork.dat optimum_filename=list_order_dict[0][0] print optimum_filename alpha_F=float(list_order_dict[0][0].split("_alpha")[1].split("_")[0]) alpha_A=0.5*alpha_F damping=float(list_order_dict[0][0].split("_damping")[1].split("_")[0]) mutual_encouragement=float(list_order_dict[0][0].split("_mutual_encourg")[1].split("_")[0]) threshold=float(list_order_dict[0][0].split("_threshold")[1].split("_")[0]) print "Optimum (old method) alpha=", alpha_F, " damping=",damping," mutual encourag=",mutual_encouragement," threshold",threshold optimum_filename=list_order_dict2[0][0] print optimum_filename 
alpha_F=float(list_order_dict2[0][0].split("_alpha")[1].split("_")[0]) alpha_A=0.5*alpha_F damping=float(list_order_dict2[0][0].split("_damping")[1].split("_")[0]) mutual_encouragement=float(list_order_dict2[0][0].split("_mutual_encourg")[1].split("_")[0]) threshold=float(list_order_dict2[0][0].split("_threshold")[1].split("_")[0]) print "Optimum (product distances and SDs) alpha=", alpha_F, " damping=",damping," mutual encourag=",mutual_encouragement," threshold",threshold output_file10="../Results/weight_shifts/Summary_results_train_test_persuasion_alpha"+str(alpha_F)+"_FIXED_damping"+str(damping)+"_mutual_encourg"+str(mutual_encouragement)+"_FIXED_threshold"+str(threshold)+"_"+str(Niter_training)+"iter_alphaA_eq_alphaF_day"+str(cutting_day)+".dat" file10 = open(output_file10,'wt') print >> file10, "Summary results from train-testing persuasion with",Niter_training, "iter, using the avg of the cutting points as IC, and with values for the parameters: alpha ",alpha_F," damping: ",damping," mutual_encourg: ",mutual_encouragement," threshold:",threshold print >> file10, "Look for optimum the file set of parameters (or run those simulations):",optimum_filename file10.close() print "Look for optimum the file set of parameters (or run those simulations):",optimum_filename print "printed out landscape file:",output_file3
def main(graph_name): G = nx.read_gml(graph_name) Niter = 10000 dir_real_data = '../Results/' Nbins = 100 all_team = "NO" # as adopters or not # output_file3=dir_real_data+"Landscape_parameters_persuasion_"+str(Niter)+"iter.dat" #file3 = open(output_file3,'wt') ###################################################################################### # I read the file of the actual evolution of the idea spreading in the hospital: ## ###################################################################################### if all_team == "YES": filename_actual_evol = dir_real_data + "HospitalModel_august1_adoption_counts_all_team_as_adopters_SIMPLER.csv" else: filename_actual_evol = dir_real_data + "HospitalModel_august1_adoption_counts_SIMPLER.csv" #ya no necesito CAMBIAR TB EL NOMBRE DEL ARCHIVO EN EL CODIGO PARA COMPARAR CURVAs list_actual_evol = [] result_actual_file = csv.reader(open(filename_actual_evol, 'rb'), delimiter=',') cont = 0 for row in result_actual_file: if cont > 0: # i ignore the first line with the headers num_adopters = row[3] list_actual_evol.append(float(num_adopters)) cont += 1 ################################################################## #../Results/network_final_schedule_withTeam3/Time_evolutions_Persuasion_alpha0.1_damping0.3_mutual_encourg0.3_threshold0.2_unif_distr_50iter_2012_seed31Oct_finalnetwork.dat alpha_F = 0.10 # alpha=0: nobody changes their mind alpha_A = 0.5 * alpha_F damping = 0.3 #its harder to go back from YES to NO again. 
=1 means no effect, =0.5 half the movement from Y->N than the other way around, =0 never go back from Y to N mutual_encouragement = 0.3 # when two Adopters meet, they convince each other even more threshold = 0.20 # larger than, to be an Adopte print "\n\nPersuasion process on network, with Niter:", Niter dict_timestep_list_delta_positions = {} for i in range(len(list_actual_evol)): dict_timestep_list_delta_positions[i] = [] dir = "../Results/network_final_schedule_withTeam3_local/" output_file = dir + "Time_evolutions_Persuasion_alpha" + str( alpha_F) + "_damping" + str(damping) + "_mutual_encourg" + str( mutual_encouragement) + "_threshold" + str( threshold) + "_unif_distr_" + str( Niter) + "iter_2012_seed31Oct_finalnetwork.dat" file = open(output_file, 'wt') file.close() list_delta_position = [] time_evol_number_adopters_ITER = [ ] # list of complete single realizations of the dynamics for iter in range(Niter): print " ", iter list_t = [] time_evol_number_adopters = [ ] # for a single realization of the dynamics num_adopters, seed_shift, max_shift = set_ic( G, threshold ) # i establish who is Adopter and NonAdopter initially, and count how many shifts i have total time_evol_number_adopters.append(float(num_adopters)) # print "initial number of adopters:", num_adopters list_t.append(0) # the dynamics starts: t = int(seed_shift) + 1 # the first time step is just IC.??? 
while t <= max_shift: # loop over shifts, in chronological order (the order is the day index since seeding_day) list_t.append(t) for n in G.nodes(): if G.node[n]['type'] == "shift" and G.node[n][ 'order'] == t: # i look for the shift corresponding to that time step flag_possible_persuasion = 0 for doctor in G.neighbors(n): if G.node[doctor][ "status"] == "Adopter": #first i check if any doctor is an adopter in this shift flag_possible_persuasion = 1 break if flag_possible_persuasion == 1: list_doctors = [] for doctor in G.neighbors( n): # for all drs in that shift list_doctors.append(doctor) pairs = itertools.combinations( list_doctors, 2) # cos the shift can be 2 but also 3 doctors for pair in pairs: doctor1 = pair[0] doctor2 = pair[1] if G.node[doctor1]['status'] != G.node[doctor2][ 'status']: # if they think differently, # there will be persuasion persuasion(G, damping, doctor1, doctor2, alpha_A, alpha_F, threshold, list_delta_position, t, dict_timestep_list_delta_positions ) # i move their values of opinion update_opinions( G, threshold, doctor1, doctor2 ) # i update status and make sure the values of the vectors stay between [0,1] else: # if two Adopters meet, they encourage each other (if two NonAdopters, nothing happens) mutual_reinforcement( G, mutual_encouragement, doctor1, doctor2, list_delta_position, t, dict_timestep_list_delta_positions) list_Adopters = [] #count how many i have at this time for n in G.nodes(): try: if G.node[n]["status"] == "Adopter": if G.node[n]["label"] not in list_Adopters: list_Adopters.append(G.node[n]["label"]) except: pass # if the node is a shift, it doesnt have a 'status' attribute time_evol_number_adopters.append(float(len(list_Adopters))) t += 1 ############## end while loop over t time_evol_number_adopters_ITER.append(time_evol_number_adopters) ##############end loop Niter average_time_evol_number_adopters = [] for i in range(len(time_evol_number_adopters)): #time step by time step list_fixed_t = [] for iteracion in 
range( Niter): #loop over all independent iter of the process list_fixed_t.append( time_evol_number_adopters_ITER[iteracion] [i]) # i collect all values for the same t, different iter average_time_evol_number_adopters.append( numpy.mean(list_fixed_t)) # i create the mean time evolution # print list_delta_position histograma_bines_gral.histograma_bins( list_delta_position, Nbins, "../Results/histogr_delta_positions_alpha" + str(alpha_F) + "_damping" + str(damping) + "_mutual_encourg" + str(mutual_encouragement) + "_threshold" + str(threshold) + "_" + str(Niter) + "iter_" + str(Nbins) + "bins.dat") dir = "../Results/" output_file3 = dir + "List_delta_positions_vs_timestep" + str( alpha_F) + "_damping" + str(damping) + "_mutual_encourg" + str( mutual_encouragement) + "_threshold" + str(threshold) + "_" + str( Niter) + "iter.dat" file3 = open(output_file3, 'wt') for key in dict_timestep_list_delta_positions: print "\n", key, print >> file3, "\n", key, for item in dict_timestep_list_delta_positions[key]: print item, print >> file3, item, file3.close() print "\n written:", output_file3
def main(graph_name):
    """Infection-with-memory dynamics on the hospital shift network.

    Sweeps (prob_Immune, prob_infection, infect_threshold, dose) — here the
    ranges are pinned to a single fixed parameter set — running Niter
    stochastic realizations each, always seeded with doctors "Wunderink" and
    "Weiss" infected. Each susceptible doctor accumulates `dose` per
    successful contact and turns infected once `infec_value` reaches
    `infect_threshold`. Writes average-evolution, histogram, summary and
    (optionally) landscape files; side effects only, returns None.
    """

    G = nx.read_gml(graph_name)

    # when YES, fixed values param, to get all statistics on final distances etc
    # change the range for the parameters accordingly
    for_testing_fixed_set = "YES"

    envelopes = "NO"
    Niter = 1000  # stochastic realizations per parameter set
    percent_envelope = 95.
    list_id_weekends_T3 = look_for_T3_weekends(G)  # T3 doesnt share fellows in the weekend (but they are the exception)
    cutting_day = 175  # only used in output filenames here
    all_team = "NO"  # as adopters or not
    dir_real_data = '../Results/'
    dir = "../Results/weight_shifts/infection/"
    delta_end = 3.  # >= than + or - dr difference at the end of the evolution (NO realization ends up closer than this!!!! if 2, i get and empty list!!!)
    Nbins = 20  # for the histogram of sum of distances

    # the landscape file only exists when exploring the parameter space
    if for_testing_fixed_set == "NO":
        output_file3 = "../Results/weight_shifts/Landscape_parameters_infection_memory_fixed_dose_thr_" + str(Niter) + "iterFIXED_Thr0.2_Imm0.0.dat"
        file3 = open(output_file3, 'wt')
        file3.close()

    ######################################################################################
    # I read the file of the actual evolution of the idea spreading in the hospital:   ##
    ######################################################################################
    if all_team == "YES":
        print "remember that now i use the file of adopters without fellows\n../Results/Actual_evolution_adopters_NO_fellows_only_attendings.dat"
        exit()
    else:
        filename_actual_evol = "../Results/Actual_evolution_adopters_NO_fellows_only_attendings.dat"

    # i read the file: list_dates_and_names_current_adopters.txt (created with: extract_real_evolution_number_adopters.py)
    file1 = open(filename_actual_evol, 'r')
    list_lines_file = file1.readlines()

    list_actual_evol = []
    for line in list_lines_file:  # [1:]:   # i exclude the first row
        num_adopters = float(line.split(" ")[1])
        list_actual_evol.append(num_adopters)

    ################################################################################

    # each range spans a single value (min, min+epsilon): the sweep is degenerate on purpose
    prob_min = 0.3
    prob_max = 0.301
    delta_prob = 0.1

    prob_Immune_min = 0.00
    prob_Immune_max = 0.001
    delta_prob_Immune = 0.1

    dose_min = 0.7  # of a single encounter with an infected (starting from zero doesnt make sense)
    dose_max = 0.701
    delta_dose = 0.01

    ########## KEEP FIXED TO ONE
    infect_threshold_min = 1.00  # i can define the dose in units of the threshold
    infect_threshold_max = 1.001
    delta_infect_threshold = 0.1
    ############

    # i will save the filename as key and the tot distance from that curve to the original one
    dict_filenames_tot_distance = {}

    prob_Immune = prob_Immune_min
    while prob_Immune <= prob_Immune_max:
        print "prom Immune:", prob_Immune

        prob_infection = prob_min
        while prob_infection <= prob_max:
            print " p:", prob_infection

            infect_threshold = infect_threshold_min
            while infect_threshold <= infect_threshold_max:
                print " threshold:", infect_threshold

                dose = dose_min
                while dose <= dose_max:
                    print " dose:", dose

                    # filename differs between train-test and landscape modes
                    if for_testing_fixed_set == "YES":
                        output_file2 = dir + "Average_time_evolution_Infection_memory_train_test_p" + str(prob_infection) + "_Immune" + str(prob_Immune) + "_FIXED_threshold" + str(infect_threshold) + "_dose" + str(dose) + "_" + str(Niter) + "iter.dat"
                    else:
                        output_file2 = dir + "Average_time_evolution_Infection_memory_p" + str(prob_infection) + "_Immune" + str(prob_Immune) + "_FIXED_threshold" + str(infect_threshold) + "_dose" + str(dose) + "_" + str(Niter) + "iter.dat"
                    file2 = open(output_file2, 'wt')  # truncate; appended to after the Niter loop
                    file2.close()

                    num_shifts = 0
                    for n in G.nodes():
                        G.node[n]["status"] = "S"
                        G.node[n]["infec_value"] = 0.  # when this value goes over the infect_threshold, the dr is infected
                        if G.node[n]['type'] == "shift":
                            num_shifts += 1

                    list_lists_t_evolutions = []  # i create the empty list of list for the Niter temporal evolutions
                    list_dist_fixed_parameters = []
                    list_abs_dist_at_ending_point_fixed_parameters = []
                    list_dist_at_ending_point_fixed_parameters = []
                    list_final_num_infected = []

                    for iter in range(Niter):
                        # print "  iter:",iter

                        ########### set I.C.
                        list_I = []  # list infected doctors
                        list_ordering = []
                        list_s = []

                        max_order = 0
                        for n in G.nodes():
                            G.node[n]["status"] = "S"  # all nodes are Susceptible
                            if G.node[n]['type'] == "shift":
                                list_s.append(n)
                                if G.node[n]['order'] > max_order:
                                    max_order = G.node[n]['order']  # last day index of the schedule
                            else:
                                # the two seed doctors start infected, above threshold
                                if G.node[n]['label'] == "Wunderink" or G.node[n]["label"] == "Weiss":
                                    G.node[n]["status"] = "I"
                                    G.node[n]["infec_value"] = infect_threshold + 1.
                                    list_I.append(G.node[n]['label'])

                        list_single_t_evolution = []
                        list_single_t_evolution.append(2.0)  # I always start with TWO infected doctors!!

                        for n in G.nodes():  # i make some DOCTORs INMUNE (anyone except Weiss and Wunderink)
                            if (G.node[n]['type'] == "A") or (G.node[n]['type'] == "F"):
                                if G.node[n]['label'] != "Wunderink" and G.node[n]["label"] != "Weiss":
                                    rand = random.random()
                                    if rand < prob_Immune:
                                        G.node[n]["status"] = "Immune"

                        ################# the dynamics starts:
                        t = 1
                        while t <= max_order:  # loop over shifts, in order
                            for n in G.nodes():
                                if G.node[n]['type'] == "shift" and G.node[n]['order'] == t:
                                    shift_lenght = int(G.node[n]['shift_lenght'])
                                    if shift_lenght == 2 and n not in list_id_weekends_T3:
                                        shift_lenght = 1  # because during weekends, the fellow does rounds one day with Att1 and the other day with Att2. (weekend shifts for T3 are two day long, with no sharing fellows)

                                    flag_possible_infection = 0
                                    for doctor in G.neighbors(n):  # first i check if any doctor is infected in this shift
                                        if G.node[doctor]["status"] == "I":
                                            flag_possible_infection = 1

                                    if flag_possible_infection:
                                        for doctor in G.neighbors(n):  # then the doctors in that shift, gets infected with prob_infection
                                            for i in range(shift_lenght):  # one exposure chance per day of the shift
                                                if G.node[doctor]["status"] == "S":
                                                    rand = random.random()
                                                    if rand < prob_infection:  # with prob p the infection occurres
                                                        G.node[doctor]["infec_value"] += dose  # and bumps the infection_value of that susceptible dr
                                                        if G.node[doctor]["infec_value"] >= infect_threshold:  # becomes infected
                                                            G.node[doctor]["status"] = "I"
                                                            if G.node[doctor]["type"] == "A":  # fellows participate in the dynamics, but i only consider the attendings as real adopters
                                                                list_I.append(G.node[doctor]["label"])

                            # for node in G.nodes():
                            #     if G.node[node]['type']!="shift":
                            #         print t, G.node[node]['label'], G.node[node]["infec_value"]
                            # raw_input()

                            list_single_t_evolution.append(float(len(list_I)))
                            t += 1
                        ######## end t loop

                        list_lists_t_evolutions.append(list_single_t_evolution)
                        list_dist_fixed_parameters.append(compare_real_evol_vs_simus_to_be_called.compare_two_curves(list_actual_evol, list_single_t_evolution))
                        list_abs_dist_at_ending_point_fixed_parameters.append(abs(list_single_t_evolution[-1] - list_actual_evol[-1]))  # i save the distance at the ending point between the current simu and actual evol
                        list_dist_at_ending_point_fixed_parameters.append(list_single_t_evolution[-1] - list_actual_evol[-1])  # i save the distance at the ending point between the current simu and actual evol
                        list_final_num_infected.append(list_single_t_evolution[-1])
                    ######## end loop Niter

                    # [mean trajectory dist, SD trajectory dist, mean abs ending dist]
                    list_pair_dist_std_delta_end = []
                    list_pair_dist_std_delta_end.append(numpy.mean(list_dist_fixed_parameters))  # average dist between the curves over Niter
                    list_pair_dist_std_delta_end.append(numpy.std(list_dist_fixed_parameters))
                    list_pair_dist_std_delta_end.append(numpy.mean(list_abs_dist_at_ending_point_fixed_parameters))

                    if for_testing_fixed_set == "NO":
                        file3 = open(output_file3, 'at')  # i print out the landscape
                        print >> file3, prob_infection, prob_Immune, numpy.mean(list_abs_dist_at_ending_point_fixed_parameters), numpy.mean(list_dist_fixed_parameters), numpy.mean(list_final_num_infected), numpy.std(list_final_num_infected), numpy.std(list_final_num_infected)/numpy.mean(list_final_num_infected)
                        file3.close()

                    if (numpy.mean(list_abs_dist_at_ending_point_fixed_parameters)) <= delta_end:  # i only consider situations close enough at the ending point
                        dict_filenames_tot_distance[output_file2] = list_pair_dist_std_delta_end

                    # write the average evolution over the Niter realizations, one row per timestep
                    file2 = open(output_file2, 'at')
                    for s in range(len(list_single_t_evolution)):
                        list_fixed_t = []
                        for iter in range(Niter):
                            list_fixed_t.append(list_lists_t_evolutions[iter][s])
                        print >> file2, s, numpy.mean(list_fixed_t)
                    file2.close()
                    print "printed out: ", output_file2

                    if for_testing_fixed_set == "YES":
                        num_valid_endings = 0.
                        for item in list_abs_dist_at_ending_point_fixed_parameters:
                            if item <= delta_end:  # i count how many realizations i get close enough at the ending point
                                num_valid_endings += 1.

                        print "average distance of the optimum in the testing segment:", numpy.mean(list_dist_fixed_parameters), numpy.std(list_dist_fixed_parameters), list_dist_fixed_parameters, "\n"
                        print "fraction of realizations that end within delta_doctor:", num_valid_endings/Niter, "mean ending dist:", numpy.mean(list_dist_at_ending_point_fixed_parameters), "SD final dist", numpy.std(list_dist_at_ending_point_fixed_parameters), list_dist_at_ending_point_fixed_parameters, "\n"

                        histogram_filename = "../Results/weight_shifts/histogr_raw_distances_ending_infection_memory_p" + str(prob_infection) + "_Immune" + str(prob_Immune) + "_threshold" + str(infect_threshold) + "_dose" + str(dose) + "_" + str(Niter) + "iter_day" + str(cutting_day) + ".dat"
                        histograma_gral_negv_posit.histograma(list_dist_at_ending_point_fixed_parameters, histogram_filename)

                        histogram_filename2 = "../Results/weight_shifts/histogr_sum_dist_traject_infection_memory_p" + str(prob_infection) + "_Immune" + str(prob_Immune) + "_threshold" + str(infect_threshold) + "_dose" + str(dose) + "_" + str(Niter) + "iter_day" + str(cutting_day) + ".dat"
                        histograma_bines_gral.histograma_bins(list_dist_fixed_parameters, Nbins, histogram_filename2)

                        output_file10 = "../Results/weight_shifts/Summary_results_training_segment_infection_memory_distrib_p" + str(prob_infection) + "_" + "FIXED_Immune" + str(prob_Immune) + "_FIXED_threshold" + str(infect_threshold) + "_dose" + str(dose) + "_" + str(Niter) + "iter_day" + str(cutting_day) + ".dat"
                        file10 = open(output_file10, 'wt')

                        print >> file10, "Summary results from train-testing infection with", Niter, "iter, and with values for the parameters: prob_inf ", prob_infection, " prob immune: ", prob_Immune, "infect. threshold:", infect_threshold, "dose:", dose, "\n"
                        print >> file10, "average distance of the optimum in the testing segment:", numpy.mean(list_dist_fixed_parameters), numpy.std(list_dist_fixed_parameters), list_dist_fixed_parameters, "\n"
                        print >> file10, "fraction of realizations that end within delta_doctor:", num_valid_endings/Niter, "mean ending dist:", numpy.mean(list_dist_at_ending_point_fixed_parameters), "SD final dist", numpy.std(list_dist_at_ending_point_fixed_parameters), list_dist_at_ending_point_fixed_parameters, "\n"
                        print >> file10, "written optimum train_test evolution file:", output_file2
                        print >> file10, "written histogram file: ", histogram_filename
                        file10.close()

                        print "written Summary file: ", output_file10
                        print "written histogram file: ", histogram_filename

                    if envelopes == "YES":
                        calculate_envelope_set_curves.calculate_envelope(list_lists_t_evolutions, percent_envelope, "Infection_memory_fixed", [prob_infection, prob_Immune, infect_threshold, dose])

                    dose += delta_dose
                infect_threshold += delta_infect_threshold
            prob_infection += delta_prob
        prob_Immune += delta_prob_Immune

    if for_testing_fixed_set == "NO":  # only if i am exploring the whole landscape, i need to call this function, otherwise, i already know the optimum
        compare_real_evol_vs_simus_to_be_called.pick_minimum_same_end(dict_filenames_tot_distance, "Infection_memory", all_team, Niter, None)
        print "written landscape file:", output_file3