def gather_repeated_simulation_statistics(clab_fname, coordinate, cat):
    """Gather statistics from several repetitions of the same simulation.

    Parameters:
        clab_fname - path passed through to get_mode_word_stats
        coordinate - coordinate passed through to get_mode_word_stats
        cat        - directory containing one sub-directory per repetition
                     of the simulation

    Returns a 3-tuple:
        statistics  - dict: {time-stamp file id: [stats_half0, stats_half1]},
                      where each stats_halfN is a dict of measures produced by
                      get_numpy_statistics, keys prefixed 'mode_'/'wordnum_'
                      (the two halves were "lewy"/"prawy" — left/right — in the
                      original notes, encoded here as indices 0/1)
        mode_bigger - dict: {time-stamp file id: [runs where bigger mode won,
                      runs where smaller mode won]}
        files       - ids of consecutive time stamps, used as keys in the
                      statistics dictionary
    """
    import os
    # One sub-directory per repetition of the simulation.
    # NOTE: built as a real list (the original relied on Python-2 map()
    # returning a list; a comprehension is portable and indexable).
    cats = [os.path.join(cat, sub) for sub in os.listdir(cat)]
    # Assume every catalogue contains identically named files which
    # correspond to one another, so listing the first one is enough.
    files = os.listdir(cats[0])
    # how many simulations chose bigger, how many chose smaller
    mode_bigger = {}
    statistics = {}
    # list statistics for each file, gathering info from all catalogues:
    for fname in sorted(files, key=lambda x: string2prefix_num(x)):
        mode_bigger[fname] = [0, 0]
        # per-repetition results for both halves (left, right)
        all_results_modes = []
        all_results_words = []
        # FIX: loop variable renamed — the original reused 'cat' here,
        # shadowing the function parameter of the same name.
        for cat_path in cats:
            bigger_mode, smaller_mode, avg_wordnum1, avg_wordnum2 = \
                get_mode_word_stats(os.path.join(cat_path, fname),
                                    clab_fname, coordinate)
            if bigger_mode > smaller_mode:
                mode_bigger[fname][0] += 1
            elif bigger_mode < smaller_mode:
                mode_bigger[fname][1] += 1
            all_results_modes.append((bigger_mode, smaller_mode))
            all_results_words.append((avg_wordnum1, avg_wordnum2))
        # =====calculate statistics for current fname=====#
        statistics[fname] = [{}, {}]
        for ind in (0, 1):  # index of half
            sub_stats = get_numpy_statistics(
                [pair[ind] for pair in all_results_modes],
                STATISTICS_MEASURES)
            # Iterating the dict directly is equivalent to the original
            # Python-2-only iterkeys() and works on Python 3 as well.
            for key in sub_stats:
                statistics[fname][ind]['mode_' + key] = sub_stats[key]
            sub_stats = get_numpy_statistics(
                [pair[ind] for pair in all_results_words],
                STATISTICS_MEASURES)
            for key in sub_stats:
                statistics[fname][ind]['wordnum_' + key] = sub_stats[key]
    return statistics, mode_bigger, files
#for plotting purposes: medians = [] means = [] xvals = [] #how many simulations chose bigger, how many chose smaller mode_bigger = {} #list statistics for each file, gathering info from all catalogues: for fname in sorted(files, key=lambda x: string2prefix_num(x)): mode_bigger[fname] = [0, 0] #all results_of_simulation for left and all results_of_simulation for right all_results_modes = [] all_results_words = [] for cat in cats: bigger_mode, smaller_mode, avg_wordnum1, avg_wordnum2 = \ get_mode_word_stats(cat+"/"+fname, clab_fname, coordinate) if bigger_mode > smaller_mode: mode_bigger[fname][0] += 1 elif bigger_mode < smaller_mode: mode_bigger[fname][1] += 1 all_results_modes += [(bigger_mode, smaller_mode)] all_results_words += [(avg_wordnum1, avg_wordnum2)] #=====calculate statistics for current fname=====# statistics = [{}, {}] for ind in [0, 1]:#index of half sub_stats = {} sub_stats = get_numpy_statistics( map(lambda x: x[ind], \ all_results_modes), \
# for plotting purposes: medians = [] means = [] xvals = [] # how many simulations chose bigger, how many chose smaller mode_bigger = {} # list statistics for each file, gathering info from all catalogues: for fname in sorted(files, key=lambda x: string2prefix_num(x)): mode_bigger[fname] = [0, 0] # all results for left and all results for right all_results_modes = [] all_results_words = [] for cat in cats: bigger_mode, smaller_mode, avg_wordnum1, avg_wordnum2 = get_mode_word_stats( cat + "/" + fname, clab_fname, coordinate ) if bigger_mode > smaller_mode: mode_bigger[fname][0] += 1 elif bigger_mode < smaller_mode: mode_bigger[fname][1] += 1 all_results_modes += [(bigger_mode, smaller_mode)] all_results_words += [(avg_wordnum1, avg_wordnum2)] # =====calculate statistics for current fname=====# statistics = [{}, {}] for ind in [0, 1]: # index of half sub_stats = {} sub_stats = get_numpy_statistics(map(lambda x: x[ind], all_results_modes), STATISTICS_MEASURES)