import random
from datetime import date
from statistics import median

# Params, DiseaseState, HomogeneousEnvironment, and Person are assumed to be
# provided by the surrounding simulation package.


def test_propagate_infection(params_path):
    """Tests the consistency of the propagate_infection function in
    HomogeneousEnvironment."""
    Params.load_from(params_path)
    DiseaseState.init_infectiousness_list()
    percent_infected = 0.1
    contact_prob = 0.25
    num_of_people = 400
    weight_list = [max(0.3, random.random() - 0.1) for _ in range(num_of_people)]
    env = HomogeneousEnvironment(contact_prob, "test")
    total = 0
    infections = []
    loops = 100
    for _ in range(loops):
        env._person_dict = {
            Person(random.randint(20, 60)): weight_list[i]
            for i in range(num_of_people)
        }
        for p, w in env._person_dict.items():
            # Seed roughly percent_infected of the population (the original
            # comparison was inverted and infected ~90% instead of ~10%)
            if random.random() < percent_infected:
                p._disease_state = DiseaseState.ASYMPTOMATICINFECTIOUS
            else:
                p._disease_state = DiseaseState.SUSCEPTIBLE
            p._change()
            env.sign_up_for_today(p, w)
        num_of_infections = len(
            env.propagate_infection(date(year=2020, month=12, day=1)))
        infections.append(num_of_infections)
        total += num_of_infections
    avg = total / loops
    # Consistency check: the mean infection count should stay close to the median
    assert abs(avg - median(infections)) < 10
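# A self-contained sketch of the consistency check the assertion above relies
# on: for repeated draws of a binomial-style infection count, the sample mean
# should land near the sample median. The draw parameters here are
# illustrative assumptions, not values taken from the simulation.
import random
from statistics import mean, median

counts = [sum(random.random() < 0.25 for _ in range(400)) for _ in range(100)]
assert abs(mean(counts) - median(counts)) < 10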
from collections import Counter
from math import floor
from statistics import median


def truncate_map(occurence_map):
    """Truncate an occurrence map by dropping words whose occurrence count is
    heavily over-represented in the distribution.

    Parameters:
        occurence_map (dict): Dictionary with a word as key and its occurrence
            count as value.

    Returns:
        dict: Truncated map.
    """
    # Distribution of occurrence counts: occurrence value -> number of words
    # that have it
    distribution = Counter(occurence_map.values())
    dist_median = median(distribution.values())
    # Compute the upper bound: the 99th percentile of the above-median
    # frequencies
    limit = 0.99
    dist_upper_median = sorted(v for v in distribution.values() if v > dist_median)
    dist_upper_bound = int(floor(len(dist_upper_median) * limit))
    # Keep only occurrence values whose frequency stays below the bound
    min_dist_value = dist_upper_median[dist_upper_bound - 1]
    distribution = {k: v for k, v in distribution.items() if v <= min_dist_value}
    # Return the new occurrence map
    return {k: v for k, v in occurence_map.items() if v in distribution}
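# Hypothetical usage of truncate_map: ten words occur exactly once, which
# makes the count "1" extremely over-represented, so those words are dropped
# while the rarer counts 2-5 survive. The data is made up for the example.
word_counts = {'w%d' % i: c
               for i, c in enumerate([1] * 10 + [2] * 4 + [3] * 2 + [4] + [5])}
truncated = truncate_map(word_counts)
assert set(truncated.values()) == {2, 3, 4, 5}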
from numpy import mean, median, percentile, std


def _compute_stats_function(values):
    stats = None
    if len(values) > 1:
        stats = {}
        stats['min'] = min(values)
        stats['max'] = max(values)
        stats['mean'] = mean(values)
        stats['median'] = median(values)
        stats['1st-quartile'] = percentile(values, 25)
        stats['3rd-quartile'] = percentile(values, 75)
        # Note: numpy's std is the standard deviation, not the standard error,
        # despite the key name
        stats['std-error'] = std(values)
    return stats
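# Hypothetical usage: summary statistics for a small sample; inputs with
# fewer than two values return None by design.
sample = [3.1, 4.7, 2.2, 5.9, 4.4, 3.8]
print(_compute_stats_function(sample))
assert _compute_stats_function([42]) is None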
import cv2
import numpy as np


def computeTranslation(img1, img2, img1Points, maxTranslation2, minNMatches,
                       windowSize=(5, 5), level=5,
                       criteria=(cv2.TERM_CRITERIA_EPS, 0, 0.01)):
    '''Computes the translation of img2 with respect to img1
    (loaded using OpenCV as numpy arrays).
    img1Points are used to compute the translation.

    TODO: add a diagnostic for when the data is all over the place and the
    motion is most likely not a translation (e.g. zoom or another non-linear
    distortion).'''
    # calcOpticalFlowPyrLK(prevImg, nextImg, prevPts[, nextPts[, status[, err[,
    #     winSize[, maxLevel[, criteria[, derivLambda[, flags]]]]]]]])
    #     -> nextPts, status, err
    img2Points, status, track_error = cv2.calcOpticalFlowPyrLK(
        img1, img2, img1Points, None,
        winSize=windowSize, maxLevel=level, criteria=criteria)
    delta = []
    for k, (p1, p2) in enumerate(zip(img1Points, img2Points)):
        if status[k] == 1:
            dp = p2 - p1
            d = np.sum(dp ** 2)
            # Keep only displacements whose squared length stays below the cap
            if d < maxTranslation2:
                delta.append(dp)
    if len(delta) >= minNMatches:
        return np.median(delta, axis=0)
    # Not enough consistent matches to estimate a translation (the original
    # printed `dp`, which is undefined when no point was tracked)
    print('too few matches: {} < {}'.format(len(delta), minNMatches))
    return None
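# Hypothetical usage sketch: the file names, the corner-detection parameters,
# and the thresholds below are illustrative assumptions, not values from the
# original code.
import cv2

frame1 = cv2.imread('frame1.png', cv2.IMREAD_GRAYSCALE)
frame2 = cv2.imread('frame2.png', cv2.IMREAD_GRAYSCALE)
# Pick up to 200 strong corners in the first frame to track
points = cv2.goodFeaturesToTrack(frame1, maxCorners=200,
                                 qualityLevel=0.01, minDistance=10)
t = computeTranslation(frame1, frame2, points,
                       maxTranslation2=100.0,  # max squared displacement (px^2)
                       minNMatches=10)
if t is not None:
    print('estimated translation (dx, dy):', t.ravel())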
from scipy.stats import wilcoxon
from numpy import average, median

#tree = [96.19047619047619, 96.28571428571429, 95.61904761904762, 96.0, 96.57142857142857, 96.57142857142857, 95.14285714285714, 96.0, 96.19047619047619, 95.9047619047619, 96.28571428571429, 95.61904761904762, 97.33333333333333, 94.85714285714286, 95.14285714285714, 94.28571428571429, 94.19047619047619, 96.38095238095238, 95.04761904761905, 94.85714285714286, 96.19047619047619, 96.38095238095238, 96.38095238095238, 96.47619047619048, 96.19047619047619, 94.57142857142857, 96.38095238095238, 96.47619047619048, 96.47619047619048, 94.95238095238095, 96.19047619047619, 95.14285714285714, 95.71428571428571, 94.85714285714286, 95.9047619047619, 96.66666666666667, 94.95238095238095, 94.57142857142857, 94.19047619047619, 94.19047619047619, 95.9047619047619, 95.9047619047619, 94.66666666666667, 96.0952380952381, 96.0, 95.9047619047619, 97.14285714285714, 96.19047619047619, 96.28571428571429, 96.19047619047619]
#C457NN = [96.0, 94.95238095238095, 94.28571428571429, 95.61904761904762, 95.23809523809524, 96.38095238095238, 95.23809523809524, 96.0, 95.04761904761905, 96.0952380952381, 96.19047619047619, 94.57142857142857, 96.47619047619048, 96.38095238095238, 95.42857142857143, 94.85714285714286, 94.57142857142857, 96.19047619047619, 94.76190476190476, 94.47619047619048, 94.95238095238095, 95.61904761904762, 96.19047619047619, 96.0, 95.04761904761905, 95.33333333333333, 96.28571428571429, 95.52380952380952, 96.47619047619048, 95.61904761904762, 95.80952380952381, 95.14285714285714, 96.0952380952381, 95.04761904761905, 95.33333333333333, 96.85714285714286, 95.23809523809524, 96.0952380952381, 93.52380952380952, 95.52380952380952, 95.71428571428571, 96.0, 94.57142857142857, 96.66666666666667, 96.0, 95.61904761904762, 96.38095238095238, 95.61904761904762, 96.0, 95.61904761904762]

tree = [96.18604651162791, 96.18604651162791, 96.37209302325581, 96.46511627906976, 96.74418604651163, 96.09302325581395, 95.90697674418605, 96.18604651162791, 96.27906976744185, 96.27906976744185, 96.09302325581395, 96.18604651162791, 96.65116279069767, 96.18604651162791, 95.81395348837209, 96.0, 96.55813953488372, 96.18604651162791, 95.72093023255815, 95.72093023255815, 96.0, 95.90697674418605, 96.09302325581395, 96.0, 96.46511627906976, 96.18604651162791, 96.09302325581395, 95.90697674418605, 95.81395348837209, 96.55813953488372, 96.0, 96.46511627906976, 96.0, 96.09302325581395, 96.18604651162791, 95.72093023255815, 96.27906976744185, 95.62790697674419, 94.79069767441861, 95.81395348837209, 96.09302325581395, 96.18604651162791, 96.37209302325581, 96.37209302325581, 96.27906976744185, 96.09302325581395, 96.46511627906976, 96.74418604651163, 96.0, 96.18604651162791]
C457NN = [94.13953488372093, 95.53488372093024, 96.46511627906976, 95.44186046511628, 95.72093023255815, 95.53488372093024, 95.06976744186046, 96.0, 94.88372093023256, 95.81395348837209, 94.97674418604652, 94.69767441860465, 96.37209302325581, 95.25581395348837, 94.79069767441861, 95.53488372093024, 96.46511627906976, 96.0, 95.90697674418605, 95.62790697674419, 95.81395348837209, 94.32558139534883, 95.16279069767442, 94.4186046511628, 94.97674418604652, 96.0, 95.81395348837209, 95.34883720930233, 95.72093023255815, 95.90697674418605, 95.53488372093024, 95.72093023255815, 95.25581395348837, 95.62790697674419, 96.55813953488372, 96.37209302325581, 96.09302325581395, 94.51162790697674, 95.16279069767442, 94.79069767441861, 95.25581395348837, 94.69767441860465, 96.46511627906976, 95.44186046511628, 95.81395348837209, 96.55813953488372, 95.25581395348837, 96.46511627906976, 94.97674418604652, 94.97674418604652]

print('average tree =', average(tree))
print('average C4.5(7NN) =', average(C457NN))
print('median tree =', median(tree))
print('median C4.5(7NN) =', median(C457NN))
print('wilcoxon test for J48 or C4.5(7NN):', wilcoxon(tree, C457NN))
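# A brief sketch of reading the Wilcoxon output: scipy's wilcoxon returns a
# (statistic, p-value) pair, so it can be unpacked for a significance check.
# The 0.05 threshold is an illustrative assumption.
stat, p = wilcoxon(tree, C457NN)
if p < 0.05:
    print('J48 vs C4.5(7NN): significant difference (p = %.4f)' % p)
else:
    print('J48 vs C4.5(7NN): no significant difference (p = %.4f)' % p)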
from numpy import median

from projeto.Evaluation import Evaluation
from projeto.instancia import Instances
from projeto.kmeans import KMeans

instancias = Instances('fertility_Diagnosisnormalized.csv')
kmeans = KMeans(instancias, 2)
kmeans.setDistanceFunction(valor="2")
evaluation = Evaluation(kmeans, instancias)
resultado = evaluation.multipleRuns(30)
print(median(resultado[0]), median(resultado[1]))
print(resultado)
# for g1, g2 in zip(resultado[0], resultado[1]):
#     print("Group 1: %s - Group 2: %s" % (g2, g1))
evaluation.maxDaviesBouldin(1)
print(evaluation.BestPartition())
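# A minimal cross-check sketch for the Davies-Bouldin step, assuming the CSV
# loads as a plain numeric feature matrix (no header or label column);
# scikit-learn's KMeans and davies_bouldin_score are a swapped-in alternative
# to the project's own classes, not part of the original script.
import numpy as np
from sklearn.cluster import KMeans as SKKMeans
from sklearn.metrics import davies_bouldin_score

X = np.loadtxt('fertility_Diagnosisnormalized.csv', delimiter=',')
labels = SKKMeans(n_clusters=2, n_init=10).fit_predict(X)
print('Davies-Bouldin index:', davies_bouldin_score(X, labels))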
from numpy import mean, median, std
# median_absolute_deviation was removed in scipy 1.9; this snippet assumes an
# older scipy where it is still available
from scipy.stats import median_absolute_deviation


def itemsList2models(source):
    linecount = 0
    indcount = 0
    line1 = True
    lines = open(source)
    ind = {}
    perPerson = {}
    baseline = ['BaselineRandom', 'CorrectReply', 'AlwaysReject']
    models = [
        'CR&time', 'ClassicReas', 'FFT-Max', 'FFT-ZigZag(Z+)', 'HeurRecogn',
        'HeurRecogn-lin.', 'S2MR', 'SentimentAnalysis'
    ]
    if '3' in source:
        models = models + ['WMSupprByMood']
    for line in lines:
        listLine = line.replace('\r', '').replace('\n', '').split(',')
        if line1:
            # Header row: map column names to their indices
            line1 = False
            for key in listLine:
                ind[key] = indcount
                indcount += 1
            continue
        linecount += 1
        person = listLine[ind['id']]
        if person not in perPerson:
            perPerson[person] = {}
        for model in models:
            if model not in perPerson[person]:
                perPerson[person][model] = []
            # Per-item accuracy: 1 - |binaryResponse - prediction|
            perPerson[person][model].append(
                1 - abs(float(listLine[ind['binaryResponse']]) -
                        float(listLine[ind[model]])))
        for model in baseline:
            if model not in perPerson[person]:
                perPerson[person][model] = []
            if model == 'BaselineRandom':
                perPerson[person][model].append(
                    1 - abs(float(listLine[ind['binaryResponse']]) - 0.5))
            if model == 'CorrectReply':
                perPerson[person][model].append(
                    1 - abs(float(listLine[ind['binaryResponse']]) -
                            float('T' in listLine[ind['truthful']])))
            if model == 'AlwaysReject':
                perPerson[person][model].append(
                    1 - abs(float(listLine[ind['binaryResponse']]) - 0.0))
    lines.close()
    pairs = []
    pairdone = []
    for model1 in models:
        for model2 in models:
            if model1 == model2:
                continue
            if model1 + model2 in pairdone or model2 + model1 in pairdone:
                continue
            pairdone.append(model1 + model2)
            # For each person, pick the better of the two models in this pair
            maxModels = {}
            maxPerfs = {}
            for pers in perPerson:
                maxperf, maxmodel = 0, None
                for model in [model1, model2]:
                    if model not in perPerson[pers]:
                        continue
                    if mean(perPerson[pers][model]) > maxperf:
                        maxperf, maxmodel = mean(perPerson[pers][model]), model
                maxPerfs[pers] = maxperf
                maxModels[pers] = maxmodel
            numberOfModelAsMax = {}
            for pers in maxModels:
                if maxModels[pers] not in numberOfModelAsMax:
                    numberOfModelAsMax[maxModels[pers]] = 0
                numberOfModelAsMax[maxModels[pers]] += 1
            allPersPerfList = [maxPerfs[a] for a in maxPerfs]
            # Skip pairs where one model wins for everyone
            if len([a for a in numberOfModelAsMax if a is not None]) < 2:
                continue
            pairs.append((numberOfModelAsMax, mean(allPersPerfList),
                          std(allPersPerfList), median(allPersPerfList),
                          median_absolute_deviation(allPersPerfList)))
    # `order` is assumed to be a sort-key function defined elsewhere in the module
    pairs.sort(key=order)
    print(pairs[:5])
    for model in models + baseline:
        meanresperpers = [mean(perPerson[pers][model]) for pers in perPerson]
        print(model, ':', (20 - len(model)) * ' ',
              'mean', round(mean(meanresperpers), 2),
              'median', round(median(meanresperpers), 2),
              'MAD', round(median_absolute_deviation(meanresperpers), 2))
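# A tiny illustration of the per-item scoring rule used above: each model (and
# baseline) earns 1 - |binaryResponse - prediction| per item, i.e. 1.0 for a
# perfect match, decreasing linearly. The numbers are made up.
binary_response = 1.0
for prediction in (1.0, 0.8, 0.5, 0.0):
    print(prediction, '->', 1 - abs(binary_response - prediction))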
from numpy import mean, median
# median_absolute_deviation was removed in scipy 1.9; this snippet assumes an
# older scipy where it is still available
from scipy.stats import median_absolute_deviation


def itemsList(source):
    linecount = 0
    indcount = 0
    line1 = True
    lines = open(source)
    ind = {}
    perPerson = {}
    baseline = ['BaselineRandom', 'CorrectReply', 'AlwaysReject']
    models = [
        'CR&time', 'ClassicReas', 'FFT-Max', 'FFT-ZigZag(Z+)', 'HeurRecogn',
        'HeurRecogn-lin.', 'S2MR', 'SentimentAnalysis'
    ]
    if '3' in source:
        models = models + ['WMSupprByMood']
    for line in lines:
        listLine = line.replace('\r', '').replace('\n', '').split(',')
        if line1:
            # Header row: map column names to their indices
            line1 = False
            for key in listLine:
                ind[key] = indcount
                indcount += 1
            continue
        linecount += 1
        person = listLine[ind['id']]
        if person not in perPerson:
            perPerson[person] = {}
        for model in models:
            if model not in perPerson[person]:
                perPerson[person][model] = []
            # Per-item accuracy: 1 - |binaryResponse - prediction|
            perPerson[person][model].append(
                1 - abs(float(listLine[ind['binaryResponse']]) -
                        float(listLine[ind[model]])))
    lines.close()
    # For each person, find the best-performing model
    maxModels = {}
    maxPerfs = {}
    for pers in perPerson:
        maxperf, maxmodel = 0, None
        for model in perPerson[pers]:
            if mean(perPerson[pers][model]) > maxperf:
                maxperf, maxmodel = mean(perPerson[pers][model]), model
        maxPerfs[pers] = maxperf
        maxModels[pers] = maxmodel
    numberOfModelAsMax = {}
    for pers in maxModels:
        if maxModels[pers] not in numberOfModelAsMax:
            numberOfModelAsMax[maxModels[pers]] = 0
        numberOfModelAsMax[maxModels[pers]] += 1
    allPersPerfList = [maxPerfs[a] for a in maxPerfs]
    # print(numberOfModelAsMax)
    # Normalize win counts into fractions (the total is computed once here
    # instead of inside the loop as in the original)
    totalWins = sum(numberOfModelAsMax.values())
    percOfModelsAsMax = {}
    for a in numberOfModelAsMax:
        percOfModelsAsMax[a] = float(numberOfModelAsMax[a]) / totalWins
    print(percOfModelsAsMax)
    print('mean', round(mean(allPersPerfList), 2),
          'median', round(median(allPersPerfList), 2),
          'MAD', round(median_absolute_deviation(allPersPerfList), 2))
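# Since scipy dropped median_absolute_deviation in 1.9, a minimal fallback
# sketch of the unscaled MAD follows (note: the old scipy function scaled the
# result by ~1.4826 by default, so values differ by that factor):
import numpy as np

def mad(values):
    m = np.median(values)
    return np.median(np.abs(np.asarray(values) - m))

print(mad([1, 1, 2, 2, 4, 6, 9]))  # -> 1.0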