def rank(dataset, algorithm, spatial):
    """Aggregate per-genre feature scores over the matching runs in `history`.

    A run is used when its 'algorithm' and 'spatial' fields equal the
    arguments, its training set is reported compatible with `dataset` by
    `datatools.compatible`, it has a non-None 'best genome' and it ran for
    more than 100 generations.

    Args:
        dataset: dataset identifier, passed to `datatools.preparedata` and
            `datatools.metadata`.
        algorithm: value compared with each run's 'algorithm' field.
        spatial: value compared with each run's 'spatial' field; also passed
            as `order` to `datatools.preparedata`.

    Returns:
        A tuple `(ranked, count)`. `ranked` maps each genre to a
        `{feature: score}` dict and `count` is the number of runs used.
        When no run matches, "No run found" is printed and `({}, 0)` is
        returned.
    """
    _, _, labels, _ = datatools.preparedata(dataset, order=spatial)
    # Entries of `labels` stored under a negative key are the feature names.
    all_features = [labels[x] for x in labels if x < 0]

    # The metadata does not depend on the run, so fetch it once instead of
    # once per history entry.
    meta = datatools.metadata(dataset)
    data = [
        x for x in history
        if x['algorithm'] == algorithm
        and x['spatial'] == spatial
        and datatools.compatible(meta, x['training set'])
        and x['best genome'] is not None
        # generations > 100 excludes short test runs made while debugging
        and x['generations'] > 100
    ]
    if not data:
        print("No run found")
        return {}, 0

    ranked = {
        key: {f: 0.0 for f in all_features}
        for key in data[0]['training set']['genres']
    }
    if spatial == 1:
        # Workaround: temporary 'Output' row; it is redistributed over the
        # genres and removed further down.
        ranked['Output'] = {f: 0.0 for f in all_features}

    scoresum = 0.0
    for datum in data:
        best = datum['best genome']
        weight = datum['control score'] ** 2
        scoresum += weight
        for i in range(0, datum['spatial']):
            crunch_output_node(i, ranked[labels[i]], best, labels)
        # NOTE(review): the whole table is rescaled after every run, so if
        # crunch_output_node accumulates into the row it is given, earlier
        # runs end up multiplied by the weights of all later runs. Confirm
        # whether a plain weighted average was intended.
        for row in ranked.values():
            for f in row:
                row[f] *= weight

    # Skip the division when every weight is zero; it would raise
    # ZeroDivisionError.
    if scoresum != 0:
        for row in ranked.values():
            for f in row:
                row[f] /= scoresum

    if spatial == 1:
        mapping = datatools.generalMapping(meta['genres'], spatial)
        output = ranked['Output']
        for key, row in ranked.items():
            if key == 'Output':
                continue
            sign = mapping[key][0]
            for f in row:
                # A positive mapping entry scales the output value; any
                # other entry negates it.
                row[f] = output[f] * sign if sign > 0 else -output[f]
        ranked.pop('Output', None)

    # Scale by the overall value range. An empty table or a zero range
    # (all scores equal) is left untouched instead of raising.
    values = [v for row in ranked.values() for v in row.values()]
    if values:
        span = max(values) - min(values)
        if span != 0:
            for row in ranked.values():
                for f in row:
                    row[f] = row[f] / span
    return ranked, len(data)
def bestPerformer(dataset):
    """Find the compatible run in `history` with the lowest control error rate.

    The error rate of a run is the number of its 'control errors' divided by
    the 'size' of its 'control set'. Only rates strictly below 1 are
    considered, and on ties the earliest run in `history` wins.

    Returns:
        A tuple `(error_rate, run)`, or `(1, None)` when no compatible run
        has an error rate below 1.
    """
    meta = datatools.metadata(dataset)
    candidates = [
        run for run in history
        if datatools.compatible(run['training set'], meta)
    ]

    lowest_rate, winner = 1, None
    for run in candidates:
        rate = 1.0 * len(run['control errors']) / run['control set']['size']
        if rate < lowest_rate:
            lowest_rate, winner = rate, run
    return (lowest_rate, winner)
def averagePerformance(dataset, algorithm, spatial):
    """Return one minus the mean control error rate of the matching runs.

    A run from `history` is used when its training set is reported
    compatible with `dataset`, its 'algorithm' and 'spatial' fields equal
    the arguments and it ran for more than 100 generations.

    Returns:
        `1.0 - mean(error rate)` over the matching runs, or 0 when there
        are none.
    """
    meta = datatools.metadata(dataset)
    runs = [
        run for run in history
        if datatools.compatible(run['training set'], meta)
        and run['algorithm'] == algorithm
        and run['spatial'] == spatial
        and run['generations'] > 100
    ]
    if not runs:
        return 0

    # Per-run error rate: misclassified control samples / control set size.
    error_total = sum(
        1.0 * len(run['control errors']) / run['control set']['size']
        for run in runs
    )
    return 1.0 - error_total / len(runs)
def rank(dataset, algorithm, spatial):
    """Weight per-genre feature scores by control score over matching runs.

    Returns `(ranked, count)`, or `({}, 0)` after printing "No run found"
    when no run in `history` matches.

    NOTE(review): this is a second definition named `rank` in the same file.
    If both definitions are at module level, this later one replaces the
    earlier one. Presumably this is an older draft; confirm whether it
    should still be live code.
    NOTE(review): `allfeatuers` and `labels` are read here but never assigned
    in this function, so they must come from an enclosing scope that is not
    visible in this part of the file.
    """
    # Runs with the requested algorithm/spatial value, a compatible training
    # set and a recorded best genome.
    data = [x for x in history if x['algorithm']==algorithm and x['spatial']==spatial and datatools.compatible(datatools.metadata(dataset), x['training set']) and x['best genome'] is not None]
    if len(data)==0:
        print("No run found")
        return {},0
    # One zero-initialised {feature: score} row per genre of the first run.
    ranked = {key : {f :0.0 for f in allfeatuers} for key in data[0]['training set']['genres']}
    ranked['Output'] = {f :0.0 for f in allfeatuers} ## workaround
    scoresum=0.0
    for datum in data:
        best = datum['best genome']
        scoresum+= datum['control score']
        for i in range(0, spatial):
            crunch_output_node(i, ranked[labels[i]], best)
        # Rescale the whole table by this run's control score.
        for k in ranked:
            for f in ranked[k]:
                ranked[k][f] *= datum['control score']
    # Divide by the summed control scores.
    for k in ranked:
        for f in ranked[k]:
            ranked[k][f] /= scoresum
    if spatial == 1:
        # TODO: assuming there are 2 genres
        # TODO: take ranked['Output']
        # TODO: give genre[0] the negated values and genre[1] the original values
        # TODO: what about more than 2 genres?
        pass
    return ranked, len(data)
'./Datasets/classic-jazz-rock.dat',
'./Datasets/classic-rock.dat',
'./Datasets/classic-jazz.dat',
'./Datasets/jazz-rock.dat'
]
# NOTE(review): the opening of the list literal closed above is not visible
# in this part of the file; the loops below read it as `datasets`.

#for d,a,s in itertools.product(datasets, algorithms, spatials):
#    print(d,a,'(spatial:',s,') ',len([x for x in history if x['algorithm']==a and x['spatial']==s and x['training set']['path']==d]))

# Disabled debugging snippet, kept as a bare string literal.
"""for d in [x for x in history if x['training set']['path']==datasets[1] and x['spatial']==2]: print('\n\n################################', d['generations'],'algorithm:', d['algorithm']) datatools.showErrors(d) exit(0)"""

# For every dataset/algorithm/spatial combination with at least one
# compatible run, print the run count, the value of averagePerformance and
# the mean control score.
for d, a, s in itertools.product(datasets, algorithms, spatials):
    values = [
        x['control score'] for x in history
        if x['algorithm'] == a and x['spatial'] == s and datatools.compatible(datatools.metadata(d), x['training set'])
    ]
    if len(values) > 0:
        print(d, a, s, ' \t', len(values), ':', averagePerformance(d, a, s), sum(values) / len(values))

# Call plotRank for every dataset/algorithm/spatial combination.
for x in itertools.product(datasets, algorithms, spatials):
    plotRank(x[0], x[1], x[2])
    #showrank(x[0],x[1],x[2])

#for d in datasets:
#    x = bestPerformer(d)
#    print(d,':',x[0],x[1]['algorithm'], '(spatial:',x[1]['spatial'],')', x[1]['generations'], x[1]['control score'], len(x[1]['control errors']))