# Repeated evaluation loop: on each pass the data is re-split, the classifiers
# are retrained, and the majority vote over their validation predictions is
# scored against the true validation labels.
# NOTE(review): the layout below was reconstructed from a whitespace-collapsed
# source; confirm the nesting of the `if` blocks against the original file.
# NOTE(review): `cont`, `expe`, `data`, `labels` and `oldAgmntLevels` are
# defined outside this excerpt, and no increment of `cont` is visible here;
# presumably it happens further down the loop body - confirm.
bestF1s=[]
while cont<expe['numTests']:
    #Divide the data in training, testing and validation
    # The split result is indexed by key, one entry per data/label subset.
    splitData=dmlPre.dataSplitBalancedClass(data, labels)
    trainData=splitData['trainData']
    trainLabels=splitData['trainLabels']
    testData=splitData['testData']
    testLabels=splitData['testLabels']
    valData=splitData['valData']
    valLabels=splitData['valLabels']
    # Union of the distinct labels found in any of the three subsets, so a
    # class missing from one subset is still part of the label set.
    uLabels=np.unique(np.hstack((np.unique(trainLabels),np.unique(testLabels),np.unique(valLabels))))
    #Train the different classifiers and predict for the testing and validation data sets
    clfs=fun.clfsEval(trainData,testData,valData,trainLabels,testLabels,valLabels,uLabels)
    #Calculates the majority vote and the agreement rate for all of the classifiers
    mvLabels,agmntLevels=fun.agreementRates(clfs,valLabels,uLabels,plot=expe['plots'])
    # First pass only: remember the sorted set of agreement levels as the
    # reference that later passes are compared against.
    if oldAgmntLevels==[]:
        oldAgmntLevels=sorted(np.unique(agmntLevels))
    if oldAgmntLevels!=[]:
        # Only score passes whose set of agreement levels matches the
        # reference exactly (list equality on the sorted unique values).
        if sorted(np.unique(agmntLevels))==oldAgmntLevels:
            #Calculates the f1_scores for the majority vote labels and the predicted outputs
            #from the different classifiers
            fun.clfsVal(clfs,mvLabels,agmntLevels,uLabels)
            # F1 of the majority-vote labels against the true validation labels.
            # NOTE(review): if f1_score is sklearn.metrics.f1_score, no
            # `average` is passed; the default may not suit multiclass
            # labels - confirm against the installed version.
            f1_score_agmnt=f1_score(valLabels,mvLabels,labels=uLabels)
            # The body of this verbosity check continues beyond this excerpt.
            if expe['verbose']==1:
# Per-file step: load one data file, then train the classifiers either on a
# train/test/validation split (testAndVal) or on the whole file.
# NOTE(review): the layout below was reconstructed from a whitespace-collapsed
# source; `file`, `dataPath`, `testAndVal`, `storeModels` and `allData` come
# from outside this excerpt (presumably an enclosing loop over files - confirm).
filename=dataPath+'/'+file
data,labels,features=fun.dataExtract(filename)
if testAndVal==True:
    print('training model for file %s'%(file))
    # This branch is used when the data should be split into training,
    # testing and validation subsets.
    # Data separation
    splitData=dmlPre.dataSplitBalancedClass(data, labels)
    trainData=splitData['trainData']
    trainLabels=splitData['trainLabels']
    testData=splitData['testData']
    testLabels=splitData['testLabels']
    valData=splitData['valData']
    valLabels=splitData['valLabels']
    # Union of the distinct labels found in any of the three subsets.
    uLabels=np.unique(np.hstack((np.unique(trainLabels),np.unique(testLabels),np.unique(valLabels))))
    #Train the different classifiers and predict for the testing and validation data sets
    tempClf=fun.clfsEval(trainData,testData,valData,trainLabels,testLabels,valLabels,uLabels,classN=1)
    # Record this file's name, its split subsets and its features, appending
    # one entry per file to each list in allData.
    allData['user'].append(file)
    allData['trainData'].append(trainData)
    allData['trainLabels'].append(trainLabels)
    allData['testData'].append(testData)
    allData['testLabels'].append(testLabels)
    allData['valData'].append(valData)
    allData['valLabels'].append(valLabels)
    allData['features'].append(features)
else:
    # The data is not split
    uLabels=np.unique(labels)
    if storeModels==True:
        print('training model for file %s'%(file))
        # Train on the full data set. The test and validation slots receive
        # rows 0-99 and 100-199 of that same data, so they overlap the
        # training data and are not held-out sets.
        # NOTE(review): a file with fewer than 200 rows yields a short or
        # empty slice here; confirm clfsEval tolerates that.
        # NOTE(review): placing this call inside `if storeModels` is inferred
        # from the collapsed source - confirm against the original layout.
        tempClf=fun.clfsEval(data,data[:100,],data[100:200,:],labels,labels[:100],labels[100:200],uLabels,classN=1)