def cla_filter():
    """Build the classifier pool returned to the ensemble filter.

    Returns:
        A list with one five-element list per enabled candidate:
        ``[model, fit_kwargs, display_name, results, roc]``.  ``results`` and
        ``roc`` are each a fresh list of seven empty lists; nothing is stored
        in them here.

    Only candidates whose flag is ``True`` in the table below are
    instantiated and returned, in table order.  Flip a flag to switch a
    candidate on or off; disabled candidates cost nothing.
    """
    def empty_slots():
        # A new list per call so that no two entries share accumulators.
        return [[] for _ in range(7)]

    # (enabled, model class, keyword arguments for fit(), display name)
    candidates = (
        (True, simpleMIL, {'type': 'max'}, 'MIL max'),
        (False, simpleMIL, {'type': 'min'}, 'MIL min'),
        (False, simpleMIL, {'type': 'extreme'}, 'MIL Extreme'),
        (True, BOW, {'k': 90, 'covar_type': 'diag', 'n_iter': 20}, 'BOW'),
        (False, CKNN, {'references': 3, 'citers': 5}, 'CKNN'),
        (True, maxDD, {}, 'DIVERSE DENSITY'),
        (False, EMDD, {}, 'EM-DD'),
        (False, MILBoost, {}, 'MILBOOST'),
        (False, MILES, {}, 'MILES'),
    )

    return [
        [model_cls(), fit_kwargs, name, empty_slots(), empty_slots()]
        for enabled, model_cls, fit_kwargs, name in candidates
        if enabled
    ]
def cla_filter_ipf():
    """Build the classifier pool used with the iterative filter.

    Returns:
        A list with one five-element list per classifier, in the order of the
        table below: ``[model, fit_kwargs, display_name, results, roc]``.
        ``results`` and ``roc`` are independent lists of seven empty lists.

    MILES is not part of this pool.
    """
    def empty_slots():
        # Built from scratch on every call, so entries never share storage.
        return [[] for _ in range(7)]

    # (model class, keyword arguments for fit(), display name)
    specs = (
        (simpleMIL, {'type': 'max'}, 'MIL max'),
        (simpleMIL, {'type': 'min'}, 'MIL min'),
        (simpleMIL, {'type': 'extreme'}, 'MIL Extreme'),
        (BOW, {'k': 90, 'covar_type': 'diag', 'n_iter': 20}, 'BOW'),
        (CKNN, {'references': 3, 'citers': 5}, 'CKNN'),
        (maxDD, {}, 'DIVERSE DENSITY'),
        (EMDD, {}, 'EM-DD'),
        (MILBoost, {}, 'MILBOOST'),
    )

    pool = []
    for model_cls, fit_kwargs, name in specs:
        pool.append([model_cls(), fit_kwargs, name, empty_slots(), empty_slots()])
    return pool
def _ipf_fit_predict(clasificador_, X_train, Y_train):
    """Fit on one training split and predict that same split, with fallbacks.

    The configured classifier is tried twice.  If both attempts raise, a
    ``simpleMIL`` model fitted with ``type='max'`` is used instead.  If that
    also raises, every bag is predicted as ``1``.
    """
    model, fit_kwargs = clasificador_[0], clasificador_[1]

    def attempt():
        # Always train on this fold's training bags.  ``**{}`` expands to no
        # keyword arguments, so classifiers without parameters need no
        # separate branch.
        model.fit(X_train, Y_train, **fit_kwargs)
        predicted = model.predict(X_train)
        # predict() may return a tuple; only its first element is used.
        return predicted[0] if isinstance(predicted, tuple) else predicted

    # ``except Exception`` (not a bare except) so Ctrl-C / SystemExit still
    # propagate instead of being treated as a classifier failure.
    try:
        return attempt()
    except Exception:
        print('Fallo, segundo intento')

    try:
        predictions = attempt()
        print('OK')
        return predictions
    except Exception:
        print('Posible fallo en bolsa...')

    try:
        print('Cambiando clasificador..')
        fallback = simpleMIL()
        fallback.fit(X_train, Y_train, type='max')
        predictions = fallback.predict(X_train)
        if isinstance(predictions, tuple):
            predictions = predictions[0]
        print('OK')
        return predictions
    except Exception:
        print('Fallo')
        # Last resort: one positive prediction per training bag.
        return np.ones(len(Y_train), dtype=int)


def mil_cv_filter_ipf(bags_f, labels_f, folds, votacion, clasificador_):
    """Iteratively drop bags whose label a single classifier keeps rejecting.

    Each round shuffles the data, runs a stratified k-fold split, fits the
    classifier on every training split and compares the sign of its
    predictions on that split with the labels.  Bags voted noisy are removed
    and the procedure repeats until three consecutive rounds each flag fewer
    than 1% of the remaining bags.

    Args:
        bags_f: sequence of bags, indexable by integer position.
        labels_f: numpy array with one label per bag, laid out as a column
            (it is read through ``labels_f.T[0]``).  Labels are compared with
            ``np.sign`` of the predictions, so presumably they are +1/-1;
            verify against the caller.
        folds: requested number of folds; lowered to ``len(labels_f)`` when
            there are fewer bags than folds.
        votacion: ``'maxVotos'`` flags a bag misclassified in more than half
            of the folds; ``'consenso'`` flags a bag misclassified in every
            fold in which it was scored.
        clasificador_: ``[model, fit_kwargs, display_name, ...]`` entry; only
            the first three items are used here.

    Returns:
        ``(bags, labels)`` after filtering, in the order of the last shuffle.

    Raises:
        ValueError: if ``votacion`` is not one of the two supported schemes.
    """
    if votacion not in ('maxVotos', 'consenso'):
        raise ValueError(
            "votacion must be 'maxVotos' or 'consenso', got %r" % (votacion,))

    max_noise_fraction = 0.01  # a round below this fraction counts as quiet
    quiet_rounds_to_stop = 3
    quiet_rounds = 0
    removed_total = 0

    if len(labels_f) < folds:
        folds = len(labels_f)
    skf = StratifiedKFold(n_splits=folds)

    while True:
        bags_f, labels_f = shuffle(
            bags_f, labels_f,
            random_state=rand.randint(0, len(labels_f) - 1))
        n_bags = len(labels_f)

        # is_correct[f, b]: fold f accepted the label of bag b.
        # in_train[f, b]:  bag b was part of fold f's training split, i.e.
        #                  fold f actually cast a vote on it.
        is_correct = np.ones((folds, n_bags), dtype=bool)
        in_train = np.zeros((folds, n_bags), dtype=bool)

        splits = skf.split(bags_f, labels_f.reshape(n_bags))
        for fold, (train_index, _unused_test) in enumerate(splits):
            X_train = [bags_f[i] for i in train_index]
            Y_train = labels_f[train_index]
            predictions = _ipf_fit_predict(clasificador_, X_train, Y_train)

            in_train[fold, train_index] = True
            for l, p in enumerate(train_index):
                try:
                    is_correct[fold][p] = (
                        Y_train.T[0][l] == np.sign(predictions[l]))
                except IndexError:
                    # Fewer predictions than bags: the vote stays "correct".
                    print("Fallo en ultimo indice!")

        wrong_votes = (~is_correct).sum(axis=0)
        if votacion == 'maxVotos':
            noisy_mask = wrong_votes > folds / 2.0
        else:
            # Every bag is held out (hence never scored) in exactly one fold,
            # so consensus is taken over the folds that did score it;
            # requiring all folds would make this scheme unable to flag
            # anything.
            votes_cast = in_train.sum(axis=0)
            noisy_mask = (votes_cast > 0) & (wrong_votes == votes_cast)

        noisy = np.flatnonzero(noisy_mask)
        keep = np.flatnonzero(~noisy_mask)

        if len(noisy) < len(bags_f) * max_noise_fraction:
            quiet_rounds += 1
        else:
            quiet_rounds = 0
        if quiet_rounds == quiet_rounds_to_stop:
            # Converged: bags flagged in this last round are left in place.
            break

        bags_f = [bags_f[d] for d in keep]
        labels_f = labels_f[keep]
        # Counted only here, so the total reflects bags actually dropped.
        removed_total += len(noisy)

        if len(bags_f) < len(labels_f):
            print('Número de bolsas, menor al número de etiquetas, no se puede continuar')
            break
        # Another round needs at least ``folds`` bags to split (this also
        # covers the case where everything was removed).
        if len(labels_f) < folds:
            break

    print('\t\t\t=>Elementos eliminados por ' + clasificador_[2] + ': '
          + str(removed_total))
    return bags_f, labels_f
def _ef_fit_predict(model, fit_kwargs, X_train, Y_train, X_test):
    """Fit on the training split and predict the held-out split, with fallbacks.

    The given model is tried up to three times.  If every attempt raises, a
    ``simpleMIL`` model fitted with ``type='max'`` is used instead.  If that
    also raises, every held-out bag is predicted as ``1``.
    """
    def attempt():
        # Train on the training split only, so the held-out bags being judged
        # are never seen during fitting.  ``**{}`` expands to no keyword
        # arguments, so classifiers without parameters need no extra branch.
        model.fit(X_train, Y_train, **fit_kwargs)
        predicted = model.predict(X_test)
        # predict() may return a tuple; only its first element is used.
        return predicted[0] if isinstance(predicted, tuple) else predicted

    # ``except Exception`` (not a bare except) so Ctrl-C / SystemExit still
    # propagate instead of being treated as a classifier failure.
    try:
        return attempt()
    except Exception:
        print('Fallo, segundo intento')

    try:
        predictions = attempt()
        print('OK')
        return predictions
    except Exception:
        print('Posible fallo en bolsa...')

    try:
        predictions = attempt()
        print('OK')
        return predictions
    except Exception:
        # Nothing to report yet; the fallback below announces itself.
        pass

    try:
        print('Cambiando clasificador..')
        fallback = simpleMIL()
        fallback.fit(X_train, Y_train, type='max')
        # Predict the held-out bags: the result is compared against Y_test.
        predictions = fallback.predict(X_test)
        if isinstance(predictions, tuple):
            predictions = predictions[0]
        print('OK')
        return predictions
    except Exception:
        print('Fallo')
        # Last resort: a flat vector with one positive prediction per
        # held-out bag, so it lines up with test_index.
        return np.ones(len(X_test), dtype=int)


def mil_cv_filter_ef(bags_f, labels_f, folds, votacion, num):
    """Drop bags whose label a pool of classifiers rejects on held-out folds.

    The data is shuffled once and split with a stratified k-fold.  For every
    fold each classifier of the pool is fitted on the training split and the
    sign of its predictions on the held-out split is compared with the
    labels, so each classifier casts exactly one vote per bag.

    Args:
        bags_f: sequence of bags, indexable by integer position.
        labels_f: numpy array with one label per bag, laid out as a column
            (it is read through ``labels_f.T[0]``).  Labels are compared with
            ``np.sign`` of the predictions, so presumably they are +1/-1;
            verify against the caller.
        folds: requested number of folds; lowered to ``len(labels_f)`` when
            there are fewer bags than folds.
        votacion: ``'maxVotos'`` flags a bag rejected by more than half of
            the classifiers; ``'consenso'`` flags a bag rejected by all.
        num: ``1`` selects the pool from ``cla_filter()``; any other value
            selects ``cla_filter2()``.

    Returns:
        ``(bags, labels)`` with the flagged bags removed, in shuffled order.

    Raises:
        ValueError: if ``votacion`` is not one of the two supported schemes.
    """
    if votacion not in ('maxVotos', 'consenso'):
        raise ValueError(
            "votacion must be 'maxVotos' or 'consenso', got %r" % (votacion,))

    Clasificadores = cla_filter() if num == 1 else cla_filter2()

    bags_f, labels_f = shuffle(bags_f, labels_f,
                               random_state=rand.randint(0, 100))
    if len(labels_f) < folds:
        folds = len(labels_f)
    skf = StratifiedKFold(n_splits=folds)

    n_bags = len(labels_f)
    # is_correct[c, b]: classifier c accepted the label of bag b.
    is_correct = np.ones((len(Clasificadores), n_bags), dtype=bool)

    for train_index, test_index in skf.split(bags_f, labels_f.reshape(n_bags)):
        X_train = [bags_f[i] for i in train_index]
        Y_train = labels_f[train_index]
        X_test = [bags_f[i] for i in test_index]
        Y_test = labels_f[test_index]

        for s, entry in enumerate(Clasificadores):
            predictions = _ef_fit_predict(
                entry[0], entry[1], X_train, Y_train, X_test)
            for l, p in enumerate(test_index):
                try:
                    is_correct[s][p] = (
                        Y_test.T[0][l] == np.sign(predictions[l]))
                except IndexError:
                    # Fewer predictions than bags: the vote stays "correct".
                    print("Fallo en ultimo indice!")

    if votacion == 'maxVotos':
        wrong_votes = (~is_correct).sum(axis=0)
        noisy_mask = wrong_votes > len(Clasificadores) / 2.0
    else:
        # Consensus: no classifier accepted the label.
        noisy_mask = ~is_correct.any(axis=0)

    noisy = np.flatnonzero(noisy_mask)
    keep = np.flatnonzero(~noisy_mask)

    print('\t\t\t=>Elementos eliminados con Filter ' + str(num + 1) + ': '
          + str(len(noisy)))
    X_train_NoNy = [bags_f[i] for i in keep]
    Y_train_NoNy = labels_f[keep]
    return X_train_NoNy, Y_train_NoNy
bags, labels = shuffle(bags, labels, random_state=rand.randint(0, 100)) #Number of Folds folds = 5 bow_classifier = BOW() #parameters_bow = {'k':100,'covar_type':'diag','n_iter':20} parameters_bow = {'k': 10, 'covar_type': 'diag', 'n_iter': 20} accuracie, results_accuracie, auc, results_auc = mil_cross_val( bags=bags, labels=labels, model=bow_classifier, folds=folds, parameters=parameters_bow) SMILa = simpleMIL() parameters_smil = {'type': 'max'} #En este me funciono maxDD porque no tiene problem con parametros accuracie, results_accuracie, auc, results_auc, elapsed = mil_cross_val( bags=bags, labels=labels, model=SMILa, folds=folds, parameters=parameters_smil, timer=True) parameters_smil = {'type': 'min'} #En este me funciono maxDD porque no tiene problem con parametros accuracie, results_accuracie, auc, results_auc = mil_cross_val( bags=bags, labels=labels,
train_labels, k=10, covar_type='diag', n_iter=20) predictions = bow_classifier.predict(test_bags) accuracie = np.average(test_labels.T == np.sign(predictions)) print '\n Accuracy: %.2f%%' % (100 * accuracie) fpr, tpr, thresholds = metrics.roc_curve(test_labels, predictions, pos_label=1.) metrics.auc(fpr, tpr) ##################### #simpleMIL [average]# ##################### SMILa = simpleMIL() SMILa.fit(train_bags, train_labels, type='average') predictions = SMILa.predict(test_bags) accuracie = np.average(test_labels.T == np.sign(predictions)) print '\n Accuracy: %.2f%%' % (100 * accuracie) fpr, tpr, thresholds = metrics.roc_curve(test_labels, predictions, pos_label=1.) metrics.auc(fpr, tpr) ##################### #simpleMIL [extreme]# ##################### SMILe = simpleMIL() SMILe.fit(train_bags, train_labels, type='extreme') predictions = SMILe.predict(test_bags)