def run(X, y, n_samples_for_intial, n_queries, n_comittee_members, estimator):
    """Run a query-by-committee active-learning loop until micro-F1 reaches 0.65.

    Parameters
    ----------
    X, y : full feature matrix (sparse CSR, see delete_rows_csr below) and labels;
        also used as the evaluation set after every query.
    n_samples_for_intial : size of the randomly drawn initial labelled set.
    n_queries : currently unused; kept so existing callers keep working.
    n_comittee_members : number of committee members.
    estimator : scikit-learn estimator for the members.
        NOTE(review): the *same* estimator instance is passed to every member —
        confirm this is intended; modAL usually expects one estimator per member.

    Returns
    -------
    int
        Number of queries issued before the F1 target (or pool exhaustion).
    """
    start_time = time.time()

    # Build the initial labelled set and the unlabelled pool.
    X_train, y_train, X_pool, y_pool = create_random_pool_and_initial_sets(
        X, y, n_samples_for_intial)

    # Every member starts from the same seed data.
    learners = [
        ActiveLearner(estimator=estimator, X_training=X_train, y_training=y_train)
        for _ in range(n_comittee_members)
    ]
    committee = Committee(learner_list=learners,
                          query_strategy=max_disagreement_sampling)

    unqueried_score = committee.score(X, y)
    print('Score over unqueried samples {:0.4f}'.format(unqueried_score))

    performance_history = []
    f1_score = 0
    index = 0
    # BUG FIX: also stop when the pool is exhausted; the original condition was
    # only `f1_score < 0.65`, which crashes on an empty pool if the target F1
    # is never reached.
    while f1_score < 0.65 and X_pool.shape[0] > 0:
        index += 1
        # Get the most disagreed-upon sample from the pool.
        query_idx, query_instance = committee.query(X_pool)
        # Retrain the committee with the newly labelled sample.
        committee.teach(
            X=X_pool[query_idx].reshape(1, -1),
            y=y_pool[query_idx].reshape(1, )
        )
        # Remove the queried instance from the pool; X_pool is sparse, so row
        # deletion goes through the CSR helper rather than np.delete.
        X_pool = delete_rows_csr(X_pool, query_idx)
        y_pool = np.delete(y_pool, query_idx)

        y_pred = committee.predict(X)
        f1_score = metrics.f1_score(y, y_pred, average='micro')
        if index % 100 == 0:
            print('F1 score after {n} training samples: {f1:0.4f}'.format(
                n=index, f1=f1_score))
        # Save the score for every query, not just the printed ones.
        performance_history.append(f1_score)

    print("--- %s seconds ---" % (time.time() - start_time))
    print(performance_history)
    return index
def active_learn(df1, first_item_index_of_each_category):
    """Query-by-committee loop over a labelled DataFrame.

    Parameters
    ----------
    df1 : DataFrame with a 'label' column; the remaining columns (from the
        second onward) are the features.
    first_item_index_of_each_category : indices forming the initial training
        set (presumably one sample per class — confirm with caller).
    """
    train_idx = first_item_index_of_each_category
    # X_train = iris['data'][train_idx]
    # y_train = iris['target'][train_idx]  # initial training data

    data = df1.values[:, 1:]
    target = df1['label'].values
    X_full = df1.values[:, 1:]
    y_full = df1['label'].values

    # Item from second column onward, as the first column is the label.
    X_train = df1.values[:, 1:][train_idx]
    y_train = df1['label'].values[train_idx]

    X_pool = deepcopy(X_full)
    y_pool = deepcopy(y_full)

    # BUG FIX: remove the known instances from the pool exactly once.  The
    # original ran these deletions inside the member loop below, so every
    # iteration after the first removed additional, unrelated rows (indices
    # shift after the first deletion).
    X_pool = np.delete(X_pool, train_idx, axis=0)
    y_pool = np.delete(y_pool, train_idx)

    # Initialize committee members; all share the same seed data.
    n_members = 2
    learner_list = [
        ActiveLearner(estimator=RandomForestClassifier(),
                      X_training=X_train, y_training=y_train)
        for _ in range(n_members)
    ]
    committee = Committee(learner_list=learner_list)

    # Accuracy over the full data set before any queries.
    print('%1.3f' % committee.score(data, target))

    performance_array = []
    n_queries = 505
    for idx in range(n_queries):
        query_idx, query_instance = committee.query(X_pool)
        committee.teach(X=X_pool[query_idx].reshape(1, -1),
                        y=y_pool[query_idx].reshape(1, ))
        # Remove the queried instance from the pool.
        X_pool = np.delete(X_pool, query_idx, axis=0)
        y_pool = np.delete(y_pool, query_idx)
        learner_score = committee.score(data, target)
        print('%1.3f' % (learner_score))
        # Sample the learning curve every 100 queries.
        if idx % 100 == 0:
            performance_array.append(learner_score)
    percentage_increase(performance_array)
plt.subplot(1, n_members, learner_idx + 1) plt.scatter(x=pca[:, 0], y=pca[:, 1], c=learner.predict(iris['data']), cmap='viridis', s=50) plt.title('Learner no. %d initial predictions' % (learner_idx + 1)) plt.show() # visualizing the initial predictions with plt.style.context('seaborn-white'): plt.figure(figsize=(7, 7)) prediction = committee.predict(iris['data']) plt.scatter(x=pca[:, 0], y=pca[:, 1], c=prediction, cmap='viridis', s=50) plt.title('Committee initial predictions, accuracy = %1.3f' % committee.score(iris['data'], iris['target'])) plt.show() # query by committee n_queries = 10 for idx in range(n_queries): query_idx, query_instance = committee.query(X_pool) committee.teach(X=X_pool[query_idx].reshape(1, -1), y=y_pool[query_idx].reshape(1, )) # remove queried instance from pool X_pool = np.delete(X_pool, query_idx, axis=0) y_pool = np.delete(y_pool, query_idx) # visualizing the final predictions per learner with plt.style.context('seaborn-white'): plt.figure(figsize=(n_members * 7, 7))
# for learner_idx, learner in enumerate(committee): # plt.subplot(1, n_members, learner_idx + 1) # plt.scatter(x=pca[:, 0], y=pca[:, 1], c=learner.predict(iris['data']), cmap='viridis', s=5) # plt.title('Learner no. %d initial predictions' % (learner_idx + 1)) # plt.show() # visualizing the Committee's predictions per learner # with plt.style.context('seaborn-white'): # plt.figure(figsize=(7, 7)) # prediction = committee.predict(iris['data']) # plt.scatter(x=pca[:, 0], y=pca[:, 1], c=prediction, cmap='viridis', s=5) # plt.title('Committee initial predictions') # plt.show() # query by committee unqueried_score = committee.score(train_features, train_labels) performance_history = [unqueried_score] n_queries = 100 for _ in range(n_queries): query_idx, query_instance = committee.query(X_pool) # -> Here print(query_instance, " ", query_idx) committee.teach(X=X_pool[query_idx].reshape(1, -1), y=y_pool[query_idx].reshape(1, )) performance_history.append(committee.score(train_features, train_labels)) # remove queried instance from pool X_pool = np.delete(X_pool, query_idx, axis=0) y_pool = np.delete(y_pool, query_idx) print(performance_history) # visualizing the final predictions per learner
def query():
    """Flask endpoint: train an active learner on an uploaded image archive.

    Reads from the request: the archive ('file'), the query strategy
    ('strategy_select'), the base classifier ('classifier_select'), the folder
    layout ('structure_select': 0 = one sub-directory per class, otherwise a
    flat folder with the label encoded in the file name) and the query budget
    ('queries').  Builds the dataset, seeds an ActiveLearner or a three-member
    Committee, stashes state in a Data object and hands off to helper().
    """
    file = request.files['file']
    # If the user does not select a file, the browser submits an empty part
    # without a filename.
    filename = secure_filename(file.filename)

    if file and allowed_file(file.filename):
        filename = secure_filename(file.filename)
        file.save(os.path.join(UPLOAD_FOLDER, filename))
        # Unpack next to the upload; .rar needs patool, anything else is
        # treated as a zip.
        if filename.split(".")[1] == "rar":
            patoolib.extract_archive(os.path.join(UPLOAD_FOLDER, filename),
                                     outdir=os.path.join(UPLOAD_FOLDER))
        else:
            zip_ref = zipfile.ZipFile(os.path.join(UPLOAD_FOLDER, filename), 'r')
            zip_ref.extractall(UPLOAD_FOLDER)
            zip_ref.close()
        print("Succesfull")

    st = request.form.get('strategy_select')
    cl = request.form.get('classifier_select')
    option = int(request.form.get('structure_select'))
    print(cl)
    if str(cl) == 'Random Forest':
        classifier = RandomForestClassifier()
    elif str(cl) == 'KNN':
        classifier = KNeighborsClassifier()
    else:
        classifier = DecisionTreeClassifier()

    # BUG FIX: form values are strings; the query budget is used as a count
    # downstream, so convert it once here.
    n_queries = int(request.form['queries'])
    print(st)

    classlist = []
    classes = {}          # class name -> numeric label
    data = {}
    data['image'] = []
    data['label'] = []
    filename = secure_filename(file.filename)
    print(filename)

    if option == 0:
        # One sub-directory per class; the directory name is the label.
        for dirname, _, filenames in os.walk(
                os.path.join(UPLOAD_FOLDER, filename.split(".")[0])):
            print(filenames)
            # Loop variable renamed so it no longer shadows `filename` above.
            for image_name in filenames:
                if '.jpg' in image_name or 'jpeg' in image_name or 'png' in image_name:
                    image = Image.open(os.path.join(dirname, image_name))
                    image = image.resize((200, 200), Image.ANTIALIAS)
                    size = np.array(image).size
                    if len(classes) == 0:
                        data['image'] = np.array(numpy.array(image)).reshape((1, size))
                    else:
                        try:
                            x = numpy.array(image).reshape((1, size))
                            data['image'] = np.append(data['image'], x, axis=0)
                        except:
                            # Pixel count differs from the first image (e.g.
                            # different colour mode); skip this image.
                            continue
                    if dirname.split('\\')[-1] not in classes.keys():
                        classlist.append({'name': dirname.split('\\')[-1],
                                          'number': len(classes)})
                        classes[dirname.split('\\')[-1]] = len(classes)
                    data['label'].append(classes[dirname.split('\\')[-1]])
        print(classes)
    else:
        # Flat folder; the label is the alphabetic part of the file name.
        for imfile in os.listdir(os.path.join(UPLOAD_FOLDER, filename.split(".")[0])):
            if imfile.endswith(".jpg") or imfile.endswith(".jpeg") or imfile.endswith("png"):
                image = Image.open(os.path.join(
                    os.path.join(UPLOAD_FOLDER, filename.split(".")[0]), imfile))
                image = image.resize((200, 200), Image.ANTIALIAS)
                size = np.array(image).size
                if len(classes) == 0:
                    data['image'] = np.array(numpy.array(image)).reshape((1, size))
                else:
                    try:
                        x = numpy.array(image).reshape((1, size))
                        data['image'] = np.append(data['image'], x, axis=0)
                    except:
                        continue
                label_name = "".join(re.split("[^a-zA-Z]*", imfile.split(".")[0]))
                if label_name not in classes.keys():
                    classlist.append({'name': label_name, 'number': len(classes)})
                    classes[label_name] = len(classes)
                data['label'].append(classes[label_name])
                print(classes)
            else:
                continue

    X = data['image']
    y = data['label']
    n_initial = 100
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    # Draw a random initial labelled set of n_initial samples.
    initial_idx = np.random.choice(range(len(X_train)), size=n_initial, replace=False)
    X_initial = []
    y_initial = []
    print(type(X_initial))
    for i in range(n_initial):
        v = np.array(X_train[initial_idx[i]]).reshape((1, size))
        # BUG FIX: the label must come from the same sampled row as the
        # features; the original appended y_train[i], pairing the sampled
        # features with unrelated labels.
        y_initial.append(y_train[initial_idx[i]])
        if i == 0:
            X_initial = np.array(X_train[initial_idx[i]]).reshape((1, size))
            print(X_initial.shape)
        else:
            X_initial = np.append(X_initial, v, axis=0)
    X_pool, y_pool = np.delete(X_train, initial_idx, axis=0), \
        np.delete(y_train, initial_idx, axis=0)
    print(X.shape)
    print(X[0].shape)
    print(X_initial.shape)

    params = {}
    params["X_test"] = X_test
    params["y_test"] = y_test
    params["counter"] = n_queries
    params["X_pool"] = X_pool
    params["y_pool"] = y_pool

    st = str(st)
    if st in ('Uncertainty Sampling', 'Entropy Sampling'):
        print(classifier)
        print(cl)
        sampler = uncertainty_sampling if st == 'Uncertainty Sampling' else entropy_sampling
        learner = ActiveLearner(
            estimator=classifier,
            query_strategy=sampler,
            X_training=X_initial, y_training=y_initial
        )
        params["learner"] = learner
        accuracy_scores = learner.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        print(accuracy_scores)
        data = Data(n_queries, X_pool, y_pool, learner, None, [accuracy_scores],
                    X_test, y_test, classlist, n_queries)
        if st == 'Uncertainty Sampling':
            print("Calling Helper")
        return helper()

    if st == 'Random Sampling':
        # Random sampling trains on the full training split (original behaviour).
        learner = ActiveLearner(
            estimator=classifier,
            query_strategy=random_sampling,
            X_training=X_train, y_training=y_train
        )
        accuracy_scores = learner.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        print(accuracy_scores)
        data = Data(n_queries, X_pool, y_pool, learner, None, [accuracy_scores],
                    X_test, y_test, classlist, n_queries)
        return helper()

    # All committee variants were byte-identical except for the disagreement
    # measure, so they share one code path keyed by strategy name.
    committee_strategies = {
        'Query By Committee(Vote Entropy Sampling)': vote_entropy_sampling,
        'Query By Committee(Uncertainty Sampling)': uncertainty_sampling,
        'Query By Committee(Max Disagreement Sampling)': max_disagreement_sampling,
        'Query By Committee(Max STD Sampling)': max_std_sampling,
        'Query By Committee(Consensus Entropy Sampling)': consensus_entropy_sampling,
    }
    if st in committee_strategies:
        committee = Committee(
            learner_list=[
                ActiveLearner(estimator=RandomForestClassifier(),
                              X_training=X_train, y_training=y_train),
                ActiveLearner(estimator=KNeighborsClassifier(),
                              X_training=X_train, y_training=y_train),
                ActiveLearner(estimator=DecisionTreeClassifier(),
                              X_training=X_train, y_training=y_train),
            ],
            query_strategy=committee_strategies[st],
        )
        params["committee"] = committee
        accuracy_scores = committee.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        print(accuracy_scores)
        data = Data(n_queries, X_pool, y_pool, None, committee, [accuracy_scores],
                    X_test, y_test, classlist, n_queries)
        return helper()
# In[66]: version_space_length = version_space.shape[0] ind = np.argsort(version_space[:, 1]) print('Order of points to label:') for i in range(version_space_length): print('#{size:2d}'.format(size=i) + ' point:' + str(version_space_points[ind[i]]) + ' label:' + str(version_space_labels[ind[i]])) # In[67]: print("Initial accuracy =", committee.score(X, Y)) # In[68]: x = 40 queries = int((x / 100) * 150) accuracy_list = [] accuracy_list.append(committee.score(X, Y)) # In[69]: iter = 0 print("Accuracy after", 0, "iterations :", committee.score(X, Y)) for i in range(0, queries):
plt.subplot(1, n_members, learner_idx + 1) plt.scatter(x=pca[:, 0], y=pca[:, 1], c=learner.predict(data), cmap='viridis', s=50) plt.title('Learner no. %d initial predictions' % (learner_idx + 1)) plt.show() # visualizing the initial predictions with plt.style.context('seaborn-white'): plt.figure(figsize=(7, 7)) prediction = committee.predict(data) plt.scatter(x=pca[:, 0], y=pca[:, 1], c=prediction, cmap='viridis', s=50) plt.title('Committee initial predictions, accuracy = %1.3f' % committee.score(data, target)) plt.show() # query by committee n_queries = 10 for idx in range(n_queries): query_idx, query_instance = committee.query(X_pool) committee.teach(X=X_pool[query_idx].reshape(1, -1), y=y_pool[query_idx].reshape(1, )) # remove queried instance from pool X_pool = np.delete(X_pool, query_idx, axis=0) y_pool = np.delete(y_pool, query_idx) # visualizing the final predictions per learner with plt.style.context('seaborn-white'): plt.figure(figsize=(n_members * 7, 7))
learner_list.append(learner) # assembling the committee committee = Committee(learner_list=learner_list, query_strategy=vote_entropy_sampling) ''' with plt.style.context('seaborn-white'): plt.figure(figsize=(n_members*7, 7)) for learner_idx, learner in enumerate(committee): plt.subplot(1, n_members, learner_idx + 1) plt.scatter(x=pca[:, 0], y=pca[:, 1], c=learner.predict(iris['data']), cmap='viridis', s=50) plt.title('Learner no. %d initial predictions' % (learner_idx + 1)) plt.show() ''' unqueried_score = committee.score(iris['data'], iris['target']) ''' with plt.style.context('seaborn-white'): plt.figure(figsize=(7, 7)) prediction = committee.predict(iris['data']) plt.scatter(x=pca[:, 0], y=pca[:, 1], c=prediction, cmap='viridis', s=50) plt.title('Committee initial predictions, accuracy = %1.3f' % unqueried_score) plt.show() ''' performance_history = [unqueried_score] # query by committee n_queries = 20 for idx in range(n_queries): query_idx, query_instance = committee.query(X_pool)
def query():
    """Flask endpoint: train an active learner on uploaded train + test archives.

    Variant of the endpoint above: instead of a random train/test split it
    accepts a second archive ('test_file') as the held-out evaluation set, and
    additionally records a mapping from raw pixel bytes to the source image
    path (image_data) so queried samples can be traced back to files later.
    """
    file = request.files['file']
    test = request.files['test_file']
    filename = secure_filename(file.filename)
    test_filename = secure_filename(test.filename)

    # Save and unpack the training archive, then the test archive.
    if file and allowed_file(file.filename):
        filename = secure_filename(file.filename)
        file.save(os.path.join(UPLOAD_FOLDER, filename))
        zip_ref = zipfile.ZipFile(os.path.join(UPLOAD_FOLDER, filename), 'r')
        zip_ref.extractall(UPLOAD_FOLDER)
        zip_ref.close()
    if test and allowed_file(test.filename):
        filename = secure_filename(test.filename)
        test.save(os.path.join(UPLOAD_FOLDER, filename))
        zip_ref = zipfile.ZipFile(os.path.join(UPLOAD_FOLDER, filename), 'r')
        zip_ref.extractall(UPLOAD_FOLDER)
        zip_ref.close()

    st = request.form.get('strategy_select')
    cl = request.form.get('classifier_select')
    option = int(request.form.get('structure_select'))
    if str(cl) == 'Random Forest':
        classifier = RandomForestClassifier()
    elif str(cl) == 'KNN':
        classifier = KNeighborsClassifier()
    else:
        classifier = DecisionTreeClassifier()

    # BUG FIX: form values are strings; the query budget is used as a count
    # downstream, so convert it once here.
    n_queries = int(request.form['queries'])

    classlist = []
    classes = {}          # class name -> numeric label (training set)
    data = {}
    data['image'] = []
    data['label'] = []
    data['image_name'] = []
    # raw pixel bytes -> source file path; presumably used to display the
    # queried image later — confirm against helper().
    image_data = {}
    filename = secure_filename(file.filename)

    if option == 0:
        # One sub-directory per class; the directory name is the label.
        # Loop variables renamed: the original `for root, dirs, filename in
        # os.walk(...)` rebound `filename` to the list of file names.
        for root, dirs, names in os.walk(
                os.path.join(UPLOAD_FOLDER, filename.split(".")[0])):
            for name in names:
                if name.endswith(".jpg") or name.endswith(".jpeg") or name.endswith(".png"):
                    image_file_name = os.path.join(root, name)
                    image = Image.open(image_file_name)
                    image = image.resize((200, 200), Image.ANTIALIAS)
                    size = np.array(image).size
                    if len(classes) == 0:
                        data['image'] = np.array(numpy.array(image)).reshape((1, size))
                        image_data[(numpy.array(image).reshape((1, size))).tobytes()] = image_file_name
                    else:
                        try:
                            x = numpy.array(image).reshape((1, size))
                            image_data[(numpy.array(image).reshape((1, size))).tobytes()] = image_file_name
                            data['image'] = np.append(data['image'], x, axis=0)
                        except:
                            # Pixel count differs from the first image; skip it.
                            continue
                    if root.split("\\")[-1] not in classes.keys():
                        classlist.append({'name': root.split('\\')[-1],
                                          'number': len(classes)})
                        classes[root.split('\\')[-1]] = len(classes)
                    data['label'].append(classes[root.split('\\')[-1]])
                    data['image_name'].append(image_file_name)
    else:
        # Flat folder; the label is the alphabetic part of the file name.
        for imfile in os.listdir(os.path.join(UPLOAD_FOLDER, filename.split(".")[0])):
            if imfile.endswith(".jpg") or imfile.endswith(".jpeg") or imfile.endswith(".png"):
                image_file_name = os.path.join(
                    os.path.join(UPLOAD_FOLDER, filename.split(".")[0]), imfile)
                image = Image.open(os.path.join(
                    os.path.join(UPLOAD_FOLDER, filename.split(".")[0]), imfile))
                image = image.resize((200, 200), Image.ANTIALIAS)
                size = np.array(image).size
                if len(classes) == 0:
                    data['image'] = np.array(numpy.array(image)).reshape((1, size))
                    image_data[(numpy.array(image).reshape((1, size))).tobytes()] = image_file_name
                else:
                    try:
                        x = numpy.array(image).reshape((1, size))
                        image_data[(numpy.array(image).reshape((1, size))).tobytes()] = image_file_name
                        data['image'] = np.append(data['image'], x, axis=0)
                    except:
                        continue
                label_name = "".join(re.split("[^a-zA-Z]*", imfile.split(".")[0]))
                if label_name not in classes.keys():
                    classlist.append({'name': label_name, 'number': len(classes)})
                    classes[label_name] = len(classes)
                data['label'].append(classes[label_name])
                data['image_name'].append(imfile)
            else:
                continue

    # Same loading logic for the uploaded test archive (no image_data map).
    test_classlist = []
    test_classes = {}
    test_data = {}
    test_data['image'] = []
    test_data['label'] = []
    test_data['image_name'] = []
    if option == 0:
        for dirname, _, filenames in os.walk(
                os.path.join(UPLOAD_FOLDER, test_filename.split(".")[0])):
            for tname in filenames:
                if '.jpg' in tname or 'jpeg' in tname or 'png' in tname:
                    image = Image.open(os.path.join(dirname, tname))
                    image = image.resize((200, 200), Image.ANTIALIAS)
                    size = np.array(image).size
                    if len(test_classes) == 0:
                        test_data['image'] = np.array(numpy.array(image)).reshape((1, size))
                    else:
                        try:
                            x = numpy.array(image).reshape((1, size))
                            test_data['image'] = np.append(test_data['image'], x, axis=0)
                        except:
                            continue
                    if dirname.split('\\')[-1] not in test_classes.keys():
                        test_classlist.append({'name': dirname.split('\\')[-1],
                                               'number': len(test_classes)})
                        test_classes[dirname.split('\\')[-1]] = len(test_classes)
                    test_data['label'].append(test_classes[dirname.split('\\')[-1]])
                    test_data['image_name'].append(tname)
    else:
        for imfile in os.listdir(os.path.join(UPLOAD_FOLDER, test_filename.split(".")[0])):
            if imfile.endswith(".jpg") or imfile.endswith(".jpeg") or imfile.endswith("png"):
                image = Image.open(os.path.join(
                    os.path.join(UPLOAD_FOLDER, test_filename.split(".")[0]), imfile))
                image = image.resize((200, 200), Image.ANTIALIAS)
                size = np.array(image).size
                if len(test_classes) == 0:
                    test_data['image'] = np.array(numpy.array(image)).reshape((1, size))
                else:
                    try:
                        x = numpy.array(image).reshape((1, size))
                        test_data['image'] = np.append(test_data['image'], x, axis=0)
                    except:
                        continue
                label_name = "".join(re.split("[^a-zA-Z]*", imfile.split(".")[0]))
                if label_name not in test_classes.keys():
                    test_classlist.append({'name': label_name,
                                           'number': len(test_classes)})
                    test_classes[label_name] = len(test_classes)
                test_data['label'].append(test_classes[label_name])
                test_data['image_name'].append(imfile)
            else:
                continue

    X_train = data['image']
    y_train = data['label']
    X_test = test_data['image']
    y_test = test_data['label']

    # Draw a random initial labelled set of n_initial samples.
    n_initial = 100
    initial_idx = np.random.choice(range(len(X_train)), size=n_initial, replace=False)
    X_initial = []
    y_initial = []
    for i in range(n_initial):
        v = np.array(X_train[initial_idx[i]]).reshape((1, size))
        # BUG FIX: the label must come from the same sampled row as the
        # features; the original appended y_train[i], pairing the sampled
        # features with unrelated labels.
        y_initial.append(y_train[initial_idx[i]])
        if i == 0:
            X_initial = np.array(X_train[initial_idx[i]]).reshape((1, size))
        else:
            X_initial = np.append(X_initial, v, axis=0)
    X_pool, y_pool = np.delete(X_train, initial_idx, axis=0), \
        np.delete(y_train, initial_idx, axis=0)

    params = {}
    params["X_test"] = X_test
    params["y_test"] = y_test
    params["counter"] = n_queries
    params["X_pool"] = X_pool
    params["y_pool"] = y_pool

    st = str(st)
    if st in ('Uncertainty Sampling', 'Entropy Sampling'):
        sampler = uncertainty_sampling if st == 'Uncertainty Sampling' else entropy_sampling
        learner = ActiveLearner(
            estimator=classifier,
            query_strategy=sampler,
            X_training=X_initial, y_training=y_initial
        )
        params["learner"] = learner
        accuracy_scores = learner.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        data = Data(n_queries, X_pool, y_pool, learner, None, [accuracy_scores],
                    X_test, y_test, classlist, n_queries, image_data)
        return helper()

    if st == 'Random Sampling':
        # Random sampling trains on the full training set (original behaviour).
        learner = ActiveLearner(
            estimator=classifier,
            query_strategy=random_sampling,
            X_training=X_train, y_training=y_train
        )
        accuracy_scores = learner.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        data = Data(n_queries, X_pool, y_pool, learner, None, [accuracy_scores],
                    X_test, y_test, classlist, n_queries, image_data)
        return helper()

    # All committee variants were byte-identical except for the disagreement
    # measure, so they share one code path keyed by strategy name.  (This
    # variant has no 'Max STD Sampling' branch, matching the original.)
    committee_strategies = {
        'Query By Committee(Vote Entropy Sampling)': vote_entropy_sampling,
        'Query By Committee(Uncertainty Sampling)': uncertainty_sampling,
        'Query By Committee(Max Disagreement Sampling)': max_disagreement_sampling,
        'Query By Committee(Consensus Entropy Sampling)': consensus_entropy_sampling,
    }
    if st in committee_strategies:
        committee = Committee(
            learner_list=[
                ActiveLearner(estimator=RandomForestClassifier(),
                              X_training=X_train, y_training=y_train),
                ActiveLearner(estimator=KNeighborsClassifier(),
                              X_training=X_train, y_training=y_train),
                ActiveLearner(estimator=DecisionTreeClassifier(),
                              X_training=X_train, y_training=y_train),
            ],
            query_strategy=committee_strategies[st],
        )
        params["committee"] = committee
        accuracy_scores = committee.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        data = Data(n_queries, X_pool, y_pool, None, committee, [accuracy_scores],
                    X_test, y_test, classlist, n_queries, image_data)
        return helper()
query_strategy=strategy) member4 = ActiveLearner(X_training=X_train, y_training=Y_train, estimator=KNeighborsClassifier(n_neighbors=8), query_strategy=strategy) member5 = ActiveLearner(X_training=X_train, y_training=Y_train, estimator=KNeighborsClassifier(n_neighbors=10), query_strategy=strategy) committee = Committee( learner_list=[member1, member2, member3, member4, member5]) # In[8]: print("Initial accuracy =", committee.score(X, Y)) # In[9]: member1r = ActiveLearner(X_training=X_train, y_training=Y_train, estimator=RandomForestClassifier(n_estimators=8), query_strategy=strategy) member2r = ActiveLearner(X_training=X_train, y_training=Y_train, estimator=RandomForestClassifier(n_estimators=1), query_strategy=strategy) member3r = ActiveLearner(X_training=X_train, y_training=Y_train, estimator=RandomForestClassifier(n_estimators=10), query_strategy=strategy)