def calculate_table_train_tree(str_list, feature_list, trainx, trainy, predictx, predicty):
    """Tally, per bit position, how flipping that bit changes tree accuracy.

    For every 0/1 sequence in ``str_list`` a baseline accuracy is obtained
    from ``train_tree``. Each position ``i`` is then flipped in turn via
    ``reverse_index`` and the accuracy is measured again. Depending on
    whether the flipped variant scored strictly higher and on the value of
    ``num[i]``, either the positive or the negative counter for position
    ``i`` is incremented by one.

    Both tables start every counter at ``len(str_list)`` rather than zero.

    :param str_list: sequence of equal-length 0/1 sequences. An all-zero
        entry (as reported by ``num_all_zero``) has its first element set to
        1 in place before evaluation, so the caller's data may be mutated.
    :param feature_list: passed to ``num_to_feature`` together with each
        0/1 sequence (presumably the candidate feature indices - confirm).
    :param trainx: training data handed to ``read_data_feature``.
    :param trainy: training targets handed to ``train_tree``.
    :param predictx: evaluation data handed to ``read_data_feature``.
    :param predicty: evaluation targets handed to ``train_tree``.
    :return: ``(positive_table, nagative_table)``, two lists with one
        counter per bit position. Both are empty when ``str_list`` is empty.
    """
    # Guard the str_list[0] access below. len() is used instead of
    # truthiness so that array-like inputs are handled as well.
    if len(str_list) == 0:
        return [], []

    length = len(str_list[0])
    population = len(str_list)
    positive_table = [population] * length
    nagative_table = [population] * length

    def evaluate(bits):
        # Select the features encoded by `bits`, then train and score a tree.
        feature = num_to_feature(bits, feature_list)
        train_sample = read_data_feature(feature, trainx)
        predict_sample = read_data_feature(feature, predictx)
        return train_tree(train_sample, trainy, predict_sample, predicty)

    for num in str_list:
        if num_all_zero(num):
            # Set the first bit so the feature selection is never empty.
            num[0] = 1
        acc_original = evaluate(num)

        for i in range(length):
            acc_new = evaluate(reverse_index(num, i))
            # num[i] is deliberately read only after reverse_index has run,
            # exactly as before; whether reverse_index mutates `num` is not
            # visible from here.
            if acc_new > acc_original:
                counts_against = num[i] == 0
            else:
                counts_against = num[i] == 1

            if counts_against:
                nagative_table[i] += 1
            else:
                positive_table[i] += 1

    return positive_table, nagative_table
def one_point_hybridization_knn(forest_list, feature_list, trainx, trainy, predictx, predicty, neighbor):
    """Perform one round of single-point hybridization and score it with KNN.

    The forest is first replaced by the result of
    ``one_point_hybridization``. Every tree of the new forest is then
    evaluated with ``train_knn`` and its accuracy is stored under the
    tree's string form (``num_to_string``).

    :param forest_list: forest list, a two-level (nested) list.
    :param feature_list: indices of the feature set.
    :param trainx: training set.
    :param trainy: class labels for the training set.
    :param predictx: prediction set.
    :param predicty: class labels for the prediction set.
    :param neighbor: forwarded unchanged as the last argument of
        ``train_knn`` (presumably the number of neighbours - confirm).
    :return: ``(forest, forest_list)`` - a dict mapping each tree's 0/1
        string to its accuracy after hybridization, and the new forest.
    """
    hybrid_forest = one_point_hybridization(forest_list)

    # Accuracy of every tree, keyed by the tree's string representation.
    accuracy_by_tree = {}
    for tree in hybrid_forest:
        selected = num_to_feature(tree, feature_list)
        train_view = read_data_feature(selected, trainx)
        predict_view = read_data_feature(selected, predictx)
        score = train_knn(train_view, trainy, predict_view, predicty, neighbor)
        accuracy_by_tree[num_to_string(tree)] = score

    return accuracy_by_tree, hybrid_forest
feature_list = [] # 特征集合索引,特征集合的角标 for i in range(0, len(trainx[0])): feature_list.append(i) forest = {} #记录森林里的准确率 init_forest = random_init(50, len(trainx[0])) # for i in init_forest: # print i # print 'trainx',trainx # print 'trainy',trainy for num in init_forest: feature = num_to_feature(num, feature_list) train_sample = read_data_feature(feature, trainx) predict_sample = read_data_feature(feature, predictx) # print 'train_sample',train_sample # print 'train_y',trainy # print 'predict_sample',predict_sample # print 'predict_y',predicty acc = train_knn(train_sample, trainy, predict_sample, predicty, 1) num_string = num_to_string(num) forest[num_string] = acc # forest_area = [] forest_old = init_forest res = {} for i in range(0, 5):