# NOTE(review): the long line at the end of this comment block starts with
# '#', so Python treats the whole line as one comment and none of the
# statements on it are executed. It looks like the end of an over-sampling
# routine (it ends in `return new_sample, label_out`) followed by an
# `if __name__ == '__main__':` script whose line breaks were lost.
# TODO: restore the original layout; the nesting depth of the leading
# statements cannot be determined from this span alone.
# Points to confirm once the layout is restored:
#   - `np.random.seed(self.random_state)`: if this sits inside a per-sample
#     loop it re-seeds the generator on every pass -- verify that is intended.
#   - `choice_all[choice_all != 0]` drops every candidate equal to 0 before
#     `np.random.choice`; verify that 0 is a sentinel and not a valid row index.
#   - the script section reads `x`, `columns` and `y`, which are not defined in
#     this span, and `y = y` is a self-assignment.
# Compute the interpolated sample # 3. Randomly pick one sample out of the K np.random.seed(self.random_state) choice_all = k_sample_index.flatten() # print('choice_all',choice_all) # print('choice_all[choice_all != 0]',choice_all != 0) choosed = np.random.choice(choice_all[choice_all != 0]) # print('choosed',choosed) # 4. Pick a point between the positive sample and the random sample diff = sample.iloc[choosed, ] - sample.iloc[i, ] # print('diff',type(diff), diff) gap = np.random.rand(1, n_atters) # print('gap', gap) # print('sample.iloc[i,]', sample.iloc[i,]) new.loc[i] = [x for x in sample.iloc[i, ] + gap.flatten() * diff] # print('new',new) label_out = np.r_[label_out, tp_less] ## add a label for the newly added row print('new', new) new_sample = pd.concat([x_data, new]) new_sample.reset_index(inplace=True, drop=True) return new_sample, label_out if __name__ == '__main__': x = pd.DataFrame(data=x, columns=columns) y = y smt = SMOTE() x_new, y_new = smt.over_sample(x, y) print(Counter(y_new))