random_state=42, verbosity=2) pipeline_optimizer.fit(X, y_int) pipeline_optimizer.export('tpot_exported_pipeline.py') if TEST_FEATURE_SELECTION: fs.identify_missing(missing_threshold=0.6) missing_features = fs.ops['missing'] missing_features[:10] fs.plot_missing() fs.missing_stats.head(10) fs.identify_single_unique() single_unique = fs.ops['single_unique'] single_unique fs.plot_unique() fs.identify_collinear(correlation_threshold=0.975) correlated_features = fs.ops['collinear'] correlated_features[:5] fs.plot_collinear()
# In pandas, row labels are the `index` and column labels are the `columns`.
# A typical construction:
#   df = pd.DataFrame(np.random.randn(5, 3), index=list('abcde'),
#                     columns=['one', 'two', 'three'])

# Create the instance
fs = FeatureSelector(data=train, labels=train_labels)

# 1 Missing Values
fs.identify_missing(missing_threshold=0.6)
# The features identified for removal can be accessed through the ops
# dictionary of the FeatureSelector object.
missing_features = fs.ops['missing']
print(missing_features[:20])
fs.plot_missing()
# Call plt.show() after every plot so the figure is actually displayed.
plt.show()
print(fs.missing_stats.head(20))

# 2 Single Unique Value
fs.identify_single_unique()
single_unique = fs.ops['single_unique']
print(single_unique)
fs.plot_unique()
# NOTE (original author): none of the plots work well.
plt.show()
# sample(5) draws 5 random rows, so this output differs between runs.
print(fs.unique_stats.sample(5))

# 3 Collinear (highly correlated) Feature