# NOTE(review): flattened script fragment — the statements below were collapsed
# onto a single line, and the text is cut off mid-expression at `y.iloc[`, so the
# K-fold loop body is incomplete in this view.
# Visible steps: load the corn DataFrame, pop 'Dry_Yield' as y (the remainder is X),
# shape X via preprocessing.shape_gis_pps, build a one-hot pipeline with the
# FillNa / Numeric / StandardScaler steps, then iterate the first 3 of 10 KFold splits.
# It mixes `logging.debug` and `log.info` — confirm which logger is intended.
# df: DataFrame = dl.load_df_corn_pkl_smpl_25_20171018().sample(2000) df: DataFrame = dl.load_df_corn_pkl_smpl_25_20171018() logging.debug("data shape: %s", df.shape) y = df.pop('Dry_Yield') X = df ### # transform ### X, label_cols = preprocessing.shape_gis_pps(X) transform_pipe = make_one_hot_pipeline(X, label_cols, [ preprocessing.FillNaTransformer(), preprocessing.NumericTransformer(), StandardScaler() ]) ### # Run Model ### kcv = KFold(n_splits=10, shuffle=True, random_state=972) kf_runs = itertools.islice(kcv.split(X), 3) scores = [] for i, (train_split_idx, test_split_idx) in enumerate(kf_runs): log.info("Running kfold: %s", i) X_train_split, y_train_split = X.iloc[train_split_idx], y.iloc[
###
# load data
###

# Active source: a pickled debug sample loaded through dl.load_pickled.
# Disabled alternatives, kept as they appeared in the script:
#   df: DataFrame = dl.load_df_corn_pkl_smpl_25_20171018().sample(2000)
#   dl.dump_sample(df, 'df-corn-20171018-scratch-debug.pickle')
#   df: DataFrame = dl.load_df_corn_pkl_smpl_25_20171018()
# NOTE(review): the first two look like the recipe that produced the debug
# pickle — confirm before relying on that.
df: DataFrame = dl.load_pickled('df-corn-20171018-scratch-debug.pickle')
log.debug("data shape: %s", df.shape)

# pop() removes the target column from df in place, so X (the same object
# as df) holds only the remaining columns.
y = df.pop('Dry_Yield')
X = df

###
# transform pipeline setup
###

X, label_cols = dl.shape_gis_pps(X)

# Steps handed to the one-hot pipeline builder.
# , StandardScaler()  <- scaling step is currently commented out
pipeline_steps = [
    preprocessing.FillNaTransformer(),
    preprocessing.NumericTransformer(),
]
transform_pipe, one_hot_label_enc = make_one_hot_pipeline(
    X,
    label_cols,
    pipeline_steps,
)

###
# Run Model
###


def save_plot(name: str):
    """Apply ``plt.tight_layout(pad=1.25)`` and save the plot.

    The output location is whatever ``create_result_file_path(name)``
    returns; that value is passed straight to ``plt.savefig``.
    """
    plt.tight_layout(pad=1.25)
    destination = create_result_file_path(name)
    plt.savefig(destination)


kcv = KFold(n_splits=7, shuffle=True, random_state=988)
# Only the first 2 of the 7 configured splits are taken; islice keeps this lazy.
fold_iter = kcv.split(X)
kf_runs = itertools.islice(fold_iter, 2)