示例#1
0
# Load the full data set. The commented-out variant below takes a 2000-row
# random sample instead, for quicker debug runs.
# df: DataFrame = dl.load_df_corn_pkl_smpl_25_20171018().sample(2000)

df: DataFrame = dl.load_df_corn_pkl_smpl_25_20171018()
# Use the module logger (the same `log` the fold loop uses) rather than the
# root-level logging.debug(), which implicitly calls basicConfig() when the
# root logger has no handlers.
log.debug("data shape: %s", df.shape)

# pop() removes the target column from df in place and returns it, so the
# remaining frame holds only the feature columns.
y = df.pop('Dry_Yield')
X = df

###
# transform
###

# shape_gis_pps returns the reshaped features together with label_cols, which
# is forwarded to make_one_hot_pipeline (presumably the categorical columns to
# one-hot encode -- confirm against the helper).
X, label_cols = preprocessing.shape_gis_pps(X)
transform_pipe = make_one_hot_pipeline(X, label_cols, [
    preprocessing.FillNaTransformer(),
    preprocessing.NumericTransformer(),
    StandardScaler()
])

###
# Run Model
###
# 10 shuffled folds with a fixed seed (assuming scikit-learn's KFold, this
# makes the split reproducible across runs).
kcv = KFold(n_splits=10, shuffle=True, random_state=972)

# islice is lazy and single-pass: only the first 3 of the 10 splits are run.
kf_runs = itertools.islice(kcv.split(X), 3)

# Presumably filled with one entry per fold by the loop that follows.
scores = []
for i, (train_split_idx, test_split_idx) in enumerate(kf_runs):
    log.info("Running kfold: %s", i)

    X_train_split, y_train_split = X.iloc[train_split_idx], y.iloc[
示例#2
0
# Data source: a previously pickled sample is loaded instead of the full set.
# The commented lines are the alternatives kept for reference:
#   - sample 2000 rows and dump them (presumably how the pickle was made --
#     confirm):
# df: DataFrame = dl.load_df_corn_pkl_smpl_25_20171018().sample(2000)
# dl.dump_sample(df, 'df-corn-20171018-scratch-debug.pickle')
#   - load the full data set:
# df: DataFrame = dl.load_df_corn_pkl_smpl_25_20171018()
df: DataFrame = dl.load_pickled('df-corn-20171018-scratch-debug.pickle')
log.debug("data shape: %s", df.shape)

# The target column is popped out of df in place; what is left is the
# feature frame.
y = df.pop('Dry_Yield')
X = df

###
# transform pipeline setup
###
X, label_cols = dl.shape_gis_pps(X)

# Steps handed to make_one_hot_pipeline. StandardScaler() is currently
# disabled (it was commented out of this list).
pipeline_steps = [
    preprocessing.FillNaTransformer(),
    preprocessing.NumericTransformer(),
]
transform_pipe, one_hot_label_enc = make_one_hot_pipeline(
    X, label_cols, pipeline_steps)


###
# Run Model
###
def save_plot(name: str) -> None:
    """Apply ``plt.tight_layout`` and write the figure out via ``plt.savefig``.

    Args:
        name: Passed to ``create_result_file_path`` to obtain the output path.
    """
    # Layout is adjusted first so the saved file reflects the padded layout.
    plt.tight_layout(pad=1.25)
    target_path = create_result_file_path(name)
    plt.savefig(target_path)


# 7 shuffled folds with a fixed seed (assuming scikit-learn's KFold, this
# makes the split reproducible across runs).
kcv = KFold(n_splits=7, shuffle=True, random_state=988)
# islice is lazy and single-pass: only the first 2 of the 7 splits are used.
kf_runs = itertools.islice(kcv.split(X), 2)