def TrainStacking(df_train1, df_train2, models):
    """Fit base models on one frame and a linear stacker on their predictions.

    ``fit`` is called on every base model with features taken from
    ``df_train1``. Its ``predict`` output on ``df_train2`` becomes one column
    of the meta-feature matrix, on which a ``LinearRegression`` is then
    fitted against the ``df_train2`` target.

    Args:
        df_train1: Frame used to fit the base models.
        df_train2: Frame the base models predict on; it also supplies the
            target for the stacking regressor.
        models: Iterable of ``(data_type, model_type, model, columns)``
            tuples. With ``data_type == 'nume'`` the selected columns are
            cast to float; any other value sends the raw values of both
            frames through ``dp.CateEncoderTwoMat``. With
            ``model_type == 'classification'`` the model is fitted on the
            binarised target (1 where the target is > 0, else 0).

    Returns:
        ``(models, model_stack)``: the ``models`` argument as passed in
        (``fit`` has been called on each of its model objects) and the
        fitted stacking regressor.

    Raises:
        ValueError: If ``models`` yields no entries.

    Note:
        The target column name is read from the module-level name
        ``target``, which is not defined in this function.
    """
    y_stack = df_train2[target].values.astype(float)
    # The base-model target does not depend on the model, so extract it once.
    # The same array is handed to every regression model.
    # NOTE(review): assumes ``fit`` does not modify its ``y`` argument in place.
    y_base = df_train1[target].values.astype(float)

    stack_columns = []
    for data_type, model_type, model, columns in models:
        if data_type == 'nume':
            X_base = df_train1[columns].values.astype(float)
            X_meta = df_train2[columns].values.astype(float)
        else:
            X_base = df_train1[columns].values
            X_meta = df_train2[columns].values
            # Both matrices are passed together to the encoder.
            # NOTE(review): presumably so categories are encoded
            # consistently across the two frames -- confirm in ``dp``.
            X_base, X_meta = dp.CateEncoderTwoMat(X_base, X_meta)

        if model_type == 'classification':
            y_fit = np.where(y_base > 0, 1, 0)
        else:
            y_fit = y_base

        model.fit(X_base, y_fit)
        predictions = model.predict(X_meta)
        # One (n_rows, 1) column per base model.
        stack_columns.append(np.reshape(predictions, (len(predictions), 1)))

    if not stack_columns:
        raise ValueError('TrainStacking needs at least one base model')

    # Stack once at the end instead of re-copying the matrix per model.
    X_stack = np.hstack(stack_columns)
    model_stack = LinearRegression()
    model_stack.fit(X_stack, y_stack)
    return models, model_stack
def TrainAndValidationClassification(df_train, df_val, model, columns, data_type='nume'):
    """Fit ``model`` on the training frame and report accuracy on both frames.

    The target column is fixed to ``'logerror_sign'``. Progress messages and
    the two accuracy figures are written to standard output.

    Args:
        df_train: Frame used to fit ``model``.
        df_val: Frame used for validation only.
        model: Object exposing ``fit(X, y)`` and ``predict(X)``.
        columns: Column selection used as features in both frames.
        data_type: ``'nume'`` casts the selected columns to float; any other
            value sends the raw values of both frames through
            ``dp.CateEncoderTwoMat``.

    Returns:
        ``(y_train, y_val, p_train, p_val)``: the float targets and the
        model's predictions for the training and validation frames.
    """
    sys.stdout.write('Extract data ... ')
    sys.stdout.flush()
    if data_type == 'nume':
        X_train = df_train[columns].values.astype(float)
        X_val = df_val[columns].values.astype(float)
    else:
        X_train = df_train[columns].values
        X_val = df_val[columns].values
        X_train, X_val = dp.CateEncoderTwoMat(X_train, X_val)
    target = 'logerror_sign'
    y_train = df_train[target].values.astype(float)
    y_val = df_val[target].values.astype(float)
    sys.stdout.write('done.\n')

    sys.stdout.write('Train ... ')
    sys.stdout.flush()
    model.fit(X_train, y_train)
    sys.stdout.write('done.\n')

    sys.stdout.write('Predict ...')
    sys.stdout.flush()
    p_train = model.predict(X_train)
    p_val = model.predict(X_val)
    sys.stdout.write('done.\n')

    # Accuracy = fraction of predictions exactly equal to the label.
    acc_train = (p_train == y_train).sum() / float(len(y_train))
    acc_val = (p_val == y_val).sum() / float(len(y_val))
    # A single pre-formatted argument keeps these lines valid, and their
    # output identical, under both the Python 2 print statement and the
    # Python 3 print function.
    print('Accuracy in train : %s' % (acc_train,))
    print('Accuracy in validation : %s' % (acc_val,))
    return y_train, y_val, p_train, p_val
def TrainAndValidation(df_train, df_val, model, columns, data_type='nume'):
    """Fit ``model`` on the training frame and report MAE on both frames.

    Progress messages and the two mean-absolute-error figures are written to
    standard output.

    Args:
        df_train: Frame used to fit ``model``.
        df_val: Frame used for validation only.
        model: Object exposing ``fit(X, y)`` and ``predict(X)``.
        columns: Column selection used as features in both frames.
        data_type: ``'nume'`` casts the selected columns to float; any other
            value sends the raw values of both frames through
            ``dp.CateEncoderTwoMat``.

    Returns:
        ``(y_train, y_val, p_train, p_val)``: the float targets and the
        model's predictions for the training and validation frames.

    Note:
        The target column name is read from the module-level name
        ``target``, which is not defined in this function.
    """
    sys.stdout.write('Extract data ... ')
    sys.stdout.flush()
    if data_type == 'nume':
        X_train = df_train[columns].values.astype(float)
        X_val = df_val[columns].values.astype(float)
    else:
        X_train = df_train[columns].values
        X_val = df_val[columns].values
        X_train, X_val = dp.CateEncoderTwoMat(X_train, X_val)
    y_train = df_train[target].values.astype(float)
    y_val = df_val[target].values.astype(float)
    sys.stdout.write('done.\n')

    sys.stdout.write('Train ... ')
    sys.stdout.flush()
    model.fit(X_train, y_train)
    sys.stdout.write('done.\n')

    sys.stdout.write('Predict ...')
    sys.stdout.flush()
    p_train = model.predict(X_train)
    p_val = model.predict(X_val)
    sys.stdout.write('done.\n')

    mae_train = mean_absolute_error(y_train, p_train)
    mae_val = mean_absolute_error(y_val, p_val)
    # A single pre-formatted argument keeps these lines valid, and their
    # output identical, under both the Python 2 print statement and the
    # Python 3 print function.
    print('MAE in train : %s' % (mae_train,))
    print('MAE in validation : %s' % (mae_val,))
    return y_train, y_val, p_train, p_val