def prediction_pipeline(model):
    r"""AlphaPy Prediction Pipeline

    Parameters
    ----------
    model : alphapy.Model
        The model object for controlling the pipeline.

    Returns
    -------
    None : None

    Notes
    -----
    The saved model is loaded from disk, and predictions
    are made on the new testing data.

    """

    logger.info("Predict Mode")

    # Unpack the model specifications.
    # NOTE(review): 'extension' and 'separator' were unpacked here but never
    # used in this function, so they have been removed.
    directory = model.specs['directory']
    drop = model.specs['drop']
    feature_selection = model.specs['feature_selection']
    model_type = model.specs['model_type']
    rfe = model.specs['rfe']

    # Get all data. We need original train and test for interactions.
    partition = Partition.predict
    X_predict, _ = get_data(model, partition)

    # Load the feature map saved during training so prediction-time
    # transforms mirror the training pipeline.
    model = load_feature_map(model, directory)

    # Log feature statistics
    logger.info("Feature Statistics")
    logger.info("Number of Prediction Rows : %d", X_predict.shape[0])
    logger.info("Number of Prediction Columns : %d", X_predict.shape[1])

    # Apply treatments to the feature matrix
    all_features = apply_treatments(model, X_predict)

    # Drop features
    all_features = drop_features(all_features, drop)

    # Create initial features
    all_features = create_features(model, all_features)

    # Generate interactions
    all_features = create_interactions(model, all_features)

    # Remove low-variance features
    all_features = remove_lv_features(model, all_features)

    # Load the univariate support vector, if any.
    # A missing 'uni_support' key means univariate selection was not
    # performed during training; catch only KeyError so real errors
    # (e.g. a bad slice) are not silently swallowed.
    if feature_selection:
        logger.info("Getting Univariate Support")
        try:
            support = model.feature_map['uni_support']
            all_features = all_features[:, support]
            logger.info("New Feature Count : %d", all_features.shape[1])
        except KeyError:
            logger.info("No Univariate Support")

    # Load the RFE support vector, if any (same KeyError-only policy).
    if rfe:
        logger.info("Getting RFE Support")
        try:
            support = model.feature_map['rfe_support']
            all_features = all_features[:, support]
            logger.info("New Feature Count : %d", all_features.shape[1])
        except KeyError:
            logger.info("No RFE Support")

    # Load predictor
    predictor = load_predictor(directory)

    # Make predictions
    logger.info("Making Predictions")
    tag = 'BEST'
    model.preds[(tag, partition)] = predictor.predict(all_features)
    if model_type == ModelType.classification:
        # For binary classification, keep only the positive-class probability.
        model.probas[(tag, partition)] = predictor.predict_proba(all_features)[:, 1]

    # Save predictions.
    # NOTE(review): an unused timestamp (datetime.now().strftime("%Y%m%d"))
    # was computed here but never referenced; removed as dead code.
    save_predictions(model, tag, partition)
def prediction_pipeline(model):
    r"""AlphaPy Prediction Pipeline

    Parameters
    ----------
    model : alphapy.Model
        The model object for controlling the pipeline.

    Returns
    -------
    model : alphapy.Model
        The model object with the new predictions (and class
        probabilities for classification) recorded under the
        ('BEST', Partition.predict) key.

    Notes
    -----
    The saved model is loaded from disk, and predictions
    are made on the new testing data.

    """

    logger.info("Predict Mode")

    # Unpack the model specifications
    directory = model.specs['directory']
    drop = model.specs['drop']
    feature_selection = model.specs['feature_selection']
    model_type = model.specs['model_type']
    rfe = model.specs['rfe']

    # Get all data. We need original train and test for encodings.
    X_train, y_train = get_data(model, Partition.train)
    partition = Partition.predict
    X_predict, _ = get_data(model, partition)

    # Load the feature map saved during training so prediction-time
    # transforms mirror the training pipeline.
    model = load_feature_map(model, directory)

    # Log feature statistics
    logger.info("Feature Statistics")
    logger.info("Number of Prediction Rows : %d", X_predict.shape[0])
    logger.info("Number of Prediction Columns : %d", X_predict.shape[1])

    # Apply transforms to the feature matrix
    X_all = apply_transforms(model, X_predict)

    # Drop features
    X_all = drop_features(X_all, drop)

    # Create initial features (train data is needed for encodings)
    X_all = create_features(model, X_all, X_train, X_predict, y_train)

    # Generate interactions
    X_all = create_interactions(model, X_all)

    # Remove low-variance features
    X_all = remove_lv_features(model, X_all)

    # Load the univariate support vector, if any.
    # A missing 'uni_support' key means univariate selection was not
    # performed during training; catch only KeyError so real errors
    # (e.g. a bad slice) are not silently swallowed.
    if feature_selection:
        logger.info("Getting Univariate Support")
        try:
            support = model.feature_map['uni_support']
            X_all = X_all[:, support]
            logger.info("New Feature Count : %d", X_all.shape[1])
        except KeyError:
            logger.info("No Univariate Support")

    # Load the RFE support vector, if any (same KeyError-only policy).
    if rfe:
        logger.info("Getting RFE Support")
        try:
            support = model.feature_map['rfe_support']
            X_all = X_all[:, support]
            logger.info("New Feature Count : %d", X_all.shape[1])
        except KeyError:
            logger.info("No RFE Support")

    # Load predictor
    predictor = load_predictor(directory)

    # Make predictions
    logger.info("Making Predictions")
    tag = 'BEST'
    model.preds[(tag, partition)] = predictor.predict(X_all)
    if model_type == ModelType.classification:
        # For binary classification, keep only the positive-class probability.
        model.probas[(tag, partition)] = predictor.predict_proba(X_all)[:, 1]

    # Save predictions
    save_predictions(model, tag, partition)

    # Return the model
    return model