Example #1
def prediction_pipeline(model):
    r"""AlphaPy Prediction Pipeline

    Parameters
    ----------
    model : alphapy.Model
        The model object for controlling the pipeline.

    Returns
    -------
    None : None

    Notes
    -----
    The saved model is loaded from disk, and predictions are made
    on the new testing data.

    """

    logger.info("Predict Mode")

    # Unpack the model specifications

    directory = model.specs['directory']
    drop = model.specs['drop']
    extension = model.specs['extension']
    feature_selection = model.specs['feature_selection']
    model_type = model.specs['model_type']
    rfe = model.specs['rfe']
    separator = model.specs['separator']

    # Get the prediction data.

    partition = Partition.predict
    X_predict, _ = get_data(model, partition)

    # Load feature_map
    model = load_feature_map(model, directory)

    # Log feature statistics

    logger.info("Feature Statistics")
    logger.info("Number of Prediction Rows    : %d", X_predict.shape[0])
    logger.info("Number of Prediction Columns : %d", X_predict.shape[1])

    # Apply treatments to the feature matrix
    all_features = apply_treatments(model, X_predict)

    # Drop features
    all_features = drop_features(all_features, drop)

    # Create initial features
    all_features = create_features(model, all_features)

    # Generate interactions
    all_features = create_interactions(model, all_features)

    # Remove low-variance features
    all_features = remove_lv_features(model, all_features)

    # Load the univariate support vector, if any

    if feature_selection:
        logger.info("Getting Univariate Support")
        try:
            support = model.feature_map['uni_support']
            all_features = all_features[:, support]
            logger.info("New Feature Count : %d", all_features.shape[1])
        except KeyError:
            logger.info("No Univariate Support")

    # Load the RFE support vector, if any

    if rfe:
        logger.info("Getting RFE Support")
        try:
            support = model.feature_map['rfe_support']
            all_features = all_features[:, support]
            logger.info("New Feature Count : %d", all_features.shape[1])
        except KeyError:
            logger.info("No RFE Support")

    # Load predictor
    predictor = load_predictor(directory)

    # Make predictions

    logger.info("Making Predictions")
    tag = 'BEST'
    model.preds[(tag, partition)] = predictor.predict(all_features)
    if model_type == ModelType.classification:
        probas = predictor.predict_proba(all_features)[:, 1]
        model.probas[(tag, partition)] = probas

    # Get a date stamp to record file creation

    timestamp = datetime.now().strftime("%Y%m%d")

    # Save predictions
    save_predictions(model, tag, partition)
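
Both support-vector blocks above reduce the feature matrix with the same
NumPy idiom: indexing with a boolean mask to keep only the selected columns.
A minimal sketch (hypothetical data, not AlphaPy code), assuming the mask has
the shape and semantics of scikit-learn's SelectKBest.get_support() or
RFE.support_, which is what the stored 'uni_support'/'rfe_support' vectors
appear to be:

import numpy as np

# Hypothetical stand-ins for all_features and a stored support vector.
features = np.random.rand(5, 4)                 # 5 rows, 4 candidate features
support = np.array([True, False, True, True])   # keep columns 0, 2, and 3

# Boolean-mask column selection, as in all_features[:, support] above.
reduced = features[:, support]
print(reduced.shape)                            # prints (5, 3)
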
Example #2
def prediction_pipeline(model):
    r"""AlphaPy Prediction Pipeline

    Parameters
    ----------
    model : alphapy.Model
        The model object for controlling the pipeline.

    Returns
    -------
    model : alphapy.Model
        The model object with the stored predictions.

    Notes
    -----
    The saved model is loaded from disk, and predictions are made
    on the new testing data.

    """

    logger.info("Predict Mode")

    # Unpack the model specifications

    directory = model.specs['directory']
    drop = model.specs['drop']
    feature_selection = model.specs['feature_selection']
    model_type = model.specs['model_type']
    rfe = model.specs['rfe']

    # Get all data. We need original train and test for encodings.

    X_train, y_train = get_data(model, Partition.train)

    partition = Partition.predict
    X_predict, _ = get_data(model, partition)

    # Load feature_map
    model = load_feature_map(model, directory)

    # Log feature statistics

    logger.info("Feature Statistics")
    logger.info("Number of Prediction Rows    : %d", X_predict.shape[0])
    logger.info("Number of Prediction Columns : %d", X_predict.shape[1])

    # Apply transforms to the feature matrix
    X_all = apply_transforms(model, X_predict)

    # Drop features
    X_all = drop_features(X_all, drop)

    # Create initial features
    X_all = create_features(model, X_all, X_train, X_predict, y_train)

    # Generate interactions
    X_all = create_interactions(model, X_all)

    # Remove low-variance features
    X_all = remove_lv_features(model, X_all)

    # Load the univariate support vector, if any

    if feature_selection:
        logger.info("Getting Univariate Support")
        try:
            support = model.feature_map['uni_support']
            X_all = X_all[:, support]
            logger.info("New Feature Count : %d", X_all.shape[1])
        except KeyError:
            logger.info("No Univariate Support")

    # Load the RFE support vector, if any

    if rfe:
        logger.info("Getting RFE Support")
        try:
            support = model.feature_map['rfe_support']
            X_all = X_all[:, support]
            logger.info("New Feature Count : %d", X_all.shape[1])
        except KeyError:
            logger.info("No RFE Support")

    # Load predictor
    predictor = load_predictor(directory)

    # Make predictions

    logger.info("Making Predictions")
    tag = 'BEST'
    model.preds[(tag, partition)] = predictor.predict(X_all)
    if model_type == ModelType.classification:
        model.probas[(tag, partition)] = predictor.predict_proba(X_all)[:, 1]

    # Save predictions
    save_predictions(model, tag, partition)

    # Return the model
    return model
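
Unlike Example #1, this version returns the model, so results can be read
back out of model.preds and model.probas under the same (tag, partition) key
used inside the pipeline. A minimal usage sketch (hypothetical; it assumes an
already-configured alphapy.Model object, which AlphaPy normally builds from
the project's model.yml configuration):

# `model` is assumed to be a configured alphapy.Model instance.
model = prediction_pipeline(model)

tag, partition = 'BEST', Partition.predict
predictions = model.preds[(tag, partition)]

if model.specs['model_type'] == ModelType.classification:
    # Positive-class probabilities, i.e. predict_proba(...)[:, 1] above.
    probabilities = model.probas[(tag, partition)]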