Exemplo n.º 1
0
    def __init__(self, project: Project):
        """Prepare the change collector and the validation dataset.

        The project's data is loaded through ``project.api.load_data()``, the
        raw target is passed through ``project.api.encoder.fit_transform``,
        and the output of ``subsample_data_for_validation`` is stored on the
        instance as ``X_df`` and ``y``.

        Args:
            project: Project object exposing ``api.load_data()`` and
                ``api.encoder``; it is also handed to ``ChangeCollector``.
        """
        self.change_collector = ChangeCollector(project)

        raw_features, raw_target = project.api.load_data()
        encoded_target = project.api.encoder.fit_transform(raw_target)
        sampled = subsample_data_for_validation(raw_features, encoded_target)
        self.X_df, self.y = sampled
Exemplo n.º 2
0
def validate_feature_acceptance(accepter_class, feature, features, X_df, y_df,
                                X_df_val, y_val, subsample):
    """Judge a candidate feature using the given accepter class.

    Args:
        accepter_class: Callable invoked as
            ``accepter_class(X_df, y_df, X_df_val, y_val, features, feature)``;
            the object it returns must provide a ``judge()`` method.
        feature: Candidate feature, forwarded to the accepter unchanged.
        features: Forwarded to the accepter unchanged (presumably the
            already-accepted features -- confirm against callers).
        X_df: First dataset argument forwarded to the accepter.
        y_df: Second dataset argument forwarded to the accepter.
        X_df_val: Third dataset argument forwarded to the accepter.
        y_val: Fourth dataset argument forwarded to the accepter.
        subsample: When truthy, the four dataset arguments are first replaced
            by the output of ``subsample_data_for_validation``.

    Returns:
        Whatever the accepter's ``judge()`` returns.
    """
    if subsample:
        sampled = subsample_data_for_validation(X_df, y_df, X_df_val, y_val)
        X_df, y_df, X_df_val, y_val = sampled

    judge_obj = accepter_class(
        X_df, y_df, X_df_val, y_val, features, feature)
    verdict = judge_obj.judge()
    return verdict
Exemplo n.º 3
0
def validate_feature_api(
    feature: Feature,
    X_df: pd.DataFrame,
    y_df: Union[pd.DataFrame, pd.Series],
    subsample: bool,
    log_advice: bool = False,
) -> bool:
    """Run the ``FeatureApiCheck`` checks on a feature and log the outcome.

    Args:
        feature: Feature to check.
        X_df: Data passed to the checks alongside the feature.
        y_df: Target data passed to the checks.
        subsample: When truthy, ``X_df`` and ``y_df`` are first replaced by
            the output of ``subsample_data_for_validation``.
        log_advice: When truthy and the feature is not valid, log one
            ``failure: advice`` line per failure instead of the single
            summary line listing the failures.

    Returns:
        The validity flag reported by ``check_from_class``.
    """
    logger.debug(f'Validating feature {feature!r}')

    if subsample:
        sampled = subsample_data_for_validation(X_df, y_df)
        X_df, y_df = sampled

    outcome = check_from_class(FeatureApiCheck, feature, X_df, y_df)
    valid, failures, advice = outcome

    # Happy path first; everything below only runs for an invalid feature.
    if valid:
        logger.info('Feature is valid')
        return valid

    if not log_advice:
        logger.info(f'Feature is NOT valid; failures were {failures}')
        return valid

    logger.info(
        'Feature is NOT valid; here is some advice for resolving the '
        'feature API issues.')
    for failure, advice_item in zip(failures, advice):
        logger.info(f'{failure}: {advice_item}')
    return valid
Exemplo n.º 4
0
 def _load_validation_data(self, X_df: pd.DataFrame, y_df: pd.DataFrame,
                           subsample: bool):
     """Return the data to validate against, loading whatever is missing.

     Args:
         X_df: Feature data, or ``None`` to take it from
             ``self.api.load_data()``.
         y_df: Target data, or ``None`` to take it from
             ``self.api.load_data()``.
         subsample: When truthy, the pair is passed through
             ``subsample_data_for_validation`` before being returned.

     Returns:
         The ``(X_df, y_df)`` pair.
     """
     need_X = X_df is None
     need_y = y_df is None

     # Only hit the API when at least one of the two inputs is absent.
     if need_X or need_y:
         loaded_X, loaded_y = self.api.load_data()
         if need_X:
             X_df = loaded_X
         if need_y:
             y_df = loaded_y

     if subsample:
         X_df, y_df = subsample_data_for_validation(X_df, y_df)
     return X_df, y_df
Exemplo n.º 5
0
def validate_feature_api(feature, X, y, subsample=False):
    """Run the ``FeatureApiCheck`` checks on a feature and log the outcome.

    Args:
        feature: Feature to check.
        X: Data passed to the checks alongside the feature.
        y: Target data passed to the checks.
        subsample: When truthy, ``X`` and ``y`` are first replaced by the
            output of ``subsample_data_for_validation``.

    Returns:
        The validity flag reported by ``check_from_class``.
    """
    logger.debug('Validating feature {feature!r}'.format(feature=feature))

    if subsample:
        sampled = subsample_data_for_validation(X, y)
        X, y = sampled

    outcome = check_from_class(FeatureApiCheck, feature, X, y)
    valid, failures = outcome

    if not valid:
        message = 'Feature is NOT valid; failures were {failures}'.format(
            failures=failures)
        logger.info(message)
        return valid

    logger.info('Feature is valid')
    return valid
Exemplo n.º 6
0
def validate_feature_acceptance(feature, X, y, subsample=False, path=None,
                                package=None):
    """Build the project and let its feature accepter judge a feature.

    Args:
        feature: Candidate feature handed to the accepter.
        X: Data passed to ``project.build``.
        y: Target data passed to ``project.build``.
        subsample: When truthy, ``X`` and ``y`` are first replaced by the
            output of ``subsample_data_for_validation``.
        path: Used with ``Project.from_path`` when ``package`` is ``None``.
        package: When not ``None``, the project is created as
            ``Project(package)`` and ``path`` is ignored.

    Returns:
        Whatever the accepter's ``judge()`` returns.
    """
    # Resolution order: explicit package, then path, then the working dir.
    if package is None and path is None:
        project = Project.from_cwd()
    elif package is None:
        project = Project.from_path(path)
    else:
        project = Project(package)

    if subsample:
        sampled = subsample_data_for_validation(X, y)
        X, y = sampled

    build = project.build(X, y)

    # The accepter class is looked up on the project after the build.
    accepter_cls = _load_class(project, 'validation.feature_accepter')
    judge_obj = accepter_cls(build.X_df, build.y, build.features, feature)
    return judge_obj.judge()
Exemplo n.º 7
0
    def __init__(self, project):
        """Prepare the change collector and the validation dataset.

        The data returned by ``project.load_data()`` is passed through
        ``subsample_data_for_validation`` and the result is stored on the
        instance as ``X`` and ``y``.

        Args:
            project: Project object exposing ``load_data()``; it is also
                handed to ``ChangeCollector``.
        """
        self.change_collector = ChangeCollector(project)

        features, target = project.load_data()
        sampled = subsample_data_for_validation(features, target)
        self.X, self.y = sampled