Exemplo n.º 1
0
def train(
    original_data_path, preprocessed_dir, excel, sheet_name, feature_data_path, model_data_path, output_result_path
):
    """Fit, validate and save the color, quality and mixed regression models.

    The feature table is either loaded from existing ``*train*.csv`` files or
    computed from the raw inputs, depending on the module-level
    ``b_load_train_feat`` flag. It is then split into inputs (every top-level
    column except ``subjective_column_name``) and targets (the
    ``subjective_column_name`` columns).

    Args:
        original_data_path: Forwarded to ``_calc_train_features``; unused when
            features are loaded from disk.
        preprocessed_dir: Forwarded to ``_calc_train_features``; unused when
            features are loaded from disk.
        excel: Forwarded to ``_calc_train_features``; unused when features are
            loaded from disk.
        sheet_name: Forwarded to ``_calc_train_features``; unused when features
            are loaded from disk.
        feature_data_path: Forwarded to ``_calc_train_features``; unused when
            features are loaded from disk.
        model_data_path: Location handed to each model group's ``save``.
        output_result_path: Directory that receives the timestamped
            ``model_score*.json`` file with the validation results.

    Returns:
        A ``(color_models, quality_models, mixed_models)`` tuple.
    """
    # NOTE(review): the load branch reads the module-level `feature_data_home`
    # rather than the `feature_data_path` argument -- confirm this is intended.
    if not b_load_train_feat:
        features = _calc_train_features(original_data_path, preprocessed_dir, excel, sheet_name, feature_data_path)
    else:
        features = _load_features(feature_data_home, "*train*.csv")

    inputs = features.drop(subjective_column_name, axis=1, level=0)
    targets = features[subjective_column_name]

    # Validation results keyed by the model group's class name.
    scores = {}

    def _fit_validate_save(model_cls, x, y):
        # Build one model group, record its validation result, then save it.
        model = model_cls(feature_dimensions(x), feature_dimensions(y))
        label = model.__class__.__name__
        # 0.25 is passed straight to `validation`; presumably the held-out
        # fraction -- confirm against the regression classes.
        scores[label] = model.validation(x, y, 0.25)
        model.save(model_data_path)
        return model

    # Color score: trained on the hue features only.
    color_models = _fit_validate_save(
        ColorRegression,
        inputs[hue_column_name],
        targets[color_column_name],
    )

    # Quality score: trained on the full feature set.
    quality_models = _fit_validate_save(
        QualityRegression,
        inputs,
        targets[quality_column_name],
    )

    # Color and quality scores predicted jointly from the full feature set.
    mixed_models = _fit_validate_save(
        MixedRegression,
        inputs,
        targets[[color_column_name, quality_column_name]],
    )

    # Persist every validation result in one timestamped JSON file.
    timestamp = datetime.now().strftime("%Y-%m-%d %H.%M.%S")
    score_file = os.path.join(output_result_path, "model_score" + timestamp + ".json")
    PublicSupport.write_json(scores, score_file)

    return color_models, quality_models, mixed_models
Exemplo n.º 2
0
def load_models(model_data_path):
    """Restore the three regression model groups from ``model_data_path``.

    Each class's ``deserialize_regression`` is called with the same path, in
    the order color, quality, mixed.

    Args:
        model_data_path: Location passed unchanged to every
            ``deserialize_regression`` call.

    Returns:
        A ``(color_models, quality_models, mixed_models)`` tuple.
    """
    color_models = ColorRegression.deserialize_regression(model_data_path)
    quality_models = QualityRegression.deserialize_regression(model_data_path)
    mixed_models = MixedRegression.deserialize_regression(model_data_path)
    return color_models, quality_models, mixed_models