def predict(data):
    """Preprocess ``data`` with the configured pipeline and return predictions.

    The steps run in a fixed order, each driven by a list or value in
    ``config`` and implemented by a helper in ``pf``: missing-value
    imputation, elapsed-years computation, log transformation, rare-label
    grouping, categorical encoding, feature scaling, and finally the model
    prediction.

    Args:
        data: Tabular input supporting column assignment, list-based column
            selection and ``.copy()`` (presumably a pandas ``DataFrame`` --
            confirm against callers).

    Returns:
        Whatever ``pf.predict`` returns for the scaled feature set.
    """
    # Work on a copy: every step below assigns columns back into ``data``,
    # which would otherwise modify the caller's object and make a second
    # call on the same input operate on already-transformed values.
    data = data.copy()

    # Impute missing values in the categorical variables.
    for column in config.CATEGORICAL_TO_IMPUTE:
        data[column] = pf.impute_na(data, column, replacement='Missing')

    # Impute missing values in the numerical variable.
    data[config.NUMERICAL_TO_IMPUTE] = pf.impute_na(
        data,
        config.NUMERICAL_TO_IMPUTE,
        replacement=config.LOTFRONTAGE_MODE,
    )

    # Capture elapsed time for the year variable, relative to 'YrSold'.
    data[config.YEAR_VARIABLE] = pf.elapsed_years(
        data, config.YEAR_VARIABLE, ref_var='YrSold'
    )

    # Log-transform the numerical variables.
    for column in config.NUMERICAL_LOG:
        data[column] = pf.log_transform(data, column)

    # Group rare labels. This pass finishes for every column before encoding
    # starts, matching the original order of operations.
    for column in config.CATEGORICAL_ENCODE:
        data[column] = pf.remove_rare_labels(
            data, column, config.FREQUENT_LABELS[column]
        )

    # Encode the categorical variables with the stored mappings.
    for column in config.CATEGORICAL_ENCODE:
        data[column] = pf.encode_categorical(
            data, column, config.ENCODING_MAPPINGS[column]
        )

    # Scale only the model features, using the scaler stored at the given path.
    data = pf.scale_features(data[config.FEATURES], config.OUTPUT_SCALER_PATH)

    # Make predictions with the model stored at the given path.
    predictions = pf.predict(data, config.OUTPUT_MODEL_PATH)

    return predictions
def predict(data):
    """Apply the configured preprocessing steps to ``data`` and predict.

    Each step is delegated to a ``pf`` helper and parameterised by ``config``:
    imputation of missing values, elapsed-years computation, log
    transformation, grouping of infrequent labels, categorical encoding,
    scaling, and the final prediction.

    Args:
        data: Tabular input that supports column assignment, selection by a
            list of column names and ``.copy()`` (presumably a pandas
            ``DataFrame`` -- confirm against callers).

    Returns:
        The result of ``pf.predict`` on the scaled features.
    """
    # Copy first so the caller's object is not modified by the column
    # assignments below; without this, calling predict twice on the same
    # input would re-transform already-transformed columns.
    data = data.copy()

    # Impute missing data: categorical variables get the 'Missing' label.
    for name in config.CATEGORICAL_TO_IMPUTE:
        data[name] = pf.impute_na(data, name, replacement='Missing')

    # Impute missing data: the numerical variable gets the configured value.
    data[config.NUMERICAL_TO_IMPUTE] = pf.impute_na(
        data,
        config.NUMERICAL_TO_IMPUTE,
        replacement=config.LOTFRONTAGE_MODE,
    )

    # Time intervals: year variable expressed relative to 'YrSold'.
    data[config.YEAR_VARIABLE] = pf.elapsed_years(
        data, config.YEAR_VARIABLE, ref_var='YrSold'
    )

    # Logarithmic transformation.
    for name in config.NUMERICAL_LOG:
        data[name] = pf.log_transform(data, name)

    # Grouping of infrequent labels (completed for all columns before the
    # encoding pass, as in the original ordering).
    for name in config.CATEGORICAL_ENCODE:
        data[name] = pf.remove_rare_labels(
            data, name, config.FREQUENT_LABELS[name]
        )

    # Encoding of categorical variables.
    for name in config.CATEGORICAL_ENCODE:
        data[name] = pf.encode_categorical(
            data, name, config.ENCODING_MAPPINGS[name]
        )

    # Scale the model features.
    data = pf.scale_features(data[config.FEATURES], config.OUTPUT_SCALER_PATH)

    # Obtain predictions.
    predictions = pf.predict(data, config.OUTPUT_MODEL_PATH)

    return predictions
# Load the dataset from the configured path.
data = pf.load_data(config.PATH_TO_DATASET)

# Split the dataset into train and test partitions.
X_train, X_test, y_train, y_test = pf.divide_train_test(data, config.TARGET)

# Impute categorical variables with the 'Missing' label.
for var in config.CATEGORICAL_TO_IMPUTE:
    X_train[var] = pf.impute_na(X_train, var, replacement='Missing')

# Impute the numerical variable with the configured replacement value.
X_train[config.NUMERICAL_TO_IMPUTE] = pf.impute_na(
    X_train,
    config.NUMERICAL_TO_IMPUTE,
    replacement=config.LOTFRONTAGE_MODE,
)

# Time intervals: year variable expressed relative to 'YrSold'.
X_train[config.YEAR_VARIABLE] = pf.elapsed_years(
    X_train,
    config.YEAR_VARIABLE,
    ref_var='YrSold',
)

# Logarithmic transformation of the numerical variables.
for var in config.NUMERICAL_LOG:
    X_train[var] = pf.log_transform(X_train, var)

# Grouping of infrequent categories.
for var in config.CATEGORICAL_ENCODE:
    X_train[var] = pf.remove_rare_labels(
        X_train,
        var,
        config.FREQUENT_LABELS[var],
    )

# Encoding of categorical variables with the stored mappings.
for var in config.CATEGORICAL_ENCODE:
    X_train[var] = pf.encode_categorical(
        X_train,
        var,
        config.ENCODING_MAPPINGS[var],
    )