Пример #1
0
from ballet import Feature
from ballet.eng import SimpleFunctionTransformer

# Columns handed to the feature's transformer (see the Feature(...) call below).
input = ["Overall Qual", "Overall Cond"]


def calc_qual(df):
    """Return "Overall Qual" minus "Overall Cond".

    ``df`` must support lookup by both column names; the subtraction is
    whatever ``-`` means for the looked-up values (element-wise for
    pandas columns).
    """
    quality = df["Overall Qual"]
    condition = df["Overall Cond"]
    return quality - condition


# Wrap calc_qual in a transformer and build the "Qual" feature from it.
transformer = SimpleFunctionTransformer(calc_qual)
name = "Qual"
feature = Feature(input=input, transformer=transformer, name=name)
from ballet import Feature
from ballet.eng import NullFiller, SimpleFunctionTransformer

# "Total Area" feature: per-row sum of basement, first-floor and
# second-floor square footage.
input = ["Total Bsmt SF", "1st Flr SF", "2nd Flr SF"]
transformer = [
    # Row-wise sum across the input columns (axis=1).
    SimpleFunctionTransformer(lambda df: df.sum(axis=1)),
    # NOTE(review): presumably replaces remaining nulls with a default
    # value -- confirm against the ballet.eng.NullFiller documentation.
    NullFiller()
]
name = "Total Area"
feature = Feature(input=input, transformer=transformer, name=name)
from ballet import Feature
from ballet.eng import SimpleFunctionTransformer

# Single input column (a plain string rather than a list); has_qual below
# names its argument ``ser`` accordingly.
input = 'BsmtFin SF 2'


def has_qual(ser):
    """Return a 0/1 integer indicator of which entries of ``ser`` exceed 0.

    Entries for which ``> 0`` is false (including NaN) map to 0.
    """
    is_positive = ser > 0
    return is_positive.astype(int)


# Wrap has_qual in a transformer and build the indicator feature from it.
transformer = SimpleFunctionTransformer(has_qual)
name = 'Basement type 2 indicator'
feature = Feature(input=input, transformer=transformer, name=name)
from ballet import Feature
from ballet.eng import GroupwiseTransformer, SimpleFunctionTransformer
from sklearn.impute import SimpleImputer


# Impute missing 'Lot Frontage' values using the median, first within each
# neighborhood and then over whatever is still missing.
input = ['Lot Frontage', 'Neighborhood']
transformer = [
    # Move 'Neighborhood' into the index (appended as an extra level) so the
    # next step can group on that index level.
    SimpleFunctionTransformer(lambda df: df.set_index('Neighborhood',
                                                      append=True)),
    # Median imputation applied group-by-group on the 'Neighborhood' level.
    # NOTE(review): handle_error='ignore' presumably leaves a group
    # untouched when its imputer fails -- confirm in the ballet docs.
    GroupwiseTransformer(
        SimpleImputer(strategy='median'),
        groupby_kwargs={'level': 'Neighborhood'},
        handle_error='ignore',
    ),
    # Second median imputation over the full column, acting as a fallback
    # for values the groupwise step did not fill.
    SimpleImputer(strategy='median'),
]
name = 'Imputed lot frontage by median per neighborhood'
feature = Feature(input=input, transformer=transformer, name=name)
Пример #5
0
    +
    b"AAAGNvcHlfWHEFiFgGAAAAbl9qb2JzcQZOWAUAAABjb2VmX3EHY251bXB5LmNvcmUubXVsd"
    +
    b"GlhcnJheQpfcmVjb25zdHJ1Y3QKcQhjbnVtcHkKbmRhcnJheQpxCUsAhXEKQwFicQuHcQxS"
    +
    b"cQ0oSwFLBoVxDmNudW1weQpkdHlwZQpxD1gCAAAAZjhxEEsASwGHcRFScRIoSwNYAQAAADx"
    +
    b"xE05OTkr/////Sv////9LAHRxFGKJQzCs1DbrA2ATwJ+EvJ2cgXZAgOdkq43NAUBPL1Ngi7"
    +
    b"xiwDcFZw6sG1VAsa1vEjH/c0BxFXRxFmJYCQAAAF9yZXNpZHVlc3EXY251bXB5LmNvcmUub"
    +
    b"XVsdGlhcnJheQpzY2FsYXIKcRhoEkMI3Eon+jyJq0JxGYZxGlJxG1gFAAAAcmFua19xHEsGWAkAAABzaW5ndWxhcl9xHW"
    +
    b"gIaAlLAIVxHmgLh3EfUnEgKEsBSwaFcSFoEolDMB/pi7yTBxpByuth8u3RrEDOZGHPHa6qQK+4epK+NaJADgShEyZ3mkC"
    +
    b"rrRv+vjeVQHEidHEjYlgKAAAAaW50ZXJjZXB0X3EkaBhoEkMIC+mioaOz/kBxJYZxJlJxJ1gQAAAAX3NrbGVhcm5fdmVy"
    + b"c2lvbnEoWAYAAAAwLjIyLjFxKXViLg==")


def base64_model(df):
    """Predict from ``df`` using the estimator serialized in ``secret``.

    The module-level ``secret`` bytes are base64-decoded and unpickled on
    every call. The ``keys`` columns of ``df`` are extracted as an array,
    NaN entries are replaced with 0.0, and the array is passed to the
    unpickled object's ``predict`` method.

    NOTE(review): ``pickle.loads`` can execute arbitrary code; this is only
    acceptable while ``secret`` remains a trusted, hard-coded constant.
    """
    serialized = base64.b64decode(secret)
    model = pickle.loads(serialized)
    features = df[keys].values
    # Zero out missing values in place before prediction.
    nan_positions = np.isnan(features)
    features[nan_positions] = 0.0
    return model.predict(features)


# Build the "Base64 Model" feature: the ``keys`` columns (defined earlier in
# this snippet) are fed to base64_model.
input = keys
transformer = [SimpleFunctionTransformer(base64_model)]
name = "Base64 Model"
feature = Feature(input=input, transformer=transformer, name=name)
Пример #6
0
from ballet import Feature
from ballet.eng import SimpleFunctionTransformer

# "Adjusted Overall Quality" feature: overall quality minus overall
# condition, computed per row.
input = ["Overall Qual", "Overall Cond"]
transformer = SimpleFunctionTransformer(
    lambda df: df["Overall Qual"] - df["Overall Cond"]
)
name = "Adjusted Overall Quality"
feature = Feature(input=input, transformer=transformer, name=name)
from ballet import Feature
from ballet.eng import SimpleFunctionTransformer

# 'Has open porch' feature: boolean flag that is True where the open porch
# area is greater than zero.
input = 'Open Porch SF'
transformer = SimpleFunctionTransformer(lambda ser: ser > 0)
name = 'Has open porch'
feature = Feature(input=input, transformer=transformer, name=name)
import pandas as pd
from ballet import Feature
from ballet.eng import SimpleFunctionTransformer
from sklearn.preprocessing import OneHotEncoder


def calc_porch_type(df):
    """Label each row with the single porch type that makes up its porch area.

    A row is labelled "Enclosed", "3Ssn" or "Open" when the matching column
    is positive and equals the row-wise sum of all columns in ``df``; every
    other row is labelled "Unknown".

    Returns a Series of labels sharing ``df``'s index.
    """
    # (column, label) pairs, applied in this order so that a later match
    # overwrites an earlier one.
    column_labels = (
        ("Enclosed Porch", "Enclosed"),
        ("3Ssn Porch", "3Ssn"),
        ("Open Porch SF", "Open"),
    )
    row_total = df.sum(axis=1)
    labels = pd.Series("Unknown", index=df.index)
    for column, label in column_labels:
        area = df[column]
        is_only_porch = (row_total == area) & (area > 0)
        labels[is_only_porch] = label
    return labels


# Porch-type feature: derive a categorical label from the three porch-area
# columns, then one-hot encode it.
input = ["Enclosed Porch", "3Ssn Porch", "Open Porch SF"]
transformer = [
    # Produces one label per row ("Enclosed", "3Ssn", "Open" or "Unknown").
    SimpleFunctionTransformer(calc_porch_type),
    # Expands the label into indicator columns.
    OneHotEncoder(),
]
name = "Porch Type (Cleaned, One-Hot Encoded)"
feature = Feature(input=input, transformer=transformer, name=name)
Пример #9
0
import numpy as np
from ballet import Feature
from ballet.eng import SimpleFunctionTransformer

# 'Sqrt Lot Area' feature: element-wise square root of the lot area.
input = 'Lot Area'
transformer = SimpleFunctionTransformer(np.sqrt)
name = 'Sqrt Lot Area'
feature = Feature(input=input, transformer=transformer, name=name)
Пример #10
0
import numpy as np
from ballet import Feature
from ballet.eng import SimpleFunctionTransformer

# Columns read by fill_frontage: the area supplies the fallback value for
# missing frontage.
input = ["Lot Area", "Lot Frontage"]


def fill_frontage(df):
    """Return "Lot Frontage" with missing values replaced by sqrt("Lot Area").

    The fallback treats the lot as a square, so its frontage is the square
    root of its area. Rows that already have a frontage are returned as-is.

    The input frame is left untouched: the previous implementation used a
    chained assignment (``df[col][mask] = ...``), which mutated the caller's
    data, triggered SettingWithCopyWarning, and has no effect at all under
    pandas copy-on-write.

    Args:
        df: DataFrame with "Lot Frontage" and "Lot Area" columns.

    Returns:
        A new Series named "Lot Frontage", aligned on ``df``'s index.
    """
    fallback = np.sqrt(df["Lot Area"])
    # fillna aligns on the index, so each gap gets its own row's fallback.
    return df["Lot Frontage"].fillna(fallback)


# Wrap fill_frontage in a transformer and build the feature from it.
transformer = SimpleFunctionTransformer(fill_frontage)
name = "Lot Frontage Fill"
feature = Feature(input=input, transformer=transformer, name=name)
Пример #11
0
import pandas as pd
from ballet import Feature
from ballet.eng import SimpleFunctionTransformer

# Columns read by calc_age: the garage year is the fallback for a missing
# build year.
input = ["Year Built", "Garage Yr Blt"]


def calc_age(df):
    """Return "Year Built", substituting "Garage Yr Blt" where it is null.

    Despite the name, no age is computed: the result is a year column with
    missing build years taken from the garage build year of the same row.
    """
    year_built = df["Year Built"]
    has_year = year_built.notnull()
    return year_built.where(has_year, df["Garage Yr Blt"])


# Wrap calc_age in a transformer and build the "Year built fill" feature.
transformer = SimpleFunctionTransformer(calc_age)
name = "Year built fill"
feature = Feature(input=input, transformer=transformer, name=name)
Пример #12
0
from ballet import Feature
from ballet.eng import SimpleFunctionTransformer

# "Total Bath" feature: per-row sum of the full-bath and half-bath counts
# (a half bath counts as 1, not 0.5).
input = ["Full Bath", "Half Bath"]
transformer = SimpleFunctionTransformer(lambda df: df["Full Bath"] + df["Half Bath"])
name = "Total Bath"
feature = Feature(input=input, transformer=transformer, name=name)
Пример #13
0
from ballet import Feature
from ballet.eng import NullFiller, SimpleFunctionTransformer

# "House Age" feature: years between the sale and the last remodel/addition
# (note: measured from "Year Remod/Add", not from the original build year).
input = ["Yr Sold", "Year Remod/Add"]
transformer = [
    SimpleFunctionTransformer(lambda df: df["Yr Sold"] - df["Year Remod/Add"]),
    # NOTE(review): presumably replaces nulls in the difference with a
    # default value -- confirm against the ballet.eng.NullFiller docs.
    NullFiller(),
]
name = "House Age"
feature = Feature(input=input, transformer=transformer, name=name)
Пример #14
0
from ballet import Feature
from ballet.eng import NullFiller, SimpleFunctionTransformer

# Columns read by calc_age: sale year and year of last remodel/addition.
input = ["Yr Sold", "Year Remod/Add"]


def calc_age(df):
    """Return "Yr Sold" minus "Year Remod/Add".

    For pandas columns this is the element-wise number of years between
    the sale and the last remodel/addition.
    """
    year_sold = df["Yr Sold"]
    year_remodeled = df["Year Remod/Add"]
    return year_sold - year_remodeled


# Apply calc_age, then NullFiller, and build the "Age" feature.
# NOTE(review): NullFiller presumably replaces nulls with a default value --
# confirm against the ballet.eng documentation.
transformer = [SimpleFunctionTransformer(calc_age), NullFiller()]
name = "Age"
feature = Feature(input=input, transformer=transformer, name=name)
from ballet import Feature
from ballet.eng import NamedFramer, SimpleFunctionTransformer
from sklearn.preprocessing import OneHotEncoder


# Zeros in 020-090 get cut off. This feature prepends them back.
def pad_zero(ser):
    """Return ``ser`` as strings, left-padded with '0' to at least 3 characters.

    Values that are already 3 or more characters long are unchanged.
    """
    as_text = ser.astype(str)
    return as_text.str.rjust(3, fillchar='0')


# One-hot encode 'MS SubClass' after restoring its three-character codes.
input = 'MS SubClass'
transformer = [
    # Convert to strings and left-pad with zeros to width 3.
    SimpleFunctionTransformer(pad_zero),
    # NOTE(review): presumably turns the series into a one-column frame
    # named 'MS SubClass' so the encoder gets 2-D input -- confirm against
    # the ballet.eng.NamedFramer documentation.
    NamedFramer('MS SubClass'),
    OneHotEncoder(),
]
name = 'Pad zeros in MS SubClass'
feature = Feature(input=input, transformer=transformer, name=name)
Пример #16
0
from ballet import Feature
from ballet.eng import NullFiller, SimpleFunctionTransformer

# "Total Area Calculation" feature: row-wise sum (axis=1) of basement,
# first-floor and second-floor square footage, followed by NullFiller.
# NOTE(review): NullFiller presumably replaces remaining nulls with a
# default value -- confirm against the ballet.eng documentation.
input = ["Total Bsmt SF", "1st Flr SF", "2nd Flr SF"]
transformer = [SimpleFunctionTransformer(lambda df: df.sum(axis=1)), NullFiller()]
name = "Total Area Calculation"
feature = Feature(input=input, transformer=transformer, name=name)