Exemplo n.º 1
0
def test_raises_error_if_df_contains_na(df_enc_big, df_enc_big_na):
    """Check that OneHotEncoder raises ValueError on NA data in fit and transform.

    Both arguments are pytest fixtures defined outside this view; judging by
    its name, ``df_enc_big_na`` is the variant that contains missing values.
    """
    # fit: passing the NA dataset must raise ValueError.
    # The encoder is built outside the ``raises`` block so that only the
    # fit() call itself can satisfy the expectation.
    encoder = OneHotEncoder()
    with pytest.raises(ValueError):
        encoder.fit(df_enc_big_na)

    # transform: fit on the other fixture first, outside the ``raises``
    # block, so an unexpected ValueError during fit() fails the test instead
    # of being mistaken for the expected error from transform().
    encoder = OneHotEncoder()
    encoder.fit(df_enc_big)
    with pytest.raises(ValueError):
        encoder.transform(df_enc_big_na)
Exemplo n.º 2
0
# Amenity questions: every select box offers the same two answers, and each
# chosen answer is passed through word_convert before being stored.
YES_NO_OPTIONS = ('Yes', 'No')

security_doors = word_convert(
    st.selectbox('Do you want security doors ?', YES_NO_OPTIONS)
)
cctv = word_convert(
    st.selectbox('Do you want CCTV surveillance ?', YES_NO_OPTIONS)
)
bq = word_convert(
    st.selectbox('Do you want Boys Quarters ?', YES_NO_OPTIONS)
)
gym = word_convert(
    st.selectbox('Do you need gym facilities ?', YES_NO_OPTIONS)
)
pool = word_convert(
    st.selectbox('Do you need swimming pool ?', YES_NO_OPTIONS)
)

# Modeling step

# Split the data into the target column and the predictor columns.
target = data['Price']
# Name the axis explicitly via `columns=`: the positional form
# `drop('Price', 1)` was deprecated in pandas 1.3 and no longer works in
# pandas 2.0.
features = data.drop(columns='Price')

# Encoding step: fit the encoder on the predictors, then replace them with
# their encoded form.
encode = OneHotEncoder()
encode.fit(features)
features = encode.transform(features)

# Split into train and test sets (test_size=0.2) with a fixed random_state
# so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(features,
                                                    target,
                                                    test_size=0.2,
                                                    random_state=0)
# Create the model object with its default settings
model = RandomForestRegressor()
# Fit the model on the training split
model.fit(X_train, y_train)
# Predicted values
estimate = pd.DataFrame(
Exemplo n.º 3
0
# Numeric predictor columns: int64/float64 columns minus the 'Survived' target.
num_features = (
    df.select_dtypes(include=['int64', 'float64'])
    .drop(columns='Survived')
    .columns
)
num_features
# %%
# Columns with 'category' or 'object' dtype.
cat_features = df.select_dtypes(include=['category', 'object']).columns
cat_features
# %%
# Every column except the 'Survived' target, as a plain list.
features = df.drop(columns='Survived').columns.to_list()
features
# %%
# Encoder restricted to three named columns; drop_last is switched off.
onehot = OneHotEncoder(
    variables=['Pclass', 'Sex', 'Embarked'],
    drop_last=False,
)

# %%
# Fit on the predictor columns and preview the first transformed rows.
onehot.fit(df[features])
onehot.transform(df[features]).head()
# %%
# Build the model inputs: encoded predictors and the target column.
X = onehot.transform(df[features])
y = df['Survived']
print(X.shape)
# %%
# Separate into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0)

# Configure the classifier first, then fit it on the training split.
logistic_model = LogisticRegression(penalty='l2', C=1.0, solver='liblinear')
logistic_model = logistic_model.fit(X_train, y_train)
print(logistic_model)
# %%
 def encorder(self, y):
     """Return ``y`` transformed by a freshly fitted ``OneHotEncoder``.

     A new encoder with default settings is created on every call, fitted
     on ``y``, and then applied to that same ``y``.

     Args:
         y: Data to encode (described as a dataframe by the original
             docstring; confirm against callers).

     Returns:
         Whatever ``OneHotEncoder.transform`` returns for ``y``.
     """
     one_hot = OneHotEncoder()
     one_hot.fit(y)
     encoded = one_hot.transform(y)
     return encoded