def predict():
    """Score the passenger record(s) in the JSON request body with the saved Titanic model.

    The request JSON is loaded into a DataFrame, pushed through the same
    preprocessing objects that were persisted with the model (categorical and
    continuous imputers, one-hot encoding, scaler) and finally through the
    stored estimator.

    Returns:
        A Flask JSON response with a ``prediction`` field: a single string
        when one row was scored, or a list of strings when several rows were
        scored.
    """
    data = flask.request.json
    print("in service")
    print(data)
    titanic_test = pd.DataFrame(data)
    # print(titanic_test.info())

    # NOTE(review): the model bundle is re-read from disk on every request;
    # consider loading it once at start-up if latency matters.
    model_objects = joblib.load(os.path.join(dir, 'titanic_model_1.pkl'))

    titanic_test1 = utils.drop_features(
        titanic_test, ['PassengerId', 'Name', 'Ticket', 'Cabin'])
    utils.cast_to_cat(titanic_test1, ['Sex', 'Pclass', 'Embarked'])

    cat_features = utils.get_categorical_features(titanic_test1)
    # print(cat_features)
    cont_features = utils.get_continuous_features(titanic_test1)
    # print(cont_features)

    # Fill missing values with the imputers stored alongside the model.
    titanic_test1[cat_features] = model_objects.get('cat_imputers').transform(
        titanic_test1[cat_features])
    titanic_test1[cont_features] = model_objects.get('cont_imputers').transform(
        titanic_test1[cont_features])

    # Cast again after imputation, as the original flow does.
    utils.cast_to_cat(titanic_test1, ['Sex', 'Pclass', 'Embarked'])

    # A request usually carries only some of the possible levels, so register
    # the rest before one-hot encoding. add_categories() raises ValueError if
    # asked to add a level that is already a category, hence only the levels
    # that are actually missing are added.
    # NOTE(review): resulting category order (existing levels first, added
    # levels after) presumably has to match the column order the scaler was
    # fitted on -- confirm against the training script.
    expected_levels = (
        ('Sex', ['male', 'female']),
        ('Pclass', [1, 2, 3]),
        ('Embarked', ['S', 'Q', 'C']),
    )
    for column, levels in expected_levels:
        present = set(titanic_test1[column].cat.categories)
        missing = [level for level in levels if level not in present]
        if missing:
            titanic_test1[column] = titanic_test1[column].cat.add_categories(missing)
    # print(titanic_test1.info())

    titanic_test2 = utils.ohe(titanic_test1, cat_features)
    print(titanic_test2.shape)

    X_test = model_objects.get('scaler').transform(titanic_test2)
    result = model_objects.get('estimator').predict(X_test)
    print(result)

    # Return what the estimator actually predicted (previously a constant "1"
    # was sent back regardless of the model output). Values are stringified
    # so the single-row response keeps its original string type.
    predictions = [str(label) for label in result]
    if len(predictions) == 1:
        return flask.jsonify(prediction=predictions[0])
    return flask.jsonify(prediction=predictions)
# Quick look at the training frame.
house_train.shape
house_train.info()

# Read test.csv from `path` and inspect it the same way.
house_test = pd.read_csv(os.path.join(path, "test.csv"))
house_test.shape
house_test.info()

# Stack train and test row-wise so both go through identical preprocessing.
house = pd.concat([house_train, house_test], axis=0)
house.shape
house.info()

# MSSubClass is cast from continuous to categorical.
features_to_cast = ['MSSubClass']
cast_cont_to_cat(house, features_to_cast)

print(get_continuous_features(house))
print(get_categorical_features(house))

# Drop Id and SalePrice, plus whatever the missing-data helper reports
# for the 0.25 threshold.
features_to_drop = ['Id', 'SalePrice']
features_to_drop += get_features_to_drop_on_missingdata(house, 0.25)
house1 = drop_features(house, features_to_drop)
house1.info()

# Impute the categorical columns.
imputable_cat_features = get_categorical_features(house1)
cat_imputer = get_categorical_imputers(house1, imputable_cat_features)
house1[imputable_cat_features] = cat_imputer.transform(
    house1[imputable_cat_features]
)

# Impute the continuous columns.
imputable_cont_features = get_continuous_features(house1)
cont_imputer = get_continuous_imputers(house1, imputable_cont_features)
house1[imputable_cont_features] = cont_imputer.transform(
    house1[imputable_cont_features]
)
import classification_utils as cutils
import kernel_utils as kutils

# Directory holding train.csv.
dir = 'E:/'

titanic_train = pd.read_csv(os.path.join(dir, 'train.csv'))
print(titanic_train.shape)
print(titanic_train.info())

# Remove the listed columns (including the Survived label) from the
# feature frame.
titanic_train1 = utils.drop_features(
    titanic_train,
    ['PassengerId', 'Name', 'Survived', 'Ticket', 'Cabin'],
)

# Type casting: treat Sex, Pclass and Embarked as categorical.
utils.cast_to_cat(titanic_train1, ['Sex', 'Pclass', 'Embarked'])

cat_features = utils.get_categorical_features(titanic_train1)
print(cat_features)
cont_features = utils.get_continuous_features(titanic_train1)
print(cont_features)

# Handle missing data (imputation): categorical columns first,
# then continuous ones.
cat_imputers = utils.get_categorical_imputers(titanic_train1, cat_features)
titanic_train1[cat_features] = cat_imputers.transform(
    titanic_train1[cat_features]
)

cont_imputers = utils.get_continuous_imputers(titanic_train1, cont_features)
titanic_train1[cont_features] = cont_imputers.transform(
    titanic_train1[cont_features]
)

# Adding new levels (kept disabled, as in the original):
# titanic_train['Pclass'] = titanic_train['Pclass'].cat.add_categories([4,5])
hue="Survived", data=titanic, kind="count", size=6) sns.countplot(x='Cabin', data=titanic) titanic['Cabin'] = titanic['Cabin'].fillna('U') titanic = utils.drop_features(titanic, ['PassengerId', 'Name', 'Survived', 'Ticket']) #type casting utils.cast_to_cat( titanic, ['Sex', 'Pclass', 'Embarked', 'Title', 'FamilyGroup', 'Cabin']) cat_features = utils.get_categorical_features(titanic) print(cat_features) cont_features = utils.get_continuous_features(titanic) print(cont_features) #handle missing data(imputation) cat_imputers = utils.get_categorical_imputers(titanic, cat_features) titanic[cat_features] = cat_imputers.transform(titanic[cat_features]) cont_imputers = utils.get_continuous_imputers(titanic, cont_features) titanic[cont_features] = cont_imputers.transform(titanic[cont_features]) #one hot encoding titanic = utils.ohe(titanic, cat_features) #scale the data scaler = preprocessing.StandardScaler()