import os
import sys

from dotenv import find_dotenv, load_dotenv
from sklearn.metrics import confusion_matrix
from xgboost import XGBClassifier

# Make the shared library folder (path stored in the "lib" env var) importable
envs = load_dotenv(find_dotenv())
file = os.getenv("lib")
sys.path.insert(0, file)

from utils import LoadData
from preprocessing import PreProcessing
from visuals import ClassifierVisual

# Import the data
dataset = LoadData("Churn_Modelling.csv").data
X = dataset.iloc[:, 3:13].values
y = dataset.iloc[:, 13].values

# Let's do some preprocessing...
processor = PreProcessing()

# Encode the categorical data (Country/Gender)
X[:, 1] = processor.encode(X[:, 1])
X[:, 2] = processor.encode(X[:, 2])
X = processor.hot_encoding(data=X, features=[1])
X = X[:, 1:]  # drop one dummy column to avoid the dummy variable trap

# Split the data into training and test sets
X_train, X_test, y_train, y_test = processor.split(X, y, test_size=0.2)

# Fit the XGBoost classifier
classifier = XGBClassifier()
classifier.fit(X_train, y_train)

# Predict the test set results
y_pred = classifier.predict(X_test)

# Make the confusion matrix
cm = confusion_matrix(y_test, y_pred)
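# utils.py is not shown in this excerpt, so the class below is only a minimal
# sketch of what LoadData might look like, assuming it is a thin wrapper around
# pandas.read_csv. The .data attribute is taken from how it is used above;
# everything else is an assumption, not the actual implementation.
import pandas as pd


class LoadData:
    """Read a CSV file and expose it as a pandas DataFrame via .data."""

    def __init__(self, filename):
        # Assumed behaviour: load the file sitting next to the script
        self.data = pd.read_csv(filename)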
# Round the feature values to whole numbers (np.around returns a copy; X itself is unchanged)
np.around(X, decimals=0).astype(int)

# Splitting the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Feature scaling
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)

# ----------- Using the PreProcessing class from preprocessing.py: -----------
processor = PreProcessing()

# Fill missing data
X[:, 1:3] = processor.replace_with_mean(X[:, 1:3])

# Handle categorical data
X[:, 0] = processor.encode(X[:, 0])
X = processor.hot_encoding(X, features=[0])

# Encode the target
y = processor.encode(y)

# Split the data
X_train, X_test, y_train, y_test = processor.split(X, y, test_size=0.2, random_state=0)

# Feature scaling
X_train = processor.fit_scaler(X_train)
X_test = processor.scale(X_test)
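# preprocessing.py is likewise not shown. The sketch below is one way the
# PreProcessing methods called above could be implemented on top of
# scikit-learn. Every method name and signature here is inferred from the call
# sites, not taken from the real module, so treat it as an assumption-laden
# illustration rather than the project's actual code.
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler


class PreProcessing:
    """Thin wrappers around common scikit-learn preprocessing steps."""

    def __init__(self):
        self.scaler = StandardScaler()

    def replace_with_mean(self, data):
        # Fill missing values (NaN) column-wise with the column mean
        imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
        return imputer.fit_transform(data)

    def encode(self, column):
        # Label-encode a single categorical column (e.g. Country or Gender)
        return LabelEncoder().fit_transform(column)

    def hot_encoding(self, data, features):
        # One-hot encode the given column indices; pass the rest through,
        # placing the dummy columns first (which is why X[:, 1:] drops one)
        ct = ColumnTransformer(
            [("one_hot", OneHotEncoder(), features)], remainder="passthrough"
        )
        return ct.fit_transform(data)

    def split(self, X, y, test_size=0.2, random_state=0):
        # Plain pass-through to train_test_split
        return train_test_split(X, y, test_size=test_size, random_state=random_state)

    def fit_scaler(self, X_train):
        # Fit the standard scaler on the training set and transform it
        return self.scaler.fit_transform(X_train)

    def scale(self, X_test):
        # Scale new data with the already-fitted scaler
        return self.scaler.transform(X_test)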