# Import libraries and load data
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
# Read the dataset; the relative path is resolved against the current
# working directory.
df = pd.read_csv('Churn_Modelling.csv')
print(df.head())

# Remove columns that are not used as model inputs (presumably pure
# identifiers -- confirm against the dataset description).
df = df.drop(columns=['RowNumber', 'CustomerId', 'Surname'])

# Report how many missing values each remaining column contains.
missing_per_column = df.isna().sum()
print(missing_per_column)
# Replace the text categories in 'Geography' and 'Gender' with integer codes.
# Each column gets its own LabelEncoder, stored in `label_encoders`, so the
# code -> label mapping of every column can still be recovered later with
# inverse_transform(); refitting a single shared encoder would discard the
# Geography mapping as soon as Gender is fitted.
label_encoders = {}
for column in ('Geography', 'Gender'):
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le
# After the loop `le` is the encoder fitted on 'Gender', matching the state
# the name had when one encoder was reused for both columns.

# X holds every column except 'Exited'; y is the 'Exited' column, which the
# rest of the script uses as the label to predict.
X = df.drop(['Exited'], axis=1)
y = df['Exited']
# Hold out 20% of the rows for evaluation.  random_state pins the shuffle so
# repeated runs produce the same split.
# NOTE(review): the split is not stratified; if 'Exited' is imbalanced,
# consider passing stratify=y -- confirm before changing reported metrics.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only and reuse those statistics for the
# test rows, so nothing from the held-out data influences preprocessing.
# Both calls return NumPy arrays, so X_train / X_test no longer carry column
# names after this point (the original names remain available on X).
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Train a random forest of 100 trees; random_state makes the fitted model
# reproducible from run to run.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict labels for the held-out rows.
y_pred = model.predict(X_test)
# Compare the predictions with the true held-out labels.
accuracy = accuracy_score(y_test, y_pred)
print("Model Accuracy:", accuracy)

# Per-class metrics as formatted by sklearn's classification_report.
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Confusion matrix computed with y_test as the true labels and y_pred as the
# predictions.
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
# Pair each importance score from the fitted model with its column name.
# Names are taken from X (the unscaled DataFrame) because the scaled
# training data is a plain array without column labels.
feature_importance = model.feature_importances_
feature_names = X.columns

# Build a two-column table and list the highest-scoring features first.
feature_importance_df = pd.DataFrame(
    {'Feature': feature_names, 'Importance': feature_importance}
)
feature_importance_df = feature_importance_df.sort_values(
    by='Importance', ascending=False
)
print(feature_importance_df)