# Predicted insurance claims accounts

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
# Read the insurance dataset from the working directory and echo its first
# five rows as a quick sanity check of the columns that were parsed.
df = pd.read_csv(filepath_or_buffer='insurance.csv')
print(df.head(n=5))
# Replace the text-valued columns with integer codes so they can be fed to
# the regression model.
# NOTE(review): LabelEncoder produces integer codes with an implied order; if
# 'region' has more than two categories, one-hot encoding may suit a linear
# model better -- confirm before relying on the coefficients.
le = LabelEncoder()
for column in ('sex', 'smoker', 'region'):
    # The same encoder is refit for every column, so once the loop finishes
    # `le` only remembers the mapping learned from the last column ('region').
    df[column] = le.fit_transform(df[column])
# Separate predictors from the target: every column except 'charges' is a
# feature, and 'charges' is the value being predicted.
X = df.drop(columns=['charges'])
y = df['charges']

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Standardize the features. The scaler is fitted on the training split only
# and then merely applied to the test split, so the test rows do not
# influence the scaling statistics.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Fit a linear regression on the training split, then predict charges for the
# held-out test split.
model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
# Score the predictions against the held-out targets. RMSE is taken as the
# square root of the mean squared error.
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print("Mean Absolute Error (MAE):", mae)
print("Root Mean Squared Error (RMSE):", rmse)
# Scatter plot of charges against BMI over the full dataset (not just the
# test split).
plt.figure(figsize=(8, 6))
sns.scatterplot(x='bmi', y='charges', data=df)
plt.title('Impact of BMI on Insurance Charges')
plt.show()
# Scatter plot of charges against age over the full dataset (not just the
# test split).
plt.figure(figsize=(8, 6))
sns.scatterplot(x='age', y='charges', data=df)
plt.title('Impact of Age on Insurance Charges')
plt.show()
# Box plot of charges grouped by smoking status over the full dataset.
# The 'smoker' column has already been label-encoded earlier in the script,
# so the x-axis shows the integer codes rather than the original text labels.
plt.figure(figsize=(8, 6))
sns.boxplot(x='smoker', y='charges', data=df)
plt.title('Impact of Smoking Status on Insurance Charges')
plt.show()