#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd
import numpy as np
import time
from joblib import dump
from joblib import load
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold, cross_validate
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import ElasticNet
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble  import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
import xgboost as xgb
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import explained_variance_score
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.inspection import plot_partial_dependence
import shap
shap.initjs()
#Eliminate warnings
import warnings
warnings.filterwarnings('ignore')


# In[2]:


def preprocess_data(df, test_size=0.2, random_state=48):
    """Split *df* into train/test sets and min-max scale the features.

    The ``'entry_id'`` and ``'T(℃)'`` columns are removed from the feature
    matrix; ``'T(℃)'`` is used as the regression target.

    Args:
        df: DataFrame that contains the ``'entry_id'`` and ``'T(℃)'`` columns
            plus the feature columns.
        test_size: Forwarded to ``train_test_split``.
        random_state: Forwarded to ``train_test_split``.

    Returns:
        A tuple ``(xtrain_normalized, xtest_normalized, ytrain, ytest, x)``
        where the first two items are the scaled feature arrays, the next two
        are the target splits and ``x`` is the unscaled feature DataFrame
        (callers use it for its column names).
    """
    x = df.drop(['entry_id', 'T(℃)'], axis=1)
    y = df['T(℃)']
    xtrain, xtest, ytrain, ytest = train_test_split(
        x, y, test_size=test_size, random_state=random_state
    )
    minmax_scaler = MinMaxScaler()
    xtrain_normalized = minmax_scaler.fit_transform(xtrain)
    # Apply the min/max learned on the training split. Re-fitting the scaler
    # on the test split would map train and test onto different scales and
    # leak test-set statistics into the evaluation.
    xtest_normalized = minmax_scaler.transform(xtest)
    return xtrain_normalized, xtest_normalized, ytrain, ytest, x


# In[3]:


def plot_predictions(yTrue, yPredict, model_score,model_name,file_name):
    """Plot true vs. predicted values, ordered by the true value.

    Args:
        yTrue: Observed target values.
        yPredict: Predicted target values, aligned with ``yTrue``.
        model_score: Indexable holding R2, RMSE, MAE and EV at positions 0-3.
        model_name: Model label used at the start of the title.
        file_name: Dataset label shown in square brackets in the title.
    """
    ordered = pd.DataFrame({'yTrue': yTrue, 'yPredict': yPredict}).sort_values(by='yTrue')
    true_curve = np.array(ordered['yTrue'])
    predicted_curve = np.array(ordered['yPredict'])

    plt.plot(true_curve, color='#1f77b4', label='yTrue')  # blue
    plt.plot(predicted_curve, color='#d62728', label='yPredict')  # red
    plt.legend()
    plt.title(" ".join([model_name, "True and Predicted Values", "[" + file_name + "]"]))
    plt.xlabel('Index')
    plt.ylabel("Temperature (℃)")

    # One annotation line per metric, stacked downwards in axes coordinates.
    annotation_rows = ((0.95, 'R2'), (0.90, 'RMSE'), (0.85, 'MAE'), (0.80, 'EV'))
    for position, (y_anchor, metric_tag) in enumerate(annotation_rows):
        plt.text(0.4, y_anchor, f'{metric_tag}: {model_score[position]:.3f}',
                 ha='left', va='top', transform=plt.gca().transAxes)
    plt.show()


# In[4]:


def calculate_r2_rmse_mae_ev(y_true, y_pred):
    """Compute and print R2, RMSE, MAE and explained variance.

    Args:
        y_true: Observed target values.
        y_pred: Predicted target values.

    Returns:
        Tuple ``(r2, rmse, mae, ev)``.
    """
    r2 = r2_score(y_true, y_pred)
    # RMSE is the square root of the mean squared error.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    ev = explained_variance_score(y_true, y_pred)  # explained variance
    print("R2 Score:", r2)
    print("RMSE:", rmse)
    print("MAE:", mae)
    print("EV:", ev)
    return r2, rmse, mae, ev


# In[5]:


def save_best_model(model, model_name, dataset_name):
    """Dump *model* with joblib to ``<model_name>_<dataset_name>.pkl``.

    The file is written relative to the current working directory and the
    chosen path is printed.
    """
    target_file = "{}_{}.pkl".format(model_name, dataset_name)
    dump(model, target_file)
    print("Best model saved to:", target_file)


# In[6]:


def train_and_save_best_mpr_model(xtrain, ytrain, file_name):
    """Grid-search a PolynomialFeatures + ElasticNet pipeline and save the winner.

    Args:
        xtrain: Training feature matrix.
        ytrain: Training targets.
        file_name: Dataset label used in the saved model's file name.

    Returns:
        The best pipeline found by the grid search, fitted on ``xtrain``.
    """
    pipeline = make_pipeline(PolynomialFeatures(), ElasticNet())
    param_grid = {
        'polynomialfeatures__degree': [1, 2],
        'elasticnet__alpha': [0.01, 0.1, 1],
        'elasticnet__l1_ratio': [0.1, 0.5, 1]
    }
    grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring='neg_mean_squared_error')
    grid_search.fit(xtrain, ytrain)
    # GridSearchCV uses refit=True by default, so best_estimator_ is already
    # trained on the full training set; fitting it again is wasted work.
    mpr_best = grid_search.best_estimator_
    save_best_model(mpr_best, 'mpr_best', file_name)
    print("mpr best parameters:", grid_search.best_params_)
    print('end\n')
    return mpr_best


# In[7]:


def train_and_save_best_svm_model(xtrain, ytrain, file_name):
    """Grid-search an RBF-kernel SVR and save the best estimator.

    Args:
        xtrain: Training feature matrix.
        ytrain: Training targets.
        file_name: Dataset label used in the saved model's file name.

    Returns:
        The best SVR found by the grid search, fitted on ``xtrain``.
    """
    param_grid = {
        'C': [0.1, 1, 10],
        'kernel': ['rbf'],
        'gamma': [0.1, 1, 10]
    }
    grid_search = GridSearchCV(estimator=SVR(), param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')
    grid_search.fit(xtrain, ytrain)
    # GridSearchCV uses refit=True by default, so best_estimator_ is already
    # trained on the full training set; fitting it again is wasted work.
    svm_best = grid_search.best_estimator_
    save_best_model(svm_best, 'svm_best', file_name)
    print("svm best parameters:", grid_search.best_params_)
    print('end\n')
    return svm_best


# In[8]:


def train_and_save_best_knn_model(xtrain, ytrain, file_name):
    """Grid-search a KNeighborsRegressor and save the best estimator.

    Args:
        xtrain: Training feature matrix.
        ytrain: Training targets.
        file_name: Dataset label used in the saved model's file name.

    Returns:
        The best KNN regressor found by the grid search, fitted on ``xtrain``.
    """
    param_grid = {
        'n_neighbors': [3, 5, 7, 9],
        'weights': ['uniform', 'distance'],
        'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']
    }
    grid_search = GridSearchCV(estimator=KNeighborsRegressor(), param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')
    grid_search.fit(xtrain, ytrain)
    # GridSearchCV uses refit=True by default, so best_estimator_ is already
    # trained on the full training set; fitting it again is wasted work.
    knn_best = grid_search.best_estimator_
    save_best_model(knn_best, 'knn_best', file_name)
    print("knn best parameters:", grid_search.best_params_)
    print('end\n')
    return knn_best


# In[9]:


def train_and_save_best_mlp_model(xtrain, ytrain, file_name):
    """Grid-search an MLPRegressor with shuffled 5-fold CV and save the winner.

    Args:
        xtrain: Training feature matrix.
        ytrain: Training targets.
        file_name: Dataset label used in the saved model's file name.

    Returns:
        ``best_estimator_`` of the fitted grid search.
    """
    search_space = {
        'hidden_layer_sizes': [(100, 50), (50, 25), (200,)],
        'activation': ['relu', 'tanh'],
        'alpha': [0.0001, 0.001, 0.01],
        'solver': ['adam', 'sgd'],
        'learning_rate': ['constant', 'adaptive'],
    }
    # random_state is fixed on both the network and the fold splitter so that
    # readers get similar results.
    folds = KFold(n_splits=5, shuffle=True, random_state=48)
    searcher = GridSearchCV(
        estimator=MLPRegressor(random_state=48),
        param_grid=search_space,
        cv=folds,
        scoring='neg_mean_squared_error',
        n_jobs=-1,
    )
    searcher.fit(xtrain, ytrain)
    mlp_best = searcher.best_estimator_
    save_best_model(mlp_best, 'mlp_best', file_name)
    print("mlp best parameters:", searcher.best_params_)
    print('end\n')
    return mlp_best


# In[10]:


def train_and_save_best_rfr_model(xtrain, ytrain, file_name):
    """Grid-search a RandomForestRegressor and save the best estimator.

    Args:
        xtrain: Training feature matrix.
        ytrain: Training targets.
        file_name: Dataset label used in the saved model's file name.

    Returns:
        ``best_estimator_`` of the fitted grid search.
    """
    # random_state=48: ensure the same results are obtained every time the
    # model is trained with the same data and parameters.
    forest = RandomForestRegressor(random_state=48)
    search_space = dict(
        n_estimators=[10, 50, 100, 200],
        max_depth=[5, 10, 15, 20],
        max_features=[5, 10, 15, 20],
        min_impurity_decrease=[0.0, 0.01, 0.05, 0.1],
    )
    searcher = GridSearchCV(
        estimator=forest,
        param_grid=search_space,
        cv=5,
        scoring='neg_mean_squared_error',
        n_jobs=-1,
    )
    searcher.fit(xtrain, ytrain)
    rfr_best = searcher.best_estimator_
    save_best_model(rfr_best, 'rfr_best', file_name)
    print('rfr best model:', rfr_best)
    print('end\n')
    return rfr_best


# In[11]:


def train_and_save_best_xgb_model(xtrain, ytrain, file_name):
    """Grid-search an XGBRegressor and save the best estimator.

    Args:
        xtrain: Training feature matrix.
        ytrain: Training targets.
        file_name: Dataset label used in the saved model's file name.

    Returns:
        ``best_estimator_`` of the fitted grid search.
    """
    param_grid = {
        'n_estimators': [100, 200, 300],
        'learning_rate': [0.01, 0.05, 0.1],
        'max_depth': [3, 5, 7],
        'colsample_bytree': [0.5, 0.7],
        'gamma': [0, 0.25, 1.0]
    }
    xg_reg = xgb.XGBRegressor(objective='reg:squarederror', n_jobs=-1)
    grid_search = GridSearchCV(estimator=xg_reg, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')
    grid_search.fit(xtrain, ytrain)
    best_params = grid_search.best_params_
    # GridSearchCV uses refit=True by default, so best_estimator_ is already
    # trained on the full training set with the winning parameters (and keeps
    # n_jobs=-1). Building and fitting a second regressor from best_params
    # only repeats that work, so the refitted estimator is used directly, as
    # in the MLP and random-forest helpers.
    xgb_best = grid_search.best_estimator_
    print('xgb best parameters:', best_params)
    print('xgb best estimator:', xgb_best)
    save_best_model(xgb_best, 'xgb_best', file_name)
    print('end\n')
    return xgb_best


# In[12]:


# plot R2 RMSE MAE EV
def plot_scores(score_mpr_xtrain, score_svm_xtrain, score_knn_xtrain, score_mlp_xtrain, score_rfr_xtrain, score_xgb_xtrain,
                score_mpr_xtest, score_svm_xtest, score_knn_xtest, score_mlp_xtest, score_rfr_xtest, score_xgb_xtest, file_name):
    """Draw a 2x2 grid comparing train and test metrics across the six models.

    Each ``score_*`` argument is indexable with R2, RMSE, MAE and EV at
    positions 0-3. Panel (a) shows R2, (b) RMSE, (c) MAE and (d) EV; every
    panel has one train line and one test line with the values annotated.

    Args:
        score_*_xtrain: Train-set metrics for MPR, SVM, KNN, MLP, RFR, XGB.
        score_*_xtest: Test-set metrics for the same models.
        file_name: Dataset label shown in square brackets in each title.
    """
    train_scores = (score_mpr_xtrain, score_svm_xtrain, score_knn_xtrain,
                    score_mlp_xtrain, score_rfr_xtrain, score_xgb_xtrain)
    test_scores = (score_mpr_xtest, score_svm_xtest, score_knn_xtest,
                   score_mlp_xtest, score_rfr_xtest, score_xgb_xtest)
    # Transpose "per model" tuples into "per metric" lists before any drawing.
    train_by_metric = [[score[k] for score in train_scores] for k in range(4)]
    test_by_metric = [[score[k] for score in test_scores] for k in range(4)]

    labels = ['MPR', 'SVM', 'KNN', 'MLP', 'RFR', 'XGB']
    # (metric position, subplot cell, metric name, panel tag)
    panels = (
        (0, (0, 0), 'R2', '(a)'),
        (1, (0, 1), 'RMSE', '(b)'),
        (2, (1, 0), 'MAE', '(c)'),
        (3, (1, 1), 'EV', '(d)'),
    )

    fig, axs = plt.subplots(2, 2, figsize=(15, 10))
    for metric_pos, (row, col), metric, tag in panels:
        panel = axs[row, col]
        # Train series with value labels
        panel.plot(labels, train_by_metric[metric_pos], marker='o', color='tab:blue', label='Train ' + metric)
        for model_label, value in zip(labels, train_by_metric[metric_pos]):
            panel.text(model_label, value, f'{value:.3f}', ha='left', va='top', fontsize=12, color='tab:blue')
        # Test series with value labels
        panel.plot(labels, test_by_metric[metric_pos], marker='^', color='tab:orange', label='Test ' + metric)
        for model_label, value in zip(labels, test_by_metric[metric_pos]):
            panel.text(model_label, value, f'{value:.3f}', ha='right', va='bottom', fontsize=12, color='tab:orange')
        panel.set_title(tag + ' ' + metric + ' Values' + ' [' + file_name + ']')
        panel.set_xlabel('Models')
        panel.set_ylabel(metric)
        panel.legend()
    plt.tight_layout()
    plt.show()


# In[13]:


# Dataset labels used in plot titles and saved-model file names
file_name_dabiesulu = "DabieSulu"
file_name_earth = "Earth"
# Read both spreadsheets
df_dabiesulu = pd.read_excel('1dabiesuluData_2282.xlsx')
df_earth = pd.read_excel('2earthData_997.xlsx')
# Split and normalise each dataset with identical settings
split_options = {'test_size': 0.2, 'random_state': 48}
pre_dabiesulu = preprocess_data(df_dabiesulu, **split_options)
pre_earth = preprocess_data(df_earth, **split_options)


# # Main

# In[14]:


# Fit every model family on the DabieSulu training split
xtrain_dabiesulu, ytrain_dabiesulu = pre_dabiesulu[0], pre_dabiesulu[2]
mpr_best_dabiesulu = train_and_save_best_mpr_model(xtrain_dabiesulu, ytrain_dabiesulu, file_name_dabiesulu)
svm_best_dabiesulu = train_and_save_best_svm_model(xtrain_dabiesulu, ytrain_dabiesulu, file_name_dabiesulu)
knn_best_dabiesulu = train_and_save_best_knn_model(xtrain_dabiesulu, ytrain_dabiesulu, file_name_dabiesulu)
mlp_best_dabiesulu = train_and_save_best_mlp_model(xtrain_dabiesulu, ytrain_dabiesulu, file_name_dabiesulu)
rfr_best_dabiesulu = train_and_save_best_rfr_model(xtrain_dabiesulu, ytrain_dabiesulu, file_name_dabiesulu)
xgb_best_dabiesulu = train_and_save_best_xgb_model(xtrain_dabiesulu, ytrain_dabiesulu, file_name_dabiesulu)
# Fit every model family on the Earth training split
xtrain_earth, ytrain_earth = pre_earth[0], pre_earth[2]
mpr_best_earth = train_and_save_best_mpr_model(xtrain_earth, ytrain_earth, file_name_earth)
svm_best_earth = train_and_save_best_svm_model(xtrain_earth, ytrain_earth, file_name_earth)
knn_best_earth = train_and_save_best_knn_model(xtrain_earth, ytrain_earth, file_name_earth)
mlp_best_earth = train_and_save_best_mlp_model(xtrain_earth, ytrain_earth, file_name_earth)
rfr_best_earth = train_and_save_best_rfr_model(xtrain_earth, ytrain_earth, file_name_earth)
xgb_best_earth = train_and_save_best_xgb_model(xtrain_earth, ytrain_earth, file_name_earth)


# In[ ]:


# Best model saved to: mpr_best_DabieSulu.pkl
# mpr best parameters: {'elasticnet__alpha': 0.1, 'elasticnet__l1_ratio': 1, 'polynomialfeatures__degree': 2}
# end

# Best model saved to: svm_best_DabieSulu.pkl
# svm best parameters: {'C': 10, 'gamma': 1, 'kernel': 'rbf'}
# end

# Best model saved to: knn_best_DabieSulu.pkl
# knn best parameters: {'algorithm': 'auto', 'n_neighbors': 3, 'weights': 'distance'}
# end

# Best model saved to: mlp_best_DabieSulu.pkl
# mlp best parameters: {'activation': 'tanh', 'alpha': 0.01, 'hidden_layer_sizes': (200,), 'learning_rate': 'adaptive', 'solver': 'sgd'}
# end

# Best model saved to: rfr_best_DabieSulu.pkl
# rfr best model: RandomForestRegressor(max_depth=20, max_features=20, random_state=48)
# end

# xgb best parameters: {'colsample_bytree': 0.7, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 300}
# xgb best estimator: XGBRegressor(base_score=None, booster=None, callbacks=None,
#              colsample_bylevel=None, colsample_bynode=None,
#              colsample_bytree=0.7, device=None, early_stopping_rounds=None,
#              enable_categorical=False, eval_metric=None, feature_types=None,
#              gamma=0, grow_policy=None, importance_type=None,
#              interaction_constraints=None, learning_rate=0.1, max_bin=None,
#              max_cat_threshold=None, max_cat_to_onehot=None,
#              max_delta_step=None, max_depth=3, max_leaves=None,
#              min_child_weight=None, missing=nan, monotone_constraints=None,
#              multi_strategy=None, n_estimators=300, n_jobs=-1,
#              num_parallel_tree=None, random_state=None, ...)
# Best model saved to: xgb_best_DabieSulu.pkl
# end

# Best model saved to: mpr_best_Earth.pkl
# mpr best parameters: {'elasticnet__alpha': 0.1, 'elasticnet__l1_ratio': 1, 'polynomialfeatures__degree': 2}
# end

# Best model saved to: svm_best_Earth.pkl
# svm best parameters: {'C': 10, 'gamma': 1, 'kernel': 'rbf'}
# end

# Best model saved to: knn_best_Earth.pkl
# knn best parameters: {'algorithm': 'ball_tree', 'n_neighbors': 5, 'weights': 'distance'}
# end

# Best model saved to: mlp_best_Earth.pkl
# mlp best parameters: {'activation': 'tanh', 'alpha': 0.001, 'hidden_layer_sizes': (200,), 'learning_rate': 'adaptive', 'solver': 'sgd'}
# end

# Best model saved to: rfr_best_Earth.pkl
# rfr best model: RandomForestRegressor(max_depth=15, max_features=20, min_impurity_decrease=0.1,
#                       random_state=48)
# end

# xgb best parameters: {'colsample_bytree': 0.7, 'gamma': 1.0, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200}
# xgb best estimator: XGBRegressor(base_score=None, booster=None, callbacks=None,
#              colsample_bylevel=None, colsample_bynode=None,
#              colsample_bytree=0.7, device=None, early_stopping_rounds=None,
#              enable_categorical=False, eval_metric=None, feature_types=None,
#              gamma=1.0, grow_policy=None, importance_type=None,
#              interaction_constraints=None, learning_rate=0.1, max_bin=None,
#              max_cat_threshold=None, max_cat_to_onehot=None,
#              max_delta_step=None, max_depth=3, max_leaves=None,
#              min_child_weight=None, missing=nan, monotone_constraints=None,
#              multi_strategy=None, n_estimators=200, n_jobs=-1,
#              num_parallel_tree=None, random_state=None, ...)
# Best model saved to: xgb_best_Earth.pkl
# end


# In[15]:


# # from joblib import dump
# # from joblib import load
# # Load the model
# mpr_best_dabiesulu=load('mpr_best_DabieSulu.pkl')
# svm_best_dabiesulu=load('svm_best_DabieSulu.pkl')
# knn_best_dabiesulu=load('knn_best_DabieSulu.pkl')
# mlp_best_dabiesulu=load('mlp_best_DabieSulu.pkl')
# rfr_best_dabiesulu=load('rfr_best_DabieSulu.pkl')
# xgb_best_dabiesulu=load('xgb_best_DabieSulu.pkl')
# mpr_best_earth=load('mpr_best_Earth.pkl')
# svm_best_earth=load('svm_best_Earth.pkl')
# knn_best_earth=load('knn_best_Earth.pkl')
# mlp_best_earth=load('mlp_best_Earth.pkl')
# rfr_best_earth=load('rfr_best_Earth.pkl')
# xgb_best_earth=load('xgb_best_Earth.pkl')


# In[16]:


# Evaluation on the DabieSulu dataset
def _evaluate_on_dabiesulu(model, train_title, test_title):
    """Predict, score and plot *model* on the DabieSulu train split, then the test split.

    Returns ``(train predictions, train scores, test predictions, test scores)``.
    """
    train_pred = model.predict(pre_dabiesulu[0])
    train_score = calculate_r2_rmse_mae_ev(pre_dabiesulu[2], train_pred)
    plot_predictions(pre_dabiesulu[2], train_pred, train_score, train_title, file_name_dabiesulu)
    test_pred = model.predict(pre_dabiesulu[1])
    test_score = calculate_r2_rmse_mae_ev(pre_dabiesulu[3], test_pred)
    plot_predictions(pre_dabiesulu[3], test_pred, test_score, test_title, file_name_dabiesulu)
    return train_pred, train_score, test_pred, test_score


# MPR - dabiesulu
(y_pred_mpr_xtrain_dabiesulu, score_mpr_xtrain_dabiesulu,
 y_pred_mpr_xtest_dabiesulu, score_mpr_xtest_dabiesulu) = _evaluate_on_dabiesulu(
    mpr_best_dabiesulu, "MPR(Train)", "MPR (Test)")
# SVM - dabiesulu
(y_pred_svm_xtrain_dabiesulu, score_svm_xtrain_dabiesulu,
 y_pred_svm_xtest_dabiesulu, score_svm_xtest_dabiesulu) = _evaluate_on_dabiesulu(
    svm_best_dabiesulu, "SVM(Train)", "SVM (Test)")
# KNN - dabiesulu
(y_pred_knn_xtrain_dabiesulu, score_knn_xtrain_dabiesulu,
 y_pred_knn_xtest_dabiesulu, score_knn_xtest_dabiesulu) = _evaluate_on_dabiesulu(
    knn_best_dabiesulu, "KNN(Train)", "KNN (Test)")
# MLP - dabiesulu
(y_pred_mlp_xtrain_dabiesulu, score_mlp_xtrain_dabiesulu,
 y_pred_mlp_xtest_dabiesulu, score_mlp_xtest_dabiesulu) = _evaluate_on_dabiesulu(
    mlp_best_dabiesulu, "MLP(Train)", "MLP (Test)")
# RFR - dabiesulu
(y_pred_rfr_xtrain_dabiesulu, score_rfr_xtrain_dabiesulu,
 y_pred_rfr_xtest_dabiesulu, score_rfr_xtest_dabiesulu) = _evaluate_on_dabiesulu(
    rfr_best_dabiesulu, "RFR(Train)", "RFR (Test)")
# XGB - dabiesulu
(y_pred_xgb_xtrain_dabiesulu, score_xgb_xtrain_dabiesulu,
 y_pred_xgb_xtest_dabiesulu, score_xgb_xtest_dabiesulu) = _evaluate_on_dabiesulu(
    xgb_best_dabiesulu, "XGB (Train)", "XGB (Test)")


# In[17]:


# Evaluation on the Earth dataset
def _evaluate_on_earth(model, train_title, test_title):
    """Predict, score and plot *model* on the Earth train split, then the test split.

    Returns ``(train predictions, train scores, test predictions, test scores)``.
    """
    train_pred = model.predict(pre_earth[0])
    train_score = calculate_r2_rmse_mae_ev(pre_earth[2], train_pred)
    plot_predictions(pre_earth[2], train_pred, train_score, train_title, file_name_earth)
    test_pred = model.predict(pre_earth[1])
    test_score = calculate_r2_rmse_mae_ev(pre_earth[3], test_pred)
    plot_predictions(pre_earth[3], test_pred, test_score, test_title, file_name_earth)
    return train_pred, train_score, test_pred, test_score


# MPR - earth
(y_pred_mpr_xtrain_earth, score_mpr_xtrain_earth,
 y_pred_mpr_xtest_earth, score_mpr_xtest_earth) = _evaluate_on_earth(
    mpr_best_earth, "MPR(Train)", "MPR (Test)")
# SVM - earth
(y_pred_svm_xtrain_earth, score_svm_xtrain_earth,
 y_pred_svm_xtest_earth, score_svm_xtest_earth) = _evaluate_on_earth(
    svm_best_earth, "SVM(Train)", "SVM (Test)")
# KNN - earth
(y_pred_knn_xtrain_earth, score_knn_xtrain_earth,
 y_pred_knn_xtest_earth, score_knn_xtest_earth) = _evaluate_on_earth(
    knn_best_earth, "KNN(Train)", "KNN (Test)")
# MLP - earth
(y_pred_mlp_xtrain_earth, score_mlp_xtrain_earth,
 y_pred_mlp_xtest_earth, score_mlp_xtest_earth) = _evaluate_on_earth(
    mlp_best_earth, "MLP(Train)", "MLP (Test)")
# RFR - earth
(y_pred_rfr_xtrain_earth, score_rfr_xtrain_earth,
 y_pred_rfr_xtest_earth, score_rfr_xtest_earth) = _evaluate_on_earth(
    rfr_best_earth, "RFR(Train)", "RFR (Test)")
# XGB - earth
(y_pred_xgb_xtrain_earth, score_xgb_xtrain_earth,
 y_pred_xgb_xtest_earth, score_xgb_xtest_earth) = _evaluate_on_earth(
    xgb_best_earth, "XGB (Train)", "XGB (Test)")


# In[18]:


# Metric comparison across all six models - DabieSulu
train_scores_dabiesulu = (
    score_mpr_xtrain_dabiesulu, score_svm_xtrain_dabiesulu, score_knn_xtrain_dabiesulu,
    score_mlp_xtrain_dabiesulu, score_rfr_xtrain_dabiesulu, score_xgb_xtrain_dabiesulu,
)
test_scores_dabiesulu = (
    score_mpr_xtest_dabiesulu, score_svm_xtest_dabiesulu, score_knn_xtest_dabiesulu,
    score_mlp_xtest_dabiesulu, score_rfr_xtest_dabiesulu, score_xgb_xtest_dabiesulu,
)
plot_scores(*train_scores_dabiesulu, *test_scores_dabiesulu, file_name_dabiesulu)
# Metric comparison across all six models - Earth
train_scores_earth = (
    score_mpr_xtrain_earth, score_svm_xtrain_earth, score_knn_xtrain_earth,
    score_mlp_xtrain_earth, score_rfr_xtrain_earth, score_xgb_xtrain_earth,
)
test_scores_earth = (
    score_mpr_xtest_earth, score_svm_xtest_earth, score_knn_xtest_earth,
    score_mlp_xtest_earth, score_rfr_xtest_earth, score_xgb_xtest_earth,
)
plot_scores(*train_scores_earth, *test_scores_earth, file_name_earth)


# In[19]:


def plot_predictions_custom(yTrue, yPredict, model_score, model_name, ax, file_name=None):
    """Draw true vs. predicted values, ordered by the true value, on *ax*.

    Args:
        yTrue: Observed target values.
        yPredict: Predicted target values, aligned with ``yTrue``.
        model_score: Indexable holding R2, RMSE, MAE and EV at positions 0-3.
        model_name: Panel/model label placed at the start of the title.
        ax: Matplotlib axes to draw on.
        file_name: Dataset label shown in square brackets in the title. When
            omitted, the module-level ``file_name_dabiesulu`` is used, which
            keeps existing five-argument calls working unchanged.
    """
    if file_name is None:
        # Backward-compatible default: the DabieSulu dataset label.
        file_name = file_name_dabiesulu
    df_model = pd.DataFrame({
        'yTrue': yTrue,
        'yPredict': yPredict
    })
    df_sorted = df_model.sort_values(by='yTrue')
    ax.plot(np.array(df_sorted.yTrue), color='#1f77b4', label='yTrue')  # blue
    ax.plot(np.array(df_sorted.yPredict), color='#d62728', label='yPredict')  # red
    ax.legend()
    ax.set_title(model_name + ":" + "True and Predicted Values " + "[" + file_name + "]")
    ax.set_xlabel('Index')
    ax.set_ylabel("Temperature (°C)")
    # Metric annotations are positioned in axes-relative coordinates.
    ax.text(0.4, 0.95, f'R2: {model_score[0]:.3f}', ha='left', va='top', transform=ax.transAxes)
    ax.text(0.4, 0.90, f'RMSE: {model_score[1]:.3f}', ha='left', va='top', transform=ax.transAxes)
    ax.text(0.4, 0.85, f'MAE: {model_score[2]:.3f}', ha='left', va='top', transform=ax.transAxes)
    ax.text(0.4, 0.80, f'EV: {model_score[3]:.3f}', ha='left', va='top', transform=ax.transAxes)
# 2x3 grid of test-set prediction curves for DabieSulu, one panel per model
fig, axs = plt.subplots(2, 3, figsize=(15, 10))
test_panels_dabiesulu = [
    (y_pred_mpr_xtest_dabiesulu, score_mpr_xtest_dabiesulu, "(a) MPR"),
    (y_pred_svm_xtest_dabiesulu, score_svm_xtest_dabiesulu, "(b) SVM"),
    (y_pred_knn_xtest_dabiesulu, score_knn_xtest_dabiesulu, "(c) KNN"),
    (y_pred_mlp_xtest_dabiesulu, score_mlp_xtest_dabiesulu, "(d) MLP"),
    (y_pred_rfr_xtest_dabiesulu, score_rfr_xtest_dabiesulu, "(e) RFR"),
    (y_pred_xgb_xtest_dabiesulu, score_xgb_xtest_dabiesulu, "(f) XGB"),
]
# axs.flat walks the grid row by row, matching the (a)-(f) panel order.
for (panel_pred, panel_score, panel_caption), panel_ax in zip(test_panels_dabiesulu, axs.flat):
    plot_predictions_custom(pre_dabiesulu[3], panel_pred, panel_score, panel_caption, panel_ax)
# Row headings placed just above the first panel of each row
for heading_ax, heading in ((axs[0, 0], 'Basic Model:'), (axs[1, 0], 'Complex Model:')):
    heading_ax.text(-0.15, 1.1, heading, transform=heading_ax.transAxes, fontsize=12, fontweight='bold', va='top')
plt.tight_layout(h_pad=2.5, w_pad=2.5)  # Increase vertical and horizontal spacing without changing aspect ratio
plt.show()


# In[20]:


def plot_predictions_custom(yTrue, yPredict, model_score, model_name, ax, file_name=None):
    """Draw true vs. predicted values, ordered by the true value, on *ax*.

    Args:
        yTrue: Observed target values.
        yPredict: Predicted target values, aligned with ``yTrue``.
        model_score: Indexable holding R2, RMSE, MAE and EV at positions 0-3.
        model_name: Panel/model label placed at the start of the title.
        ax: Matplotlib axes to draw on.
        file_name: Dataset label shown in square brackets in the title. When
            omitted, the module-level ``file_name_earth`` is used, which keeps
            existing five-argument calls working unchanged.
    """
    if file_name is None:
        # Backward-compatible default: the Earth dataset label.
        file_name = file_name_earth
    df_model = pd.DataFrame({
        'yTrue': yTrue,
        'yPredict': yPredict
    })
    df_sorted = df_model.sort_values(by='yTrue')
    ax.plot(np.array(df_sorted.yTrue), color='#1f77b4', label='yTrue')  # blue
    ax.plot(np.array(df_sorted.yPredict), color='#d62728', label='yPredict')  # red
    ax.legend()
    ax.set_title(model_name + ":" + "True and Predicted Values " + "[" + file_name + "]")
    ax.set_xlabel('Index')
    ax.set_ylabel("Temperature (°C)")
    # Metric annotations are positioned in axes-relative coordinates.
    ax.text(0.4, 0.95, f'R2: {model_score[0]:.3f}', ha='left', va='top', transform=ax.transAxes)
    ax.text(0.4, 0.90, f'RMSE: {model_score[1]:.3f}', ha='left', va='top', transform=ax.transAxes)
    ax.text(0.4, 0.85, f'MAE: {model_score[2]:.3f}', ha='left', va='top', transform=ax.transAxes)
    ax.text(0.4, 0.80, f'EV: {model_score[3]:.3f}', ha='left', va='top', transform=ax.transAxes)
    
# 2x3 grid of test-set prediction curves for Earth, one panel per model
fig, axs = plt.subplots(2, 3, figsize=(15, 10))
test_panels_earth = [
    (y_pred_mpr_xtest_earth, score_mpr_xtest_earth, "(a) MPR"),
    (y_pred_svm_xtest_earth, score_svm_xtest_earth, "(b) SVM"),
    (y_pred_knn_xtest_earth, score_knn_xtest_earth, "(c) KNN"),
    (y_pred_mlp_xtest_earth, score_mlp_xtest_earth, "(d) MLP"),
    (y_pred_rfr_xtest_earth, score_rfr_xtest_earth, "(e) RFR"),
    (y_pred_xgb_xtest_earth, score_xgb_xtest_earth, "(f) XGB"),
]
# axs.flat walks the grid row by row, matching the (a)-(f) panel order.
for (panel_pred, panel_score, panel_caption), panel_ax in zip(test_panels_earth, axs.flat):
    plot_predictions_custom(pre_earth[3], panel_pred, panel_score, panel_caption, panel_ax)
# Row headings placed just above the first panel of each row
for heading_ax, heading in ((axs[0, 0], 'Basic Model:'), (axs[1, 0], 'Complex Model:')):
    heading_ax.text(-0.15, 1.1, heading, transform=heading_ax.transAxes, fontsize=12, fontweight='bold', va='top')
plt.tight_layout(h_pad=2.5, w_pad=2.5)  # Increase vertical and horizontal spacing without changing aspect ratio
plt.show()


# In[21]:


def plot_feature_importance(df_importance, include_ti_feature=True, ax=None, file_name=''):
    """Draw a horizontal bar chart of feature importances.

    Args:
        df_importance: DataFrame with ``'Feature'`` and ``'Importance'``
            columns, in the order the bars should appear.
        include_ti_feature: When False, the ``ti_ppm`` row is left out of the
            chart and the title says so.
        ax: Axes to draw on; a new 10x6 figure is created when omitted.
        file_name: Dataset label shown in square brackets in the title.

    Returns:
        The axes that were drawn on.
    """
    if not include_ti_feature:
        # Remove the Ti row by name rather than by position, so the right
        # feature is excluded even when ti_ppm is not ranked first.
        is_ti = df_importance['Feature'].astype(str).str.lower() == 'ti_ppm'
        if is_ti.any():
            df_importance = df_importance[~is_ti]
        else:
            # No row named ti_ppm: keep the previous positional behaviour.
            df_importance = df_importance.iloc[1:]
    if ax is None:
        plt.figure(figsize=(10, 6))
        ax = plt.gca()
    sns.barplot(x='Importance', y='Feature', data=df_importance, palette="muted", ax=ax)
    if include_ti_feature:
        ax.set_title('Importance Of Trace Elements [Ti_ppm Included] [' + file_name + ']')
    else:
        ax.set_title('Importance Of Trace Elements [Ti_ppm Not Included] [' + file_name + ']')
    ax.set_xlabel('Importance')
    ax.set_ylabel('Trace Elements')
    ax.title.set_fontsize(12)
    # Write each bar's value (two decimals) at the end of the bar.
    for index, value in enumerate(df_importance['Importance']):
        ax.text(value, index, round(value, 2), va='center')
    sns.despine(ax=ax)
    return ax

# XGBoost feature importances (scaled to percent) for both datasets, 2x2 grid
fig, axs = plt.subplots(2, 2, figsize=(12, 11))
feature_importance_dabiesulu = xgb_best_dabiesulu.feature_importances_ * 100
feature_importance_earth = xgb_best_earth.feature_importances_ * 100
# Feature labels come from the unscaled DabieSulu feature frame.
columns = pre_dabiesulu[4].columns[:]
feature_names = list(columns)
feature_importance_df_dabiesulu = pd.DataFrame(
    {'Feature': columns, 'Importance': feature_importance_dabiesulu}
)
feature_importance_df_earth = pd.DataFrame(
    {'Feature': columns, 'Importance': feature_importance_earth}
)
feature_importance_df_dabiesulu_sorted = feature_importance_df_dabiesulu.sort_values(
    by='Importance', ascending=False
)
feature_importance_df_earth_sorted = feature_importance_df_earth.sort_values(
    by='Importance', ascending=False
)
# Keep at most the 50 highest-ranked features
index = 50
feature_importance_dabiesulu = feature_importance_df_dabiesulu_sorted.head(index)
feature_importance_earth = feature_importance_df_earth_sorted.head(index)
# (axes, data, include Ti?, title) - left column DabieSulu, right column Earth
importance_panels = [
    (axs[0, 0], feature_importance_dabiesulu, True,
     '(a) Importance Of Trace Elements [Ti_ppm Included] [' + file_name_dabiesulu + ']'),
    (axs[1, 0], feature_importance_dabiesulu, False,
     '(b) Importance Of Trace Elements [Ti_ppm Not Included] [' + file_name_dabiesulu + ']'),
    (axs[0, 1], feature_importance_earth, True,
     '(c) Importance Of Trace Elements [Ti_ppm Included] [' + file_name_earth + ']'),
    (axs[1, 1], feature_importance_earth, False,
     '(d) Importance Of Trace Elements [Ti_ppm Not Included] [' + file_name_earth + ']'),
]
for panel_ax, panel_data, with_ti, panel_title in importance_panels:
    plot_feature_importance(panel_data, include_ti_feature=with_ti, ax=panel_ax)
    # Replace the default title with the lettered one at a smaller font size.
    panel_ax.set_title(panel_title).set_fontsize(11)
plt.tight_layout()
plt.show()


# In[28]:


# shap.summary_plot -- DabieSulu model
# Compute SHAP values of the tuned XGBoost model on the scaled training matrix,
# wrapped in a DataFrame so the plots carry feature names.
xtrain_df = pd.DataFrame(data=pre_dabiesulu[0], columns=feature_names)
explainer = shap.TreeExplainer(xgb_best_dabiesulu)
shap_values = explainer.shap_values(xtrain_df)

# Options shared by both summary plots below.
summary_options = {"plot_type": "dot", "max_display": 15, "plot_size": "auto"}

plt.title('SHAP summary plot [ti_ppm included] [DabieSulu] \n')
shap.summary_plot(shap_values, xtrain_df, **summary_options)

# Find the (row, column) position of the largest absolute SHAP value and
# overwrite that whole column with NaN, in place, before plotting again.
# NOTE(review): the title below says ti_ppm, so this presumes the dominant
# column is ti_ppm -- confirm.
max_index = np.unravel_index(np.argmax(np.abs(shap_values)), shap_values.shape)
shap_values[:, max_index[1]] = np.nan

plt.title('(a) SHAP summary plot [ti_ppm not included] [DabieSulu] \n')
shap.summary_plot(shap_values, xtrain_df, **summary_options)


# In[36]:


# shap.decision_plot -- DabieSulu model
# Recompute the SHAP values for the DabieSulu training matrix in this cell.
xtrain_df = pd.DataFrame(data=pre_dabiesulu[0], columns=feature_names)
explainer = shap.TreeExplainer(xgb_best_dabiesulu)
shap_values = explainer.shap_values(xtrain_df)

# With every feature: all samples superimposed, then sample 0 on its own.
plt.title('SHAP Decision Plot [ti_ppm included] [DabieSulu] \n')
shap.decision_plot(explainer.expected_value, shap_values, feature_names=feature_names)
shap.decision_plot(explainer.expected_value, shap_values[0], feature_names=feature_names)
plt.show()

# Drop the ti_ppm column from both the SHAP matrix and the name list so the
# two stay aligned.
ti_ppm_index = feature_names.index("ti_ppm")
feature_names_without_ti_ppm = [
    name for position, name in enumerate(feature_names) if position != ti_ppm_index
]
shap_values_without_ti_ppm = np.delete(shap_values, ti_ppm_index, axis=1)

# Without ti_ppm: all samples superimposed (a), then sample 0 on its own (b).
plt.title('(a) SHAP Decision Plot superposition [ti_ppm not included] [DabieSulu] \n', fontsize=13)
shap.decision_plot(
    explainer.expected_value,
    shap_values_without_ti_ppm,
    feature_names=feature_names_without_ti_ppm,
)
plt.title('(b) SHAP Decision Plot [ti_ppm not included] [DabieSulu] \n', fontsize=13)
shap.decision_plot(
    explainer.expected_value,
    shap_values_without_ti_ppm[0],
    feature_names=feature_names_without_ti_ppm,
)
plt.show()


# In[29]:


# shap.summary_plot earth
# Compute SHAP values of the tuned Earth XGBoost model on its scaled training
# matrix, wrapped in a DataFrame so the plots carry feature names.
xtrain_df_earth = pd.DataFrame(pre_earth[0], columns=feature_names)
explainer = shap.TreeExplainer(xgb_best_earth)
shap_values_earth = explainer.shap_values(xtrain_df_earth)
plt.title('SHAP summary plot [ti_ppm included] [Earth] \n')
shap.summary_plot(shap_values_earth, xtrain_df_earth, plot_type="dot", max_display=15,plot_size="auto")
# Locate the (row, column) of the largest absolute SHAP value. The flat argmax
# must be unravelled with the shape of the SAME array; using another array's
# shape can raise ValueError when the row counts differ.
max_index = np.unravel_index(np.abs(shap_values_earth).argmax(), shap_values_earth.shape)
# Blank that column on a copy so shap_values_earth itself stays NaN-free for
# any later cell that reuses it.
# NOTE(review): the title below says ti_ppm, so this presumes the dominant
# column is ti_ppm -- confirm.
shap_values_earth_masked = shap_values_earth.copy()
shap_values_earth_masked[:, max_index[1]] = np.nan
plt.title('(b) SHAP summary plot [ti_ppm not included] [Earth] \n')
shap.summary_plot(shap_values_earth_masked, xtrain_df_earth, plot_type="dot", max_display=15,plot_size="auto")


# In[38]:


# shap.decision_plot -- Earth model
# Recompute the Earth SHAP values here instead of reusing arrays left by other
# cells, which may have been modified in place (e.g. a column masked with NaN
# for a summary plot). This mirrors the DabieSulu decision-plot cell.
xtrain_df_earth = pd.DataFrame(pre_earth[0], columns=feature_names)
explainer = shap.TreeExplainer(xgb_best_earth)
shap_values_earth = explainer.shap_values(xtrain_df_earth)
# With every feature: all samples superimposed, then sample 20 on its own.
plt.title('SHAP Decision Plot [ti_ppm included] [Earth] \n')
shap.decision_plot(explainer.expected_value, shap_values_earth, feature_names=feature_names)
shap.decision_plot(explainer.expected_value, shap_values_earth[20], feature_names=feature_names)
plt.show()
# Drop the ti_ppm column from both the SHAP matrix and the name list so the
# two stay aligned.
ti_ppm_index = feature_names.index("ti_ppm")
shap_values_without_ti_ppm = np.delete(shap_values_earth, ti_ppm_index, axis=1)
feature_names_without_ti_ppm = feature_names[:ti_ppm_index] + feature_names[ti_ppm_index+1:]
plt.title('(c) SHAP Decision Plot superposition [ti_ppm not included] [Earth] \n').set_fontsize(13)
shap.decision_plot(explainer.expected_value, shap_values_without_ti_ppm, feature_names=feature_names_without_ti_ppm)
# Title fixed: this panel shows the Earth model, not DabieSulu.
# NOTE(review): the single-sample plot above uses row 20 while this one uses
# row 100 -- confirm the different samples are intentional.
plt.title('(d) SHAP Decision Plot [ti_ppm not included] [Earth] \n').set_fontsize(13)
shap.decision_plot(explainer.expected_value, shap_values_without_ti_ppm[100], feature_names=feature_names_without_ti_ppm)
plt.show()


# In[26]:


# waterfall  shap.force_plot -- single-sample explanations for the Earth model
# Take the SHAP values and the base (expected) value from the same explainer.
# Reading the base value from whichever `explainer` another cell last bound
# could pair Earth SHAP values with a different model's base value.
explainer = shap.TreeExplainer(xgb_best_earth)
shap_values = explainer.shap_values(pre_earth[0])
# xtrain_df_earth is expected to have been built by an earlier cell from
# pre_earth[0]; it supplies the feature values shown next to each contribution.
shap_exp = shap.Explanation(values=shap_values, base_values=explainer.expected_value, data=xtrain_df_earth)
sample_index = 300  # row of the Earth training matrix to explain
shap.plots.waterfall(shap_exp[sample_index])
shap.force_plot(explainer.expected_value, shap_values[sample_index,:], xtrain_df_earth.iloc[sample_index,:])


# In[27]:


# dabiesulu Partial Dependence Plots
# Rank the features by the DabieSulu model's importances and draw one partial
# dependence panel for each of the twelve highest-ranked ones (3 x 4 grid).
feature_importance = xgb_best_dabiesulu.feature_importances_
columns = pre_dabiesulu[4].columns[:]
feature_names = list(columns)
df_importance = pd.DataFrame({'Feature': feature_names, 'Importance': feature_importance})
df_importance = df_importance.sort_values(by='Importance', ascending=False)
top_features = df_importance['Feature'].tolist()[:12]  # Choose the twelve most important features
fig, axs = plt.subplots(nrows=3, ncols=4, figsize=(25, 15))
fig.suptitle('Partial Dependence Plots [top 12] [DabieSulu]', fontsize=18)
for i, feature_name in enumerate(top_features):
    # The model was fitted on a bare array, so features are addressed by position.
    feature_index = feature_names.index(feature_name)
    panel_ax = axs[i//4, i%4]
    pdp_display = plot_partial_dependence(xgb_best_dabiesulu, pre_dabiesulu[0], features=[feature_index], ax=panel_ax)
    panel_ax.set_title(feature_name)
    # Label inside the loop so every panel gets its own feature name; labelling
    # after the loop would reuse the last feature name for all panels.
    # The labels go on the axes reported by the returned display.
    # NOTE(review): assumes scikit-learn treats a single passed-in axes as a
    # bounding box (axis hidden) and draws in nested axes exposed via `axes_`
    # -- confirm for the installed version.
    for drawn_ax in np.ravel(pdp_display.axes_):
        if drawn_ax is not None:
            drawn_ax.set_xlabel(feature_name)
            drawn_ax.set_ylabel('Model Response')
plt.subplots_adjust(top=0.9)
plt.show()
#earth   Partial Dependence Plots
# Rank the features by the Earth model's importances and draw one partial
# dependence panel for each of the twelve highest-ranked ones (3 x 4 grid).
feature_importance = xgb_best_earth.feature_importances_
columns = pre_earth[4].columns[:]
feature_names = list(columns)
df_importance = pd.DataFrame({'Feature': feature_names, 'Importance': feature_importance})
df_importance = df_importance.sort_values(by='Importance', ascending=False)
top_features = df_importance['Feature'].tolist()[:12]  # Choose the twelve most important features
fig, axs = plt.subplots(nrows=3, ncols=4, figsize=(25, 15))
fig.suptitle('Partial Dependence Plots [top 12] [Earth]', fontsize=18)
for i, feature_name in enumerate(top_features):
    # The model was fitted on a bare array, so features are addressed by position.
    feature_index = feature_names.index(feature_name)
    panel_ax = axs[i//4, i%4]
    pdp_display = plot_partial_dependence(xgb_best_earth, pre_earth[0], features=[feature_index], ax=panel_ax)
    panel_ax.set_title(feature_name)
    # Label inside the loop so every panel gets its own feature name; labelling
    # after the loop would reuse the last feature name for all panels.
    # The labels go on the axes reported by the returned display.
    # NOTE(review): assumes scikit-learn treats a single passed-in axes as a
    # bounding box (axis hidden) and draws in nested axes exposed via `axes_`
    # -- confirm for the installed version.
    for drawn_ax in np.ravel(pdp_display.axes_):
        if drawn_ax is not None:
            drawn_ax.set_xlabel(feature_name)
            drawn_ax.set_ylabel('Model Response')
plt.subplots_adjust(top=0.9)
plt.show()

