import numpy as np import os import pickle import concurrent.futures import pandas as pd import matplotlib.pyplot as plt from smt.surrogate_models import KRG from smt.surrogate_models import GENN from pymoo.algorithms.moo.nsga2 import NSGA2 from pymoo.optimize import minimize from pymoo.core.problem import Problem from pymoo.operators.crossover.sbx import SBX from pymoo.operators.mutation.pm import PM class CaseData: def __init__(self, CaseID=None, pipeNum=None, length=None, sapcing=None, innerD=None, outerD=None, thickness=None, H2_molarfraction=None, inlet_temp=None, inlet_velocity=None, operation_pressure=None, washcoatfactor=None, molar_sum_op=None, co_cov_op=None, h2_cov_op=None, ch4_sel_op=None, c3h8_sel_op=None, c8h18_sel_op=None, co2_sel_op=None, temp_ave_op=None, temp_center_op=None, temp_max_op=None, yield_=None, temp_diff=None, inv_yield=None): self.CaseID = CaseID self.pipeNum = pipeNum self.length = length self.sapcing = sapcing self.innerD = innerD self.outerD = outerD self.thickness = thickness self.H2_molarfraction = H2_molarfraction self.inlet_temp = inlet_temp self.inlet_velocity = inlet_velocity self.operation_pressure = operation_pressure self.washcoatfactor = washcoatfactor self.molar_sum_op = molar_sum_op self.co_cov_op = co_cov_op self.h2_cov_op = h2_cov_op self.ch4_sel_op = ch4_sel_op self.c3h8_sel_op = c3h8_sel_op self.c8h18_sel_op = c8h18_sel_op self.co2_sel_op = co2_sel_op self.temp_ave_op = temp_ave_op self.temp_center_op = temp_center_op self.temp_max_op = temp_max_op self.yield_ = yield_ self.temp_diff = temp_diff self.inv_yield = inv_yield def __repr__(self): return f"CaseData({', '.join(f'{k}={getattr(self, k)!r}' for k in self.__dict__)})" D1_excel_path = 'listdata.xlsx' excel_path = D1_excel_path sheets = pd.read_excel(excel_path, sheet_name=None) all_cases = {} for sheet_name, df in sheets.items(): case_list = [] for _, row in df.iterrows(): case = CaseData( CaseID=row.get('CaseID'), pipeNum=row.get('pipeNum'), length=row.get('length'), sapcing=row.get('spacing'), innerD=row.get('innerD'), outerD=row.get('outerD'), thickness=row.get('thickness'), H2_molarfraction=row.get('H2_molarfraction'), inlet_temp=row.get('inlet_temp'), inlet_velocity=row.get('inlet_velocity'), operation_pressure=row.get('operation_pressure'), washcoatfactor=row.get('washcoatfactor'), molar_sum_op=row.get('molar_sum-op'), co_cov_op=row.get('co_cov-op'), h2_cov_op=row.get('h2_cov-op'), ch4_sel_op=row.get('ch4_sel-op'), c3h8_sel_op=row.get('c3h8_sel-op'), c8h18_sel_op=row.get('c8h18_sel-op'), co2_sel_op=row.get('co2_sel-op'), temp_ave_op=row.get('temp_ave-op'), temp_center_op=row.get('temp_center-op'), temp_max_op=row.get('temp_max-op'), yield_=row.get('yield'), temp_diff=row.get('temp-diff'), inv_yield=row.get('1/yield') ) case_list.append(case) all_cases[sheet_name] = case_list print(f'Total sheets processed: {len(all_cases["train"])}') print(f'Total cases in "train" sheet: {len(all_cases["validate"])}') print(f'Total cases in "train" sheet: {len(all_cases["test"])}') fields = ['length', 'sapcing', 'innerD', 'thickness', 'inv_yield', 'temp_diff'] train_data = {field: [getattr(case, field) for case in all_cases.get('train', [])] for field in fields} validate_data = {field: [getattr(case, field) for case in all_cases.get('validate', [])] for field in fields} test_data = {field: [getattr(case, field) for case in all_cases.get('test', [])] for field in fields} print('train:', len(train_data['length'])) print('validate:', len(validate_data['length'])) print('test:', len(test_data['length'])) train_input = np.array(list(zip(train_data['length'], train_data['sapcing'], train_data['innerD'], train_data['thickness']))) train_result = np.array(list(zip(train_data['inv_yield'], train_data['temp_diff']))) validate_input = np.array(list(zip(validate_data['length'], validate_data['sapcing'], validate_data['innerD'], validate_data['thickness']))) validate_result = np.array(list(zip(validate_data['inv_yield'], validate_data['temp_diff']))) test_input = np.array(list(zip(test_data['length'], test_data['sapcing'], test_data['innerD'], test_data['thickness']))) test_result = np.array(list(zip(test_data['inv_yield'], test_data['temp_diff']))) KRG_params_1 = { 'theta0': [1e2] * 4, 'theta_bounds': [1e-6, 1000], 'corr': 'squar_exp', 'poly': 'quadratic', 'hyper_opt': 'Cobyla', 'eval_noise': True, 'noise0': [1e-5], 'noise_bounds': [1e-10, 1e10], 'print_global': False } KRG_params_2 = { 'theta0': [1e2] * 4, 'theta_bounds': [1e-6, 1000], 'corr': 'squar_exp', 'poly': 'quadratic', 'eval_noise': True, 'noise0': [1e-5], 'noise_bounds': [1e-10, 1e10], 'print_global': False } genn_non_neg1 = { 'hidden_layer_sizes':[30,30], 'is_normalize':True, 'print_global': False, 'seed':1 } genn_non_neg2= { 'hidden_layer_sizes':[30,30], 'is_normalize':True, 'print_global': False, 'seed':1 } def cache_or_train(model, train_input, train_result, cache_path): if os.path.exists(cache_path): with open(cache_path, 'rb') as f: model = pickle.load(f) print(f"Loaded model from {cache_path}") else: model.set_training_values(train_input, train_result) model.train() with open(cache_path, 'wb') as f: pickle.dump(model, f) print(f"Trained and cached model to {cache_path}") return model model_genn_y1= KRG(**KRG_params_1) model_genn_y2= KRG(**KRG_params_2) cache_path1 = './model_cache3/KRG_2_Y1.pkl' cache_path2 = './model_cache3/KRG_2_Y2.pkl' def train_and_predict_y1(predict_input): model = KRG(**KRG_params_1) model = cache_or_train(model, train_input, train_result[:,0], cache_path1) return model.predict_values(predict_input),model def train_and_predict_y2(predict_input): model = KRG(**KRG_params_2) model = cache_or_train(model, train_input, train_result[:,1], cache_path2) return model.predict_values(predict_input),model with concurrent.futures.ThreadPoolExecutor() as executor: future_y1_krg = executor.submit(train_and_predict_y1, validate_input) future_y2_krg = executor.submit(train_and_predict_y2, validate_input) validate_output_y1_krg,model_krg_y1 = future_y1_krg.result() validate_output_y2_krg,model_krg_y2 = future_y2_krg.result() def calc_mre_rmse(true, pred): true, pred = np.ravel(true), np.ravel(pred) nae = np.mean(np.abs(true - pred) / (np.max(true) - np.min(true))) nrmse = np.sqrt(np.mean((true - pred) ** 2)) / (np.max(true) - np.min(true)) return nae, nrmse nae_y1, nrmse_y1 = calc_mre_rmse(validate_result[:,0], validate_output_y1_krg) nae_y2, nrmse_y2 = calc_mre_rmse(validate_result[:,1], validate_output_y2_krg) print("KRG Model Performance on Test Set:") print(f"inv_yield: MRE={nae_y1:.4f}, RMSE={nrmse_y1:.4f}") print(f"temp_diff: MRE={nae_y2:.4f}, RMSE={nrmse_y2:.4f}") x_min = np.array([30.0, 0.05, 0.5, 0.25]) x_max = np.array([200.0,2.5, 4, 6.5]) class MyProblem(Problem): def __init__(self): super().__init__(n_var=4, n_obj=2, n_constr=2, xl=x_min, xu=x_max) def _evaluate(self, x, out, *args, **kwargs): f1 = model_krg_y1.predict_values(x) f2 = model_krg_y2.predict_values(x) out["F"] = np.column_stack([f1, f2]) out["G"] = [-f2,0.01-f1] algorithm1 = NSGA2( pop_size=2000, n_offsprings=1000, crossover=SBX(prob=0.7, eta=5), mutation=PM(prob=0.2, eta=5), eliminate_duplicates=True, ) class MyCallback: def __init__(self): self.all_F = [] def notify(self, algorithm): self.all_F.append(algorithm.pop.get("F")) def __call__(self, algorithm): self.notify(algorithm) callback1 = MyCallback() Main_pro = MyProblem() res1 = minimize(Main_pro, algorithm1, ('n_gen', 50), seed=3, verbose=False, callback=callback1) F1 = res1.F X1 = res1.X print(len(F1), len(X1)) def plot_and_save_search_points(callback, F, sheet_name, excel_writer): all_points = np.vstack(callback.all_F) set_F = set(map(tuple, F)) mask = np.array([tuple(row) not in set_F for row in all_points]) all_points_unique = all_points[mask] num_gens = len(callback.all_F) plt.figure(figsize=(8, 6)) for i, gen_F in enumerate(callback.all_F): alpha = 0.2 + 0.8 * (1 - i / (num_gens - 1)) if num_gens > 1 else 1.0 plt.scatter(gen_F[:, 0], gen_F[:, 1], s=10, alpha=alpha, color='black', marker='+', label=f'Gen {i+1}' if i in [0, num_gens-1] else None) plt.scatter(F[:, 0], F[:, 1], s=30, facecolors='none', edgecolors='red', marker='o', label=f'{sheet_name} Pareto Front') plt.scatter(train_data['inv_yield'], train_data['temp_diff'], s=25, marker='x', color='blue', label='Train') plt.scatter(validate_data['inv_yield'], validate_data['temp_diff'], s=25, marker='s', color='green', label='Validate') plt.scatter(test_data['inv_yield'], test_data['temp_diff'], s=25, marker='^', color='purple', label='Test') plt.title(f"All Sampled Points & Original Data during {sheet_name} Search") plt.xlabel("inv_yield") plt.ylabel("temp_diff") plt.legend(loc='best') plt.tight_layout() plt.show() df_samples = pd.DataFrame({ 'Sampled_inv_yield': all_points_unique[:, 0], 'Sampled_temp_diff': all_points_unique[:, 1] }) df_pareto = pd.DataFrame({ 'Pareto_inv_yield': F[:, 0], 'Pareto_temp_diff': F[:, 1] }) df_samples.to_excel(excel_writer, sheet_name=f'{sheet_name}_Samples', index=False) df_pareto.to_excel(excel_writer, sheet_name=f'{sheet_name}_Pareto', index=False) def save_sorted_variables_to_excel(X, F, excel_writer): combined = np.hstack([X, F]) sort_idx = np.argsort(combined[:, 4]) sorted_6d = combined[sort_idx] df_var = pd.DataFrame( sorted_6d, columns=['length', 'spacing', 'innerD', 'thickness', 'inv_yield', 'temp_diff'] ) df_var.to_excel(excel_writer, sheet_name='Sorted_Variables', index=False) def save_bounds_to_excel(x_lb, x_ub, excel_writer): df = pd.DataFrame({'x_min': x_lb, 'x_max': x_ub}, index=['length', 'spacing', 'innerD', 'thickness']) df.to_excel(excel_writer, sheet_name='Bounds') output_filename = 'result_algorithm_search2_genn.xlsx' with pd.ExcelWriter(output_filename, engine='openpyxl') as writer: plot_and_save_search_points(callback1, F1, 'NSGA2', writer) save_sorted_variables_to_excel(X1, F1, writer) save_bounds_to_excel(x_min, x_max, writer) combined = np.hstack([res1.X, res1.F]) sort_idx = np.argsort(combined[:, 4]) sorted_6d = combined[sort_idx] sorted_X = sorted_6d[:, :4] sorted_F = sorted_6d[:, 4:] names = ['length', 'spacing', 'innerD', 'thickness', 'inv_yield (f1)', 'temp_diff (f2)'] fig, axes = plt.subplots(6, 1, figsize=(8, 12), sharex=True) for i in range(6): y = sorted_6d[:, i] axes[i].scatter(np.arange(len(y)), y, s=8, alpha=0.6) axes[i].set_ylabel(names[i]) axes[i].grid(True) axes[-1].set_xlabel('Sorted index (by f1)') plt.suptitle('All variables vs sorted index (by f1)', y=1.01) plt.tight_layout() plt.show()