def feature_importance(X_train, y_train):
    """Fit XGBoost and LightGBM regressors and save feature-importance charts.

    Two default-parameter models are trained on the same data only to rank
    the features; the fitted models are local and are not returned. Two
    images are written into ``config.dataset``:

    * ``特征重要度1.png`` - XGBoost ``plot_importance`` chart, 8x7 inches.
    * ``特征重要度2.png`` - LightGBM ``plot_importance`` chart using
      ``importance_type='gain'``, default matplotlib figure size.

    Each successful save is reported through ``config.logger``.

    Args:
        X_train: Feature matrix handed unchanged to both ``fit`` calls.
            NOTE(review): presumably a numeric-only DataFrame - confirm
            that callers drop date/id columns before calling.
        y_train: Target values aligned with ``X_train``.

    Returns:
        None. The function is used for its file and log side effects.
    """
    import lightgbm as lgb
    import matplotlib.pyplot as plt
    from xgboost import XGBRegressor
    from xgboost import plot_importance

    # SimHei is selected so the Chinese chart title can be rendered.
    # This is a process-wide matplotlib setting, kept as in the original.
    plt.rcParams['font.sans-serif'] = ['SimHei']

    # ---- Chart 1: XGBoost ------------------------------------------------
    xgb_model = XGBRegressor()
    xgb_model.fit(X_train, y_train)

    # Create the figure ourselves and hand its axes to plot_importance, so
    # exactly one figure exists per chart and it is always closed. Without
    # an explicit ``ax`` the library opens its own figure and any figure
    # created beforehand is left open for the life of the process.
    fig, ax = plt.subplots(figsize=(8, 7))
    try:
        plot_importance(xgb_model, ax=ax)
        ax.set_title('特征重要度1')
        fig.savefig(os.path.join(config.dataset, '特征重要度1.png'))
    finally:
        plt.close(fig)
    config.logger.info('特征重要度1.png 已保存')

    # ---- Chart 2: LightGBM -----------------------------------------------
    regressor = lgb.LGBMRegressor()
    regressor.fit(X_train, y_train)

    # Default figure size on purpose: the original 30x40 figure was never
    # drawn on, so the saved image always had matplotlib's default size.
    fig, ax = plt.subplots()
    try:
        # importance_type may be 'gain' or 'split'.
        lgb.plot_importance(regressor, importance_type='gain', ax=ax)
        ax.set_title('Feature Importance - LGBMRegressor', fontsize=12)
        # Shrink all text to 9pt; this list includes the title, so the
        # 12pt set above ends up as 9pt exactly as before.
        text_items = ([ax.title, ax.xaxis.label, ax.yaxis.label]
                      + ax.get_xticklabels() + ax.get_yticklabels())
        for item in text_items:
            item.set_fontsize(9)
        fig.savefig(os.path.join(config.dataset, '特征重要度2.png'))
    finally:
        plt.close(fig)
    config.logger.info('特征重要度2.png 已保存')
def pp_bdwd_png(global_config, predict_csv='juxitingdataset/predict.csv'):
    """Plot the monthly and the daily+weekly PP prediction charts.

    The best-model predictions returned by ``find_best_models`` are turned
    into a table with ``date`` and ``predictresult`` columns. Its last four
    rows are plotted as one chart; rows 2-3 are merged with the five daily
    predictions of the model named in
    ``best_bdwd_price['week_price']['model_name']`` and plotted as a second
    chart tagged ``'zhoudu'``.

    Args:
        global_config: Mapping that provides at least ``'end_time'``; it is
            also forwarded unchanged to ``find_best_models`` and
            ``plot_pp_predict_result``.
        predict_csv: Path of the CSV holding the daily predictions. It must
            contain a ``ds`` column and one column per model name. Defaults
            to the location that used to be hard-coded.
            NOTE(review): elsewhere in this module predict.csv is written
            to ``os.path.join(config.dataset, "predict.csv")`` - confirm
            both paths point at the same file.

    Returns:
        None. Both intermediate tables are printed and the charts are
        produced by ``plot_pp_predict_result``.

    Raises:
        ValueError: If the CSV does not have exactly five rows, because the
            five fixed day labels are assigned as its index.
    """
    best_bdwd_price = find_best_models(
        date=global_config['end_time'], global_config=global_config)

    # Build the transposed table once; both charts slice from it.
    best_df = pd.DataFrame(best_bdwd_price).T[['date', 'predictresult']]

    # Explicit copies: adding a column to a bare slice is chained
    # assignment and triggers pandas' SettingWithCopyWarning.
    y_hat_yuedu = best_df.iloc[-4:].copy()
    y_hat_yuedu['ds'] = pd.to_datetime(y_hat_yuedu['date'])
    # Plot the PP futures monthly prediction chart.
    plot_pp_predict_result(y_hat_yuedu, global_config)

    # Rows 2-3 - presumably the weekly entries; verify against the key
    # order returned by find_best_models.
    y_hat_zhoudu = best_df.iloc[2:4].copy()
    y_hat_zhoudu['ds'] = pd.to_datetime(y_hat_zhoudu['date'])
    y_hat_zhoudu = y_hat_zhoudu.drop(columns=['date'])
    print(y_hat_zhoudu)

    # Load the five-day predictions of this week's best model.
    week_price_modelname = best_bdwd_price['week_price']['model_name']
    five_days_predict_price = pd.read_csv(predict_csv)[
        ['ds', week_price_modelname]].copy()
    five_days_predict_price['ds'] = pd.to_datetime(
        five_days_predict_price['ds'])
    five_days_predict_price = five_days_predict_price.rename(
        columns={week_price_modelname: 'predictresult'})

    # Label the rows: next day ... fifth day.
    index_labels = ["次日", "次二日", "次三日", "次四日", "次五日"]
    five_days_predict_price.index = index_labels

    y_hat_riduzhoudu = pd.concat(
        [y_hat_zhoudu, five_days_predict_price], axis=0)
    y_hat_riduzhoudu = y_hat_riduzhoudu.sort_values(by='ds')
    print(y_hat_riduzhoudu)
    # Plot the PP futures daily + weekly prediction chart.
    plot_pp_predict_result(y_hat_riduzhoudu, global_config, 'zhoudu')