Polyolefin: add feature-importance plots
parent 538ef426f9
commit b8ca24e07b
@@ -553,6 +553,54 @@ def featureAnalysis(df, dataset, y):
     #         plt.close()
 
 
+# Compute and plot feature importance
+def feature_importance(X_train, y_train):
+    import lightgbm as lgb
+    import matplotlib.pyplot as plt
+    from xgboost import XGBRegressor
+    from xgboost import plot_importance
+
+    # Use SimHei so Chinese labels render correctly
+    plt.rcParams['font.sans-serif'] = ['SimHei']
+
+    # Fit an XGBoost regressor to score the features
+    temp = XGBRegressor()
+    temp.fit(X_train, y_train)
+
+    # plot_importance creates its own figure; resize it after the fact
+    ax = plot_importance(temp)
+    fig = ax.figure
+    fig.set_size_inches(8, 7)
+
+    # Plot title (replace with your model name if needed)
+    title = '特征重要度1'
+    ax.set_title(title)
+
+    # Save the figure instead of showing it
+    plt.savefig(os.path.join(config.dataset, '特征重要度1.png'))
+    plt.close()
+    config.logger.info('特征重要度1.png 已保存')
+
+    # Create an LGBMRegressor and train it
+    regressor = lgb.LGBMRegressor()
+    regressor.fit(X_train, y_train)
+
+    # lgb.plot_importance also creates its own figure, so pass the
+    # size directly rather than calling plt.figure() beforehand
+    ax = lgb.plot_importance(
+        regressor, importance_type='gain', figsize=(30, 40))  # or 'split'
+    # Title and font sizes
+    ax.set_title('Feature Importance - LGBMRegressor', fontsize=12)
+    for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
+                 ax.get_xticklabels() + ax.get_yticklabels()):
+        item.set_fontsize(9)  # axis labels and tick labels
+    # Save the figure
+    plt.savefig(os.path.join(config.dataset, '特征重要度2.png'))
+
+    plt.close()
+    config.logger.info('特征重要度2.png 已保存')
+
+
 def corr_feature(df):
     # Rename/reorder the columns so that 'y' comes first
     df.reindex(['y'] + sorted(df.columns.difference(['y'])))
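A minimal usage sketch for the new helper (toy data; the real call site is in the `ex_Model_Juxiting` hunk below, and the function also relies on module-level `os` and `config`):

    import numpy as np
    import pandas as pd

    X_train = pd.DataFrame(np.random.rand(100, 3), columns=['a', 'b', 'c'])
    y_train = pd.Series(np.random.rand(100))
    feature_importance(X_train=X_train, y_train=y_train)
    # writes 特征重要度1.png and 特征重要度2.png under config.dataset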
@@ -1102,6 +1157,7 @@ def datachuli_juxiting(df_zhibiaoshuju, df_zhibiaoliebiao, datecol='date', end_t
 
     calculate_correlation(df=df)
     featureAnalysis(df, dataset=dataset, y='y')
+
     return df
 
 
@@ -349,6 +349,11 @@ def ex_Model_Juxiting(df, horizon, input_size, train_steps, val_check_steps, ear
     config.logger.info("\nTesting set head:")
     config.logger.info(df_test.head())
 
+    # Feature importance plots
+    X_train = df_train.drop(columns=['y'])
+    y_train = df_train['y']
+    feature_importance(X_train=X_train, y_train=y_train)
+
     models = [
         NHITS(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps,
               scaler_type='standard', activation='ReLU', early_stop_patience_steps=early_stop_patience_steps),
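One caveat worth flagging (an assumption about the data layout, not something this commit asserts): neuralforecast-style training frames usually carry `unique_id` and `ds` columns alongside `y`, and the tree models above reject non-numeric columns. If that applies to `df_train` here, a defensive variant of the drop would be:

    # hypothetical guard: drop target plus identifier/date columns if present
    drop_cols = [c for c in ['y', 'ds', 'unique_id'] if c in df_train.columns]
    X_train = df_train.drop(columns=drop_cols)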
@@ -396,7 +401,8 @@ def ex_Model_Juxiting(df, horizon, input_size, train_steps, val_check_steps, ear
 
         # VanillaTransformer(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard', ),  # errors out
         # Autoformer(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard', ), # errors out
-        NBEATS(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard', ),
+        NBEATS(h=horizon, input_size=input_size, max_steps=train_steps,
+               val_check_steps=val_check_steps, scaler_type='standard', ),
         # NBEATSx (h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard',activation='ReLU', ),   # errors out
         # HINT(h=horizon),
 
@@ -462,8 +468,8 @@ def ex_Model_Juxiting(df, horizon, input_size, train_steps, val_check_steps, ear
     df_predict['created_dt'] = end_time
 
     # Keep integer predictions (round first, then cast to int)
-    df_predict = df_predict.round().astype({col: 'int' for col in df_predict.columns if col not in ['ds', 'created_dt']})
-
+    df_predict = df_predict.round().astype(
+        {col: 'int' for col in df_predict.columns if col not in ['ds', 'created_dt']})
 
     # Save the predictions
     df_predict.to_csv(os.path.join(config.dataset, "predict.csv"), index=False)
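For reference, a toy version of the round-then-cast pattern above ('ds' passes through untouched because it is excluded from the dtype map):

    import pandas as pd

    df = pd.DataFrame({'ds': pd.to_datetime(['2024-07-30']),
                       'NHITS': [101.6], 'NBEATS': [99.2]})
    df = df.round().astype({c: 'int' for c in df.columns if c != 'ds'})
    # NHITS -> 102, NBEATS -> 99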
@@ -2055,7 +2061,6 @@ def model_losss_juxiting(sqlitedb, end_time, is_fivemodels):
     except ValueError as e:
         print(e)
 
-
     def add_upper_lower_bound(row):
         print(row['columns'])
         print(type(row['columns']))
@@ -3343,7 +3348,8 @@ def pp_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, input
         stime = df3['ds'].iloc[0]
         etime = df3['ds'].iloc[-1]
         # Add the deviation-rate table
-        fivemodels = '、'.join(eval_df['模型(Model)'].values[:5])  # joined into one string for the report text below
+        fivemodels = '、'.join(
+            eval_df['模型(Model)'].values[:5])  # joined into one string for the report text below
         content.append(Graphs.draw_text(
             f'预测使用了{num_models}个模型进行训练,使用评估结果MAE前五的模型分别是 {fivemodels} ,模型上一预测区间 {stime} -- {etime}的偏差率(%)分别是:'))
         # # Add the deviation-rate table
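The join above simply produces an ideographic-comma-separated list for the report sentence; with hypothetical model names:

    models = ['NHITS', 'NBEATS', 'TFT', 'DLinear', 'MLP']
    fivemodels = '、'.join(models[:5])  # 'NHITS、NBEATS、TFT、DLinear、MLP'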
@@ -3541,34 +3547,35 @@ def pp_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, input
 def pp_bdwd_png(global_config):
     best_bdwd_price = find_best_models(
         date=global_config['end_time'], global_config=global_config)
-    y_hat_yuedu = pd.DataFrame(best_bdwd_price).T[['date', 'predictresult']][-4:]
+    y_hat_yuedu = pd.DataFrame(
+        best_bdwd_price).T[['date', 'predictresult']][-4:]
     y_hat_yuedu['ds'] = pd.to_datetime(y_hat_yuedu['date'])
     # Plot the monthly PP futures forecast
     plot_pp_predict_result(y_hat_yuedu, global_config)
 
-    y_hat_zhoudu = pd.DataFrame(best_bdwd_price).T[['date', 'predictresult']][2:4]
+    y_hat_zhoudu = pd.DataFrame(
+        best_bdwd_price).T[['date', 'predictresult']][2:4]
     y_hat_zhoudu['ds'] = pd.to_datetime(y_hat_zhoudu['date'])
-    y_hat_zhoudu.drop(columns=['date'],inplace=True)
+    y_hat_zhoudu.drop(columns=['date'], inplace=True)
     print(y_hat_zhoudu)
     # Fetch the best weekly model's five-day forecast prices
    five_days_predict_price = pd.read_csv('juxitingdataset/predict.csv')
     week_price_modelname = best_bdwd_price['week_price']['model_name']
-    five_days_predict_price = five_days_predict_price[['ds',week_price_modelname]]
-    five_days_predict_price['ds'] = pd.to_datetime(five_days_predict_price['ds'])
-    five_days_predict_price.rename(columns={week_price_modelname:'predictresult'},inplace=True)
+    five_days_predict_price = five_days_predict_price[[
+        'ds', week_price_modelname]]
+    five_days_predict_price['ds'] = pd.to_datetime(
+        five_days_predict_price['ds'])
+    five_days_predict_price.rename(
+        columns={week_price_modelname: 'predictresult'}, inplace=True)
     # Index labels: next day through next five days
     index_labels = ["次日", "次二日", "次三日", "次四日", "次五日"]
     five_days_predict_price.index = index_labels
-    y_hat_riduzhoudu = pd.concat([y_hat_zhoudu, five_days_predict_price], axis=0)
+    y_hat_riduzhoudu = pd.concat(
+        [y_hat_zhoudu, five_days_predict_price], axis=0)
     y_hat_riduzhoudu = y_hat_riduzhoudu.sort_values(by='ds')
     print(y_hat_riduzhoudu)
     # Plot the daily/weekly PP futures forecast
-    plot_pp_predict_result(y_hat_riduzhoudu, global_config,'zhoudu')
-
-
-
-
-
+    plot_pp_predict_result(y_hat_riduzhoudu, global_config, 'zhoudu')
 
 
 def pp_export_pdf_v1(num_indicators=475, num_models=21, num_dayindicator=202, inputsize=5, dataset='dataset', time='2024-07-30', reportname='report.pdf'):
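To see how the weekly rows and the renamed five-day rows interleave before plotting, here is a toy version of the concat/sort step (column names as in the function; the data is invented):

    import pandas as pd

    weekly = pd.DataFrame({'predictresult': [7100, 7150],
                           'ds': pd.to_datetime(['2024-08-02', '2024-08-09'])})
    daily = pd.DataFrame({'ds': pd.to_datetime(['2024-07-31', '2024-08-01']),
                          'predictresult': [7080, 7090]})
    combined = pd.concat([weekly, daily], axis=0).sort_values(by='ds')
    # rows now run in date order regardless of which frame they came from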