聚烯烃添加特征重要度图
This commit is contained in:
parent
538ef426f9
commit
b8ca24e07b
@ -553,6 +553,61 @@ def featureAnalysis(df, dataset, y):
|
|||||||
# plt.close()
|
# plt.close()
|
||||||
|
|
||||||
|
|
||||||
|
# 计算特征重要度
|
||||||
|
def feature_importance(X_train, y_train):
    """Fit XGBoost and LightGBM regressors and save their feature-importance plots.

    Two regressors with default parameters (``XGBRegressor`` and
    ``LGBMRegressor``) are fitted on the given training data. For each model a
    feature-importance chart is rendered and written as a PNG file into the
    directory ``config.dataset``; a log line is emitted through
    ``config.logger`` after each file is saved.

    Side effects:
        * Sets ``plt.rcParams['font.sans-serif']`` to ``['SimHei']`` for the
          whole process so the Chinese plot title can be rendered.
        * Writes ``特征重要度1.png`` (XGBoost) and ``特征重要度2.png``
          (LightGBM, importance type ``gain``) into ``config.dataset``.

    Args:
        X_train: Feature matrix, passed unchanged to both models' ``fit``.
        y_train: Target values, passed unchanged to both models' ``fit``.

    Returns:
        None.
    """
    import lightgbm as lgb
    import matplotlib.pyplot as plt
    from xgboost import XGBRegressor, plot_importance

    # A CJK-capable font is required for the Chinese title below.
    plt.rcParams['font.sans-serif'] = ['SimHei']

    # ---- XGBoost importance ------------------------------------------------
    xgb_model = XGBRegressor()
    xgb_model.fit(X_train, y_train)

    # plot_importance() opens its own figure when no ``ax`` is supplied, so no
    # figure is pre-created here: a pre-created one would never be drawn on and
    # never be closed.
    ax = plot_importance(xgb_model)
    fig = ax.figure
    fig.set_size_inches(8, 7)
    ax.set_title('特征重要度1')

    # Save and close through the explicit figure handle rather than relying on
    # pyplot's implicit "current figure".
    fig.savefig(os.path.join(config.dataset, '特征重要度1.png'))
    plt.close(fig)
    config.logger.info('特征重要度1.png 已保存')

    # ---- LightGBM importance -----------------------------------------------
    lgb_model = lgb.LGBMRegressor()
    lgb_model.fit(X_train, y_train)

    # importance_type may be 'gain' or 'split'; 'gain' is used here.
    ax = lgb.plot_importance(lgb_model, importance_type='gain')
    ax.set_title('Feature Importance - LGBMRegressor', fontsize=12)
    # Use one uniform font size for the title, both axis labels and all tick
    # labels (this also overrides the title size set on the previous line).
    for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
                 ax.get_xticklabels() + ax.get_yticklabels()):
        item.set_fontsize(9)

    fig = ax.figure
    fig.savefig(os.path.join(config.dataset, '特征重要度2.png'))
    plt.close(fig)
    config.logger.info('特征重要度2.png 已保存')
|
||||||
|
|
||||||
|
|
||||||
def corr_feature(df):
|
def corr_feature(df):
|
||||||
# 重新命名列名,列名排序,y在第一个
|
# 重新命名列名,列名排序,y在第一个
|
||||||
df.reindex(['y'] + sorted(df.columns.difference(['y'])))
|
df.reindex(['y'] + sorted(df.columns.difference(['y'])))
|
||||||
@ -1102,6 +1157,7 @@ def datachuli_juxiting(df_zhibiaoshuju, df_zhibiaoliebiao, datecol='date', end_t
|
|||||||
|
|
||||||
calculate_correlation(df=df)
|
calculate_correlation(df=df)
|
||||||
featureAnalysis(df, dataset=dataset, y='y')
|
featureAnalysis(df, dataset=dataset, y='y')
|
||||||
|
|
||||||
return df
|
return df
|
||||||
|
|
||||||
|
|
||||||
|
@ -349,6 +349,11 @@ def ex_Model_Juxiting(df, horizon, input_size, train_steps, val_check_steps, ear
|
|||||||
config.logger.info("\nTesting set head:")
|
config.logger.info("\nTesting set head:")
|
||||||
config.logger.info(df_test.head())
|
config.logger.info(df_test.head())
|
||||||
|
|
||||||
|
# 特征重要度
|
||||||
|
X_train = df_train.drop(columns=['y'])
|
||||||
|
y_train = df_train['y']
|
||||||
|
feature_importance(X_train=X_train, y_train=y_train)
|
||||||
|
|
||||||
models = [
|
models = [
|
||||||
NHITS(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps,
|
NHITS(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps,
|
||||||
scaler_type='standard', activation='ReLU', early_stop_patience_steps=early_stop_patience_steps),
|
scaler_type='standard', activation='ReLU', early_stop_patience_steps=early_stop_patience_steps),
|
||||||
@ -396,7 +401,8 @@ def ex_Model_Juxiting(df, horizon, input_size, train_steps, val_check_steps, ear
|
|||||||
|
|
||||||
# VanillaTransformer(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard', ), //报错了
|
# VanillaTransformer(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard', ), //报错了
|
||||||
# Autoformer(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard', ), //报错了
|
# Autoformer(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard', ), //报错了
|
||||||
NBEATS(h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard', ),
|
NBEATS(h=horizon, input_size=input_size, max_steps=train_steps,
|
||||||
|
val_check_steps=val_check_steps, scaler_type='standard', ),
|
||||||
# NBEATSx (h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard',activation='ReLU', ), //报错
|
# NBEATSx (h=horizon, input_size=input_size, max_steps=train_steps, val_check_steps=val_check_steps, scaler_type='standard',activation='ReLU', ), //报错
|
||||||
# HINT(h=horizon),
|
# HINT(h=horizon),
|
||||||
|
|
||||||
@ -462,8 +468,8 @@ def ex_Model_Juxiting(df, horizon, input_size, train_steps, val_check_steps, ear
|
|||||||
df_predict['created_dt'] = end_time
|
df_predict['created_dt'] = end_time
|
||||||
|
|
||||||
# 预测结果保留整数(先四舍五入再转换为整数类型)
|
# 预测结果保留整数(先四舍五入再转换为整数类型)
|
||||||
df_predict = df_predict.round().astype({col: 'int' for col in df_predict.columns if col not in ['ds', 'created_dt']})
|
df_predict = df_predict.round().astype(
|
||||||
|
{col: 'int' for col in df_predict.columns if col not in ['ds', 'created_dt']})
|
||||||
|
|
||||||
# 保存预测值
|
# 保存预测值
|
||||||
df_predict.to_csv(os.path.join(config.dataset, "predict.csv"), index=False)
|
df_predict.to_csv(os.path.join(config.dataset, "predict.csv"), index=False)
|
||||||
@ -2055,7 +2061,6 @@ def model_losss_juxiting(sqlitedb, end_time, is_fivemodels):
|
|||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
print(e)
|
print(e)
|
||||||
|
|
||||||
|
|
||||||
def add_upper_lower_bound(row):
|
def add_upper_lower_bound(row):
|
||||||
print(row['columns'])
|
print(row['columns'])
|
||||||
print(type(row['columns']))
|
print(type(row['columns']))
|
||||||
@ -3343,7 +3348,8 @@ def pp_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, input
|
|||||||
stime = df3['ds'].iloc[0]
|
stime = df3['ds'].iloc[0]
|
||||||
etime = df3['ds'].iloc[-1]
|
etime = df3['ds'].iloc[-1]
|
||||||
# 添加偏差率表格
|
# 添加偏差率表格
|
||||||
fivemodels = '、'.join(eval_df['模型(Model)'].values[:5]) # 字符串形式,后面写入字符串使用
|
fivemodels = '、'.join(
|
||||||
|
eval_df['模型(Model)'].values[:5]) # 字符串形式,后面写入字符串使用
|
||||||
content.append(Graphs.draw_text(
|
content.append(Graphs.draw_text(
|
||||||
f'预测使用了{num_models}个模型进行训练,使用评估结果MAE前五的模型分别是 {fivemodels} ,模型上一预测区间 {stime} -- {etime}的偏差率(%)分别是:'))
|
f'预测使用了{num_models}个模型进行训练,使用评估结果MAE前五的模型分别是 {fivemodels} ,模型上一预测区间 {stime} -- {etime}的偏差率(%)分别是:'))
|
||||||
# # 添加偏差率表格
|
# # 添加偏差率表格
|
||||||
@ -3541,36 +3547,37 @@ def pp_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, input
|
|||||||
def pp_bdwd_png(global_config):
    """Plot the monthly and the combined daily/weekly PP futures forecasts.

    The best-model results returned by ``find_best_models`` are turned into a
    table with one row per result entry. Two charts are then produced through
    ``plot_pp_predict_result``:

    1. the last four rows of that table (monthly chart);
    2. rows at positions 2-3 of that table, merged with the five-day forecast
       of the best weekly model read from ``juxitingdataset/predict.csv`` and
       sorted by date (chart variant ``'zhoudu'``).

    Args:
        global_config: Configuration mapping. ``global_config['end_time']`` is
            passed to ``find_best_models`` as ``date``; the whole object is
            forwarded to ``find_best_models`` and ``plot_pp_predict_result``.

    Returns:
        None.
    """
    best_bdwd_price = find_best_models(
        date=global_config['end_time'], global_config=global_config)

    # Build the transposed result table once; both charts slice from it.
    best_models_df = pd.DataFrame(
        best_bdwd_price).T[['date', 'predictresult']]

    # Explicit copies: the frames are modified below (new column, in-place
    # drop/rename), which on a bare slice is chained assignment and triggers
    # pandas' SettingWithCopyWarning.
    y_hat_yuedu = best_models_df[-4:].copy()
    y_hat_yuedu['ds'] = pd.to_datetime(y_hat_yuedu['date'])
    # Plot the PP futures monthly forecast chart.
    plot_pp_predict_result(y_hat_yuedu, global_config)

    y_hat_zhoudu = best_models_df[2:4].copy()
    y_hat_zhoudu['ds'] = pd.to_datetime(y_hat_zhoudu['date'])
    y_hat_zhoudu.drop(columns=['date'], inplace=True)
    print(y_hat_zhoudu)

    # Five-day forecast of this week's best model.
    # NOTE(review): the path is hard-coded and relative to the working
    # directory — confirm it matches where predict.csv is actually written.
    five_days_predict_price = pd.read_csv('juxitingdataset/predict.csv')
    week_price_modelname = best_bdwd_price['week_price']['model_name']
    five_days_predict_price = five_days_predict_price[[
        'ds', week_price_modelname]].copy()
    five_days_predict_price['ds'] = pd.to_datetime(
        five_days_predict_price['ds'])
    five_days_predict_price.rename(
        columns={week_price_modelname: 'predictresult'}, inplace=True)

    # Label the rows: next day, 2nd, 3rd, 4th and 5th following day.
    # Assigning the index raises ValueError unless the CSV has exactly five rows.
    index_labels = ["次日", "次二日", "次三日", "次四日", "次五日"]
    five_days_predict_price.index = index_labels

    y_hat_riduzhoudu = pd.concat(
        [y_hat_zhoudu, five_days_predict_price], axis=0)
    y_hat_riduzhoudu = y_hat_riduzhoudu.sort_values(by='ds')
    print(y_hat_riduzhoudu)
    # Plot the PP futures daily/weekly forecast chart.
    plot_pp_predict_result(y_hat_riduzhoudu, global_config, 'zhoudu')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def pp_export_pdf_v1(num_indicators=475, num_models=21, num_dayindicator=202, inputsize=5, dataset='dataset', time='2024-07-30', reportname='report.pdf'):
|
def pp_export_pdf_v1(num_indicators=475, num_models=21, num_dayindicator=202, inputsize=5, dataset='dataset', time='2024-07-30', reportname='report.pdf'):
|
||||||
global y
|
global y
|
||||||
# 创建内容对应的空列表
|
# 创建内容对应的空列表
|
||||||
|
Loading…
Reference in New Issue
Block a user