原油价格预测第六次发版基版

This commit is contained in:
workpc 2024-12-27 18:04:40 +08:00
parent 9f209d0b3d
commit 19f76e6b83
3 changed files with 75 additions and 93 deletions

View File

@ -249,7 +249,8 @@ def predict_main():
# 模型报告
logger.info('制作报告ing')
title = f'{settings}--{end_time}-预测报告' # 报告标题
reportname = f'Brent原油大模型预测--{end_time}.pdf' # 报告文件名
reportname = reportname.replace(':', '-') # 替换冒号
brent_export_pdf(dataset=dataset,num_models = 5 if is_fivemodels else 22,time=end_time,
reportname=reportname,sqlitedb=sqlitedb),
@ -282,6 +283,6 @@ if __name__ == '__main__':
global end_time
is_on = True
# 遍历下方 pd.date_range 指定的起止日期之间的工作日(freq='B'),逐日执行预测
for i_time in pd.date_range('2024-12-24', '2024-12-25', freq='B'):
for i_time in pd.date_range('2024-12-27', '2024-12-28', freq='B'):
end_time = i_time.strftime('%Y-%m-%d')
predict_main()

View File

@ -237,21 +237,24 @@ def model_losss(sqlitedb,end_time):
# 预测数据处理 predict
# df_combined = loadcsv(os.path.join(dataset,"cross_validation.csv"))
# df_combined = dateConvert(df_combined)
df_combined = sqlitedb.select_data('accuracy')
df_combined = sqlitedb.select_data('accuracy',where_condition=f"created_dt <= '{end_time}'")
df_combined4 = df_combined.copy() # 备份df_combined,后面画图需要
# 删除缺失值大于80%的列
logger.info(df_combined.shape)
df_combined = df_combined.loc[:, df_combined.isnull().mean() < 0.8]
logger.info(df_combined.shape)
# 删除缺失值
df_combined.dropna(inplace=True)
logger.info(df_combined.shape)
# 其他列转为数值类型
df_combined = df_combined.astype({col: 'float32' for col in df_combined.columns if col not in ['CREAT_DATE','ds','created_dt'] })
# 使用 groupby 和 transform 获取每个 ds 分组中 CREAT_DATE 的最大值,并创建新列 max_cutoff 存储该最大值
df_combined['max_cutoff'] = df_combined.groupby('ds')['CREAT_DATE'].transform('min')
df_combined['max_cutoff'] = df_combined.groupby('ds')['CREAT_DATE'].transform('max')
# 然后筛选出那些 cutoff 等于 max_cutoff 的行,这样就得到了每个分组中 cutoff 最大的行,并保留了其他列
df_combined = df_combined[df_combined['CREAT_DATE'] == df_combined['max_cutoff']]
df_combined4 = df_combined.copy() # 备份df_combined,后面画图需要
# 删除模型生成的cutoff列
df_combined.drop(columns=['CREAT_DATE', 'max_cutoff','created_dt','min_within_quantile','max_within_quantile','id','min_price','max_price','LOW_PRICE','HIGH_PRICE'], inplace=True)
df_combined.drop(columns=['CREAT_DATE', 'max_cutoff','created_dt','min_within_quantile','max_within_quantile','id','min_price','max_price','LOW_PRICE','HIGH_PRICE','mean'], inplace=True)
# 获取模型名称
modelnames = df_combined.columns.to_list()[1:]
if 'y' in modelnames:
@ -333,17 +336,14 @@ def model_losss(sqlitedb,end_time):
names_df['columns'] = names_df.apply(add_rote_column, axis=1)
def add_upper_lower_bound(row):
print(row['columns'])
print(type(row['columns']))
# 计算上边界值
upper_bound = row.max()
upper_bound = df_combined3.loc[row.name,row['columns']].max()
# 计算下边界值
lower_bound = row.min()
lower_bound = df_combined3.loc[row.name,row['columns']].min()
return pd.Series([lower_bound, upper_bound], index=['min_within_quantile', 'max_within_quantile'])
# df_combined3[['min_within_quantile','max_within_quantile']] = names_df.apply(add_upper_lower_bound, axis=1)
# 取前五最佳模型的最大最小值作为上下边界值
df_combined3[['min_within_quantile','max_within_quantile']]= df_combined3[modelnames].apply(add_upper_lower_bound, axis=1)
df_combined3[['min_within_quantile','max_within_quantile']] = names_df.apply(add_upper_lower_bound, axis=1)
def find_closest_values(row):
x = row.y
@ -419,7 +419,7 @@ def model_losss(sqlitedb,end_time):
for id in ids:
row = predict_y[predict_y['id'] == id]
try:
sqlitedb.update_data('accuracy',f"min_price = {row['min_price'].values[0]},max_price = {row['max_price'].values[0]},mean={row['mean']}",f"id = {id}")
sqlitedb.update_data('accuracy',f"min_price = {row['min_price'].values[0]},max_price = {row['max_price'].values[0]},mean={row['mean'].values[0]}",f"id = {id}")
except:
logger.error(f'更新accuracy表中的min_price,max_price,mean值失败row={row}')
@ -467,10 +467,8 @@ def model_losss(sqlitedb,end_time):
accuracy_rote = 0
for i,group in df3.groupby('CREAT_DATE'):
accuracy_rote += (group['ACCURACY'].sum()/len(group))*weight_dict[len(group)-1]
df3.to_csv(os.path.join(dataset,f'accuracy_{endtime}.csv'),index=False)
df4 = pd.DataFrame(columns=['开始日期','结束日期','准确率'])
df4.loc[len(df4)] = {'开始日期':ds_dates[0],'结束日期':ds_dates[-1],'准确率':accuracy_rote}
df4.to_csv(os.path.join(dataset,f'accuracy_rote_{endtime}.csv'),index=False)
df4.to_sql("accuracy_rote", con=sqlitedb.connection, if_exists='append', index=False)
create_dates,ds_dates = get_week_date(end_time)
_get_accuracy_rate(df,create_dates,ds_dates,end_time)
@ -536,18 +534,20 @@ def model_losss(sqlitedb,end_time):
plt.xlabel('日期')
plt.ylabel('价格')
plt.savefig(os.path.join(dataset,f'{end_time}历史价格-预测值.png'), bbox_inches='tight')
plt.savefig(os.path.join(dataset,'历史价格-预测值.png'), bbox_inches='tight')
plt.close()
def _plt_modeltopten_predict_ture(df):
lens = df.shape[0] if df.shape[0] < 180 else 90
df['max_cutoff'] = df.groupby('ds')['CREAT_DATE'].transform('max')
df = df[df['CREAT_DATE'] == df['max_cutoff']]
df['mean'] = df['mean'].astype(float)
lens = df.shape[0] if df.shape[0] < 180 else 180
df = df[-lens:] # 取180个数据点画图
df['mean_price'] = df[allmodelnames[:10]].mean(axis=1)
# 历史价格
plt.figure(figsize=(20, 10))
plt.plot(df['ds'], df['y'], label='真实值')
plt.plot(df['ds'], df['mean_price'], label='模型前十均值', linestyle='--', color='orange')
plt.plot(df['ds'], df['mean'], label='模型前十均值', linestyle='--', color='orange')
# 颜色填充
plt.fill_between(df['ds'], df['max_price'], df['min_price'], alpha=0.2)
# markers = ['o', 's', '^', 'D', 'v', '*', 'p', 'h', 'H', '+', 'x', 'd']
@ -568,7 +568,7 @@ def model_losss(sqlitedb,end_time):
plt.xlabel('日期')
plt.ylabel('价格')
plt.savefig(os.path.join(dataset,f'{end_time}历史价格-预测值1.png'), bbox_inches='tight')
plt.savefig(os.path.join(dataset,'历史价格-预测值1.png'), bbox_inches='tight')
plt.close()
@ -936,9 +936,8 @@ def brent_export_pdf(num_indicators=475,num_models=21, num_dayindicator=202,inpu
# print(f'绘制第{i+1}个特征{col}与价格散点图时出错:{e}')
### 添加标题
content.append(Graphs.draw_title(f'{y}{time}预测报告'))
content.append(Graphs.draw_title(f'{y}{end_time}预测报告'))
### 预测结果
content.append(Graphs.draw_little_title('一、预测结果:'))
@ -946,15 +945,13 @@ def brent_export_pdf(num_indicators=475,num_models=21, num_dayindicator=202,inpu
content.append(Graphs.draw_img(os.path.join(dataset,'历史价格-预测值.png')))
# 波动率画图逻辑
content.append(Graphs.draw_text('图示说明:'))
content.append(Graphs.draw_text('1. 确定波动率置信区间统计近60个交易日的真实价格波动率找出在 10% 90% 的分位值作为波动率置信区间;'))
content.append(Graphs.draw_text('2. 确定通道上界:在所有模型的预测结果中 <= 前一天真实价格 乘以 90%的置信波动分位数'))
content.append(Graphs.draw_text('3. 确定通道下界:在所有模型的预测结果中 >= 前一天真实价格 乘以 10%的置信波动分位数'))
content.append(Graphs.draw_text('4. 预测结果没有真实值作为参考依据通道上界取近60个交易日内预测在上界值的模型对应的预测值通道下界同理'))
content.append(Graphs.draw_text('5. 预测结果选用近20个交易日内最多接近真实值的模型的预测值对应的预测结果'))
content.append(Graphs.draw_text('6. 预测结果在通道外的,代表最接近真实值的预测结果不在置信波动范围内。'))
content.append(Graphs.draw_text(' 确定波动率置信区间:设置残差置信阈值,以每周最佳模型为基准,选取在置信区间的预测值作为置信区间;'))
# 添加历史走势及预测价格的走势图片
content.append(Graphs.draw_img(os.path.join(dataset,'历史价格-预测值1.png')))
content.append(Graphs.draw_text('图示说明:'))
content.append(Graphs.draw_text(' 确定波动率置信区间使用模型评估指标MAE得到前十个模型取平均值上下1.5作为价格波动置信区间;'))
# 取df中y列为空的行
@ -990,6 +987,8 @@ def brent_export_pdf(num_indicators=475,num_models=21, num_dayindicator=202,inpu
content.append(Graphs.draw_little_title('二、上一预测周期偏差率分析:'))
df = pd.read_csv(os.path.join(dataset,'testandpredict_groupby.csv'),encoding='utf-8')
df4 = df.copy() # 计算偏差率使用
# 去掉created_dt 列
df4 = df4.drop(columns=['created_dt'])
# 计算模型偏差率
#计算各列对于y列的差值百分比
df3 = pd.DataFrame() # 存储偏差率
@ -1134,7 +1133,7 @@ def brent_export_pdf(num_indicators=475,num_models=21, num_dayindicator=202,inpu
eval_df = eval_df.T
# df重置索引
eval_df = eval_df.reset_index()
eval_df = eval_df.T
# eval_df = eval_df.T
# # 添加表格
data = eval_df.values.tolist()
col_width = 500/len(eval_df.columns)

File diff suppressed because one or more lines are too long