原油价格预测第六次发版基版

This commit is contained in:
workpc 2024-12-27 18:04:40 +08:00
parent 9f209d0b3d
commit 19f76e6b83
3 changed files with 75 additions and 93 deletions

View File

@ -249,7 +249,8 @@ def predict_main():
# 模型报告 # 模型报告
logger.info('制作报告ing') logger.info('制作报告ing')
title = f'{settings}--{end_time}-预测报告' # 报告标题 title = f'{settings}--{end_time}-预测报告' # 报告标题
reportname = f'Brent原油大模型预测--{end_time}.pdf' # 报告文件名
reportname = reportname.replace(':', '-') # 替换冒号
brent_export_pdf(dataset=dataset,num_models = 5 if is_fivemodels else 22,time=end_time, brent_export_pdf(dataset=dataset,num_models = 5 if is_fivemodels else 22,time=end_time,
reportname=reportname,sqlitedb=sqlitedb), reportname=reportname,sqlitedb=sqlitedb),
@ -282,6 +283,6 @@ if __name__ == '__main__':
global end_time global end_time
is_on = True is_on = True
# 遍历2024-11-25 到 2024-12-3 之间的工作日日期 # 遍历2024-11-25 到 2024-12-3 之间的工作日日期
for i_time in pd.date_range('2024-12-24', '2024-12-25', freq='B'): for i_time in pd.date_range('2024-12-27', '2024-12-28', freq='B'):
end_time = i_time.strftime('%Y-%m-%d') end_time = i_time.strftime('%Y-%m-%d')
predict_main() predict_main()

View File

@ -237,21 +237,24 @@ def model_losss(sqlitedb,end_time):
# 预测数据处理 predict # 预测数据处理 predict
# df_combined = loadcsv(os.path.join(dataset,"cross_validation.csv")) # df_combined = loadcsv(os.path.join(dataset,"cross_validation.csv"))
# df_combined = dateConvert(df_combined) # df_combined = dateConvert(df_combined)
df_combined = sqlitedb.select_data('accuracy') df_combined = sqlitedb.select_data('accuracy',where_condition=f"created_dt <= '{end_time}'")
df_combined4 = df_combined.copy() # 备份df_combined,后面画图需要
# 删除缺失值大于80%的列 # 删除缺失值大于80%的列
logger.info(df_combined.shape)
df_combined = df_combined.loc[:, df_combined.isnull().mean() < 0.8] df_combined = df_combined.loc[:, df_combined.isnull().mean() < 0.8]
logger.info(df_combined.shape)
# 删除缺失值 # 删除缺失值
df_combined.dropna(inplace=True) df_combined.dropna(inplace=True)
logger.info(df_combined.shape)
# 其他列转为数值类型 # 其他列转为数值类型
df_combined = df_combined.astype({col: 'float32' for col in df_combined.columns if col not in ['CREAT_DATE','ds','created_dt'] }) df_combined = df_combined.astype({col: 'float32' for col in df_combined.columns if col not in ['CREAT_DATE','ds','created_dt'] })
# 使用 groupby 和 transform 结合 lambda 函数来获取每个分组中 cutoff 的最小值,并创建一个新的列来存储这个最大值 # 使用 groupby 和 transform 结合 lambda 函数来获取每个分组中 cutoff 的最小值,并创建一个新的列来存储这个最大值
df_combined['max_cutoff'] = df_combined.groupby('ds')['CREAT_DATE'].transform('min') df_combined['max_cutoff'] = df_combined.groupby('ds')['CREAT_DATE'].transform('max')
# 然后筛选出那些 cutoff 等于 max_cutoff 的行,这样就得到了每个分组中 cutoff 最大的行,并保留了其他列 # 然后筛选出那些 cutoff 等于 max_cutoff 的行,这样就得到了每个分组中 cutoff 最大的行,并保留了其他列
df_combined = df_combined[df_combined['CREAT_DATE'] == df_combined['max_cutoff']] df_combined = df_combined[df_combined['CREAT_DATE'] == df_combined['max_cutoff']]
df_combined4 = df_combined.copy() # 备份df_combined,后面画图需要
# 删除模型生成的cutoff列 # 删除模型生成的cutoff列
df_combined.drop(columns=['CREAT_DATE', 'max_cutoff','created_dt','min_within_quantile','max_within_quantile','id','min_price','max_price','LOW_PRICE','HIGH_PRICE'], inplace=True) df_combined.drop(columns=['CREAT_DATE', 'max_cutoff','created_dt','min_within_quantile','max_within_quantile','id','min_price','max_price','LOW_PRICE','HIGH_PRICE','mean'], inplace=True)
# 获取模型名称 # 获取模型名称
modelnames = df_combined.columns.to_list()[1:] modelnames = df_combined.columns.to_list()[1:]
if 'y' in modelnames: if 'y' in modelnames:
@ -333,17 +336,14 @@ def model_losss(sqlitedb,end_time):
names_df['columns'] = names_df.apply(add_rote_column, axis=1) names_df['columns'] = names_df.apply(add_rote_column, axis=1)
def add_upper_lower_bound(row): def add_upper_lower_bound(row):
print(row['columns'])
print(type(row['columns']))
# 计算上边界值 # 计算上边界值
upper_bound = row.max() upper_bound = df_combined3.loc[row.name,row['columns']].max()
# 计算下边界值 # 计算下边界值
lower_bound = row.min() lower_bound = df_combined3.loc[row.name,row['columns']].min()
return pd.Series([lower_bound, upper_bound], index=['min_within_quantile', 'max_within_quantile']) return pd.Series([lower_bound, upper_bound], index=['min_within_quantile', 'max_within_quantile'])
df_combined3[['min_within_quantile','max_within_quantile']] = names_df.apply(add_upper_lower_bound, axis=1)
# df_combined3[['min_within_quantile','max_within_quantile']] = names_df.apply(add_upper_lower_bound, axis=1)
# 取前五最佳模型的最大最小值作为上下边界值
df_combined3[['min_within_quantile','max_within_quantile']]= df_combined3[modelnames].apply(add_upper_lower_bound, axis=1)
def find_closest_values(row): def find_closest_values(row):
x = row.y x = row.y
@ -419,7 +419,7 @@ def model_losss(sqlitedb,end_time):
for id in ids: for id in ids:
row = predict_y[predict_y['id'] == id] row = predict_y[predict_y['id'] == id]
try: try:
sqlitedb.update_data('accuracy',f"min_price = {row['min_price'].values[0]},max_price = {row['max_price'].values[0]},mean={row['mean']}",f"id = {id}") sqlitedb.update_data('accuracy',f"min_price = {row['min_price'].values[0]},max_price = {row['max_price'].values[0]},mean={row['mean'].values[0]}",f"id = {id}")
except: except:
logger.error(f'更新accuracy表中的min_price,max_price,mean值失败row={row}') logger.error(f'更新accuracy表中的min_price,max_price,mean值失败row={row}')
@ -467,10 +467,8 @@ def model_losss(sqlitedb,end_time):
accuracy_rote = 0 accuracy_rote = 0
for i,group in df3.groupby('CREAT_DATE'): for i,group in df3.groupby('CREAT_DATE'):
accuracy_rote += (group['ACCURACY'].sum()/len(group))*weight_dict[len(group)-1] accuracy_rote += (group['ACCURACY'].sum()/len(group))*weight_dict[len(group)-1]
df3.to_csv(os.path.join(dataset,f'accuracy_{endtime}.csv'),index=False)
df4 = pd.DataFrame(columns=['开始日期','结束日期','准确率']) df4 = pd.DataFrame(columns=['开始日期','结束日期','准确率'])
df4.loc[len(df4)] = {'开始日期':ds_dates[0],'结束日期':ds_dates[-1],'准确率':accuracy_rote} df4.loc[len(df4)] = {'开始日期':ds_dates[0],'结束日期':ds_dates[-1],'准确率':accuracy_rote}
df4.to_csv(os.path.join(dataset,f'accuracy_rote_{endtime}.csv'),index=False)
df4.to_sql("accuracy_rote", con=sqlitedb.connection, if_exists='append', index=False) df4.to_sql("accuracy_rote", con=sqlitedb.connection, if_exists='append', index=False)
create_dates,ds_dates = get_week_date(end_time) create_dates,ds_dates = get_week_date(end_time)
_get_accuracy_rate(df,create_dates,ds_dates,end_time) _get_accuracy_rate(df,create_dates,ds_dates,end_time)
@ -536,18 +534,20 @@ def model_losss(sqlitedb,end_time):
plt.xlabel('日期') plt.xlabel('日期')
plt.ylabel('价格') plt.ylabel('价格')
plt.savefig(os.path.join(dataset,f'{end_time}历史价格-预测值.png'), bbox_inches='tight') plt.savefig(os.path.join(dataset,'历史价格-预测值.png'), bbox_inches='tight')
plt.close() plt.close()
def _plt_modeltopten_predict_ture(df): def _plt_modeltopten_predict_ture(df):
lens = df.shape[0] if df.shape[0] < 180 else 90 df['max_cutoff'] = df.groupby('ds')['CREAT_DATE'].transform('max')
df = df[df['CREAT_DATE'] == df['max_cutoff']]
df['mean'] = df['mean'].astype(float)
lens = df.shape[0] if df.shape[0] < 180 else 180
df = df[-lens:] # 取180个数据点画图 df = df[-lens:] # 取180个数据点画图
df['mean_price'] = df[allmodelnames[:10]].mean(axis=1)
# 历史价格 # 历史价格
plt.figure(figsize=(20, 10)) plt.figure(figsize=(20, 10))
plt.plot(df['ds'], df['y'], label='真实值') plt.plot(df['ds'], df['y'], label='真实值')
plt.plot(df['ds'], df['mean_price'], label='模型前十均值', linestyle='--', color='orange') plt.plot(df['ds'], df['mean'], label='模型前十均值', linestyle='--', color='orange')
# 颜色填充 # 颜色填充
plt.fill_between(df['ds'], df['max_price'], df['min_price'], alpha=0.2) plt.fill_between(df['ds'], df['max_price'], df['min_price'], alpha=0.2)
# markers = ['o', 's', '^', 'D', 'v', '*', 'p', 'h', 'H', '+', 'x', 'd'] # markers = ['o', 's', '^', 'D', 'v', '*', 'p', 'h', 'H', '+', 'x', 'd']
@ -568,7 +568,7 @@ def model_losss(sqlitedb,end_time):
plt.xlabel('日期') plt.xlabel('日期')
plt.ylabel('价格') plt.ylabel('价格')
plt.savefig(os.path.join(dataset,f'{end_time}历史价格-预测值1.png'), bbox_inches='tight') plt.savefig(os.path.join(dataset,'历史价格-预测值1.png'), bbox_inches='tight')
plt.close() plt.close()
@ -935,10 +935,9 @@ def brent_export_pdf(num_indicators=475,num_models=21, num_dayindicator=202,inpu
# except Exception as e: # except Exception as e:
# print(f'绘制第{i+1}个特征{col}与价格散点图时出错:{e}') # print(f'绘制第{i+1}个特征{col}与价格散点图时出错:{e}')
### 添加标题 ### 添加标题
content.append(Graphs.draw_title(f'{y}{time}预测报告')) content.append(Graphs.draw_title(f'{y}{end_time}预测报告'))
### 预测结果 ### 预测结果
content.append(Graphs.draw_little_title('一、预测结果:')) content.append(Graphs.draw_little_title('一、预测结果:'))
@ -946,16 +945,14 @@ def brent_export_pdf(num_indicators=475,num_models=21, num_dayindicator=202,inpu
content.append(Graphs.draw_img(os.path.join(dataset,'历史价格-预测值.png'))) content.append(Graphs.draw_img(os.path.join(dataset,'历史价格-预测值.png')))
# 波动率画图逻辑 # 波动率画图逻辑
content.append(Graphs.draw_text('图示说明:')) content.append(Graphs.draw_text('图示说明:'))
content.append(Graphs.draw_text('1. 确定波动率置信区间统计近60个交易日的真实价格波动率找出在 10% 90% 的分位值作为波动率置信区间;')) content.append(Graphs.draw_text(' 确定波动率置信区间:设置残差置信阈值,以每周最佳模型为基准,选取在置信区间的预测值作为置信区间;'))
content.append(Graphs.draw_text('2. 确定通道上界:在所有模型的预测结果中 <= 前一天真实价格 乘以 90%的置信波动分位数'))
content.append(Graphs.draw_text('3. 确定通道下界:在所有模型的预测结果中 >= 前一天真实价格 乘以 10%的置信波动分位数'))
content.append(Graphs.draw_text('4. 预测结果没有真实值作为参考依据通道上界取近60个交易日内预测在上界值的模型对应的预测值通道下界同理'))
content.append(Graphs.draw_text('5. 预测结果选用近20个交易日内最多接近真实值的模型的预测值对应的预测结果'))
content.append(Graphs.draw_text('6. 预测结果在通道外的,代表最接近真实值的预测结果不在置信波动范围内。'))
# 添加历史走势及预测价格的走势图片 # 添加历史走势及预测价格的走势图片
content.append(Graphs.draw_img(os.path.join(dataset,'历史价格-预测值1.png'))) content.append(Graphs.draw_img(os.path.join(dataset,'历史价格-预测值1.png')))
content.append(Graphs.draw_text('图示说明:'))
content.append(Graphs.draw_text(' 确定波动率置信区间使用模型评估指标MAE得到前十个模型取平均值上下1.5作为价格波动置信区间;'))
# 取df中y列为空的行 # 取df中y列为空的行
import pandas as pd import pandas as pd
@ -990,6 +987,8 @@ def brent_export_pdf(num_indicators=475,num_models=21, num_dayindicator=202,inpu
content.append(Graphs.draw_little_title('二、上一预测周期偏差率分析:')) content.append(Graphs.draw_little_title('二、上一预测周期偏差率分析:'))
df = pd.read_csv(os.path.join(dataset,'testandpredict_groupby.csv'),encoding='utf-8') df = pd.read_csv(os.path.join(dataset,'testandpredict_groupby.csv'),encoding='utf-8')
df4 = df.copy() # 计算偏差率使用 df4 = df.copy() # 计算偏差率使用
# 去掉created_dt 列
df4 = df4.drop(columns=['created_dt'])
# 计算模型偏差率 # 计算模型偏差率
#计算各列对于y列的差值百分比 #计算各列对于y列的差值百分比
df3 = pd.DataFrame() # 存储偏差率 df3 = pd.DataFrame() # 存储偏差率
@ -1134,7 +1133,7 @@ def brent_export_pdf(num_indicators=475,num_models=21, num_dayindicator=202,inpu
eval_df = eval_df.T eval_df = eval_df.T
# df重置索引 # df重置索引
eval_df = eval_df.reset_index() eval_df = eval_df.reset_index()
eval_df = eval_df.T # eval_df = eval_df.T
# # 添加表格 # # 添加表格
data = eval_df.values.tolist() data = eval_df.values.tolist()
col_width = 500/len(eval_df.columns) col_width = 500/len(eval_df.columns)

File diff suppressed because one or more lines are too long