原油价格预测第六次发版基版
This commit is contained in:
parent
9f209d0b3d
commit
19f76e6b83
@ -249,7 +249,8 @@ def predict_main():
|
||||
# 模型报告
|
||||
logger.info('制作报告ing')
|
||||
title = f'{settings}--{end_time}-预测报告' # 报告标题
|
||||
|
||||
reportname = f'Brent原油大模型预测--{end_time}.pdf' # 报告文件名
|
||||
reportname = reportname.replace(':', '-') # 替换冒号
|
||||
brent_export_pdf(dataset=dataset,num_models = 5 if is_fivemodels else 22,time=end_time,
|
||||
reportname=reportname,sqlitedb=sqlitedb),
|
||||
|
||||
@ -282,6 +283,6 @@ if __name__ == '__main__':
|
||||
global end_time
|
||||
is_on = True
|
||||
# 遍历2024-11-25 到 2024-12-3 之间的工作日日期
|
||||
for i_time in pd.date_range('2024-12-24', '2024-12-25', freq='B'):
|
||||
for i_time in pd.date_range('2024-12-27', '2024-12-28', freq='B'):
|
||||
end_time = i_time.strftime('%Y-%m-%d')
|
||||
predict_main()
|
||||
|
@ -237,21 +237,24 @@ def model_losss(sqlitedb,end_time):
|
||||
# 预测数据处理 predict
|
||||
# df_combined = loadcsv(os.path.join(dataset,"cross_validation.csv"))
|
||||
# df_combined = dateConvert(df_combined)
|
||||
df_combined = sqlitedb.select_data('accuracy')
|
||||
df_combined = sqlitedb.select_data('accuracy',where_condition=f"created_dt <= '{end_time}'")
|
||||
df_combined4 = df_combined.copy() # 备份df_combined,后面画图需要
|
||||
# 删除缺失值大于80%的列
|
||||
logger.info(df_combined.shape)
|
||||
df_combined = df_combined.loc[:, df_combined.isnull().mean() < 0.8]
|
||||
logger.info(df_combined.shape)
|
||||
# 删除缺失值
|
||||
df_combined.dropna(inplace=True)
|
||||
logger.info(df_combined.shape)
|
||||
# 其他列转为数值类型
|
||||
df_combined = df_combined.astype({col: 'float32' for col in df_combined.columns if col not in ['CREAT_DATE','ds','created_dt'] })
|
||||
# 使用 groupby 和 transform 结合 lambda 函数来获取每个分组中 cutoff 的最小值,并创建一个新的列来存储这个最大值
|
||||
df_combined['max_cutoff'] = df_combined.groupby('ds')['CREAT_DATE'].transform('min')
|
||||
df_combined['max_cutoff'] = df_combined.groupby('ds')['CREAT_DATE'].transform('max')
|
||||
|
||||
# 然后筛选出那些 cutoff 等于 max_cutoff 的行,这样就得到了每个分组中 cutoff 最大的行,并保留了其他列
|
||||
df_combined = df_combined[df_combined['CREAT_DATE'] == df_combined['max_cutoff']]
|
||||
df_combined4 = df_combined.copy() # 备份df_combined,后面画图需要
|
||||
# 删除模型生成的cutoff列
|
||||
df_combined.drop(columns=['CREAT_DATE', 'max_cutoff','created_dt','min_within_quantile','max_within_quantile','id','min_price','max_price','LOW_PRICE','HIGH_PRICE'], inplace=True)
|
||||
df_combined.drop(columns=['CREAT_DATE', 'max_cutoff','created_dt','min_within_quantile','max_within_quantile','id','min_price','max_price','LOW_PRICE','HIGH_PRICE','mean'], inplace=True)
|
||||
# 获取模型名称
|
||||
modelnames = df_combined.columns.to_list()[1:]
|
||||
if 'y' in modelnames:
|
||||
@ -333,17 +336,14 @@ def model_losss(sqlitedb,end_time):
|
||||
names_df['columns'] = names_df.apply(add_rote_column, axis=1)
|
||||
|
||||
def add_upper_lower_bound(row):
|
||||
|
||||
print(row['columns'])
|
||||
print(type(row['columns']))
|
||||
# 计算上边界值
|
||||
upper_bound = row.max()
|
||||
upper_bound = df_combined3.loc[row.name,row['columns']].max()
|
||||
# 计算下边界值
|
||||
lower_bound = row.min()
|
||||
lower_bound = df_combined3.loc[row.name,row['columns']].min()
|
||||
return pd.Series([lower_bound, upper_bound], index=['min_within_quantile', 'max_within_quantile'])
|
||||
|
||||
# df_combined3[['min_within_quantile','max_within_quantile']] = names_df.apply(add_upper_lower_bound, axis=1)
|
||||
|
||||
# 取前五最佳模型的最大最小值作为上下边界值
|
||||
df_combined3[['min_within_quantile','max_within_quantile']]= df_combined3[modelnames].apply(add_upper_lower_bound, axis=1)
|
||||
df_combined3[['min_within_quantile','max_within_quantile']] = names_df.apply(add_upper_lower_bound, axis=1)
|
||||
|
||||
def find_closest_values(row):
|
||||
x = row.y
|
||||
@ -419,7 +419,7 @@ def model_losss(sqlitedb,end_time):
|
||||
for id in ids:
|
||||
row = predict_y[predict_y['id'] == id]
|
||||
try:
|
||||
sqlitedb.update_data('accuracy',f"min_price = {row['min_price'].values[0]},max_price = {row['max_price'].values[0]},mean={row['mean']}",f"id = {id}")
|
||||
sqlitedb.update_data('accuracy',f"min_price = {row['min_price'].values[0]},max_price = {row['max_price'].values[0]},mean={row['mean'].values[0]}",f"id = {id}")
|
||||
except:
|
||||
logger.error(f'更新accuracy表中的min_price,max_price,mean值失败,row={row}')
|
||||
|
||||
@ -467,10 +467,8 @@ def model_losss(sqlitedb,end_time):
|
||||
accuracy_rote = 0
|
||||
for i,group in df3.groupby('CREAT_DATE'):
|
||||
accuracy_rote += (group['ACCURACY'].sum()/len(group))*weight_dict[len(group)-1]
|
||||
df3.to_csv(os.path.join(dataset,f'accuracy_{endtime}.csv'),index=False)
|
||||
df4 = pd.DataFrame(columns=['开始日期','结束日期','准确率'])
|
||||
df4.loc[len(df4)] = {'开始日期':ds_dates[0],'结束日期':ds_dates[-1],'准确率':accuracy_rote}
|
||||
df4.to_csv(os.path.join(dataset,f'accuracy_rote_{endtime}.csv'),index=False)
|
||||
df4.to_sql("accuracy_rote", con=sqlitedb.connection, if_exists='append', index=False)
|
||||
create_dates,ds_dates = get_week_date(end_time)
|
||||
_get_accuracy_rate(df,create_dates,ds_dates,end_time)
|
||||
@ -536,18 +534,20 @@ def model_losss(sqlitedb,end_time):
|
||||
plt.xlabel('日期')
|
||||
plt.ylabel('价格')
|
||||
|
||||
plt.savefig(os.path.join(dataset,f'{end_time}历史价格-预测值.png'), bbox_inches='tight')
|
||||
plt.savefig(os.path.join(dataset,'历史价格-预测值.png'), bbox_inches='tight')
|
||||
plt.close()
|
||||
|
||||
|
||||
def _plt_modeltopten_predict_ture(df):
|
||||
lens = df.shape[0] if df.shape[0] < 180 else 90
|
||||
df['max_cutoff'] = df.groupby('ds')['CREAT_DATE'].transform('max')
|
||||
df = df[df['CREAT_DATE'] == df['max_cutoff']]
|
||||
df['mean'] = df['mean'].astype(float)
|
||||
lens = df.shape[0] if df.shape[0] < 180 else 180
|
||||
df = df[-lens:] # 取180个数据点画图
|
||||
df['mean_price'] = df[allmodelnames[:10]].mean(axis=1)
|
||||
# 历史价格
|
||||
plt.figure(figsize=(20, 10))
|
||||
plt.plot(df['ds'], df['y'], label='真实值')
|
||||
plt.plot(df['ds'], df['mean_price'], label='模型前十均值', linestyle='--', color='orange')
|
||||
plt.plot(df['ds'], df['mean'], label='模型前十均值', linestyle='--', color='orange')
|
||||
# 颜色填充
|
||||
plt.fill_between(df['ds'], df['max_price'], df['min_price'], alpha=0.2)
|
||||
# markers = ['o', 's', '^', 'D', 'v', '*', 'p', 'h', 'H', '+', 'x', 'd']
|
||||
@ -568,7 +568,7 @@ def model_losss(sqlitedb,end_time):
|
||||
plt.xlabel('日期')
|
||||
plt.ylabel('价格')
|
||||
|
||||
plt.savefig(os.path.join(dataset,f'{end_time}历史价格-预测值1.png'), bbox_inches='tight')
|
||||
plt.savefig(os.path.join(dataset,'历史价格-预测值1.png'), bbox_inches='tight')
|
||||
plt.close()
|
||||
|
||||
|
||||
@ -935,10 +935,9 @@ def brent_export_pdf(num_indicators=475,num_models=21, num_dayindicator=202,inpu
|
||||
# except Exception as e:
|
||||
# print(f'绘制第{i+1}个特征{col}与价格散点图时出错:{e}')
|
||||
|
||||
|
||||
|
||||
|
||||
### 添加标题
|
||||
content.append(Graphs.draw_title(f'{y}{time}预测报告'))
|
||||
content.append(Graphs.draw_title(f'{y}{end_time}预测报告'))
|
||||
|
||||
### 预测结果
|
||||
content.append(Graphs.draw_little_title('一、预测结果:'))
|
||||
@ -946,16 +945,14 @@ def brent_export_pdf(num_indicators=475,num_models=21, num_dayindicator=202,inpu
|
||||
content.append(Graphs.draw_img(os.path.join(dataset,'历史价格-预测值.png')))
|
||||
# 波动率画图逻辑
|
||||
content.append(Graphs.draw_text('图示说明:'))
|
||||
content.append(Graphs.draw_text('1. 确定波动率置信区间:统计近60个交易日的真实价格波动率,找出在 10% ,90% 的分位值作为波动率置信区间;'))
|
||||
content.append(Graphs.draw_text('2. 确定通道上界:在所有模型的预测结果中 <= 前一天真实价格 乘以 90%的置信波动分位数'))
|
||||
content.append(Graphs.draw_text('3. 确定通道下界:在所有模型的预测结果中 >= 前一天真实价格 乘以 10%的置信波动分位数'))
|
||||
content.append(Graphs.draw_text('4. 预测结果没有真实值作为参考依据,通道上界取近60个交易日内预测在上界值的模型对应的预测值,通道下界同理;'))
|
||||
content.append(Graphs.draw_text('5. 预测结果选用近20个交易日内,最多接近真实值的模型的预测值对应的预测结果;'))
|
||||
content.append(Graphs.draw_text('6. 预测结果在通道外的,代表最接近真实值的预测结果不在置信波动范围内。'))
|
||||
content.append(Graphs.draw_text(' 确定波动率置信区间:设置残差置信阈值,以每周最佳模型为基准,选取在置信区间的预测值作为置信区间;'))
|
||||
|
||||
|
||||
# 添加历史走势及预测价格的走势图片
|
||||
content.append(Graphs.draw_img(os.path.join(dataset,'历史价格-预测值1.png')))
|
||||
|
||||
content.append(Graphs.draw_text('图示说明:'))
|
||||
content.append(Graphs.draw_text(' 确定波动率置信区间:使用模型评估指标MAE得到前十个模型,取平均值上下1.5作为价格波动置信区间;'))
|
||||
|
||||
|
||||
# 取df中y列为空的行
|
||||
import pandas as pd
|
||||
@ -990,6 +987,8 @@ def brent_export_pdf(num_indicators=475,num_models=21, num_dayindicator=202,inpu
|
||||
content.append(Graphs.draw_little_title('二、上一预测周期偏差率分析:'))
|
||||
df = pd.read_csv(os.path.join(dataset,'testandpredict_groupby.csv'),encoding='utf-8')
|
||||
df4 = df.copy() # 计算偏差率使用
|
||||
# 去掉created_dt 列
|
||||
df4 = df4.drop(columns=['created_dt'])
|
||||
# 计算模型偏差率
|
||||
#计算各列对于y列的差值百分比
|
||||
df3 = pd.DataFrame() # 存储偏差率
|
||||
@ -1134,7 +1133,7 @@ def brent_export_pdf(num_indicators=475,num_models=21, num_dayindicator=202,inpu
|
||||
eval_df = eval_df.T
|
||||
# df重置索引
|
||||
eval_df = eval_df.reset_index()
|
||||
eval_df = eval_df.T
|
||||
# eval_df = eval_df.T
|
||||
# # 添加表格
|
||||
data = eval_df.values.tolist()
|
||||
col_width = 500/len(eval_df.columns)
|
||||
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user