Compare commits

...

2 Commits

Author   SHA1         Message                                                               Date
workpc   db21b28268   Merge branch 'main' of https://gitea.jayhgq.cn/liurui/PriceForecast   2025-05-29 13:51:14 +08:00
workpc   cf8614da3f   Adjust test-set validation logic                                      2025-05-29 13:51:09 +08:00
2 changed files with 21 additions and 22 deletions

View File

@@ -451,22 +451,22 @@ def predict_main():
     model_results3 = model_losss(sqlitedb, end_time=end_time)
     logger.info('训练数据绘图end')
-    # model report
-    logger.info('制作报告ing')
-    title = f'{settings}--{end_time}-预测报告'  # report title
-    reportname = f'Brent原油大模型日度预测--{end_time}.pdf'  # report file name
-    reportname = reportname.replace(':', '-')  # replace colons
-    brent_export_pdf(dataset=dataset,
-                     num_models=5 if is_fivemodels else 22, time=end_time,
-                     reportname=reportname,
-                     inputsize=global_config['horizon'],
-                     sqlitedb=sqlitedb
-                     ),
-    logger.info('制作报告end')
-    logger.info('模型训练完成')
-    push_market_value()
+    # # model report
+    # logger.info('制作报告ing')
+    # title = f'{settings}--{end_time}-预测报告'  # report title
+    # reportname = f'Brent原油大模型日度预测--{end_time}.pdf'  # report file name
+    # reportname = reportname.replace(':', '-')  # replace colons
+    # brent_export_pdf(dataset=dataset,
+    #                  num_models=5 if is_fivemodels else 22, time=end_time,
+    #                  reportname=reportname,
+    #                  inputsize=global_config['horizon'],
+    #                  sqlitedb=sqlitedb
+    #                  ),
+    # logger.info('制作报告end')
+    # logger.info('模型训练完成')
+    # push_market_value()
     # # LSTM univariate model
     # ex_Lstm(df,input_seq_len=input_size,output_seq_len=horizon,is_debug=is_debug,dataset=dataset)
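
Note on the hunk above: the entire PDF-report stage (title, filename, brent_export_pdf, push_market_value) is disabled by commenting it out. Purely as an illustration of an alternative design, and not something this commit does, the same stage could be gated behind a config switch so it can be re-enabled without touching source. Everything below, including the make_report flag and the stub exporter, is hypothetical:

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    # hypothetical config switch; 'make_report' is not a real key in this repo
    global_config = {'make_report': False, 'horizon': 5}

    def brent_export_pdf(**kwargs):
        # stub standing in for the real PDF exporter
        logger.info('exporting pdf with %s', kwargs)

    if global_config['make_report']:
        brent_export_pdf(reportname='Brent-demo.pdf',
                         inputsize=global_config['horizon'])
    else:
        logger.info('report stage skipped by config')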

View File

@@ -193,8 +193,8 @@ def ex_Model(df, horizon, input_size, train_steps, val_check_steps, early_stop_p
     # model cross-validation
     nf_preds = nf.cross_validation(
         df=df_train, val_size=val_size, test_size=test_size, n_windows=None)
-    nf_preds.to_csv(os.path.join(
-        config.dataset, "cross_validation.csv"), index=False)
+    # nf_preds.to_csv(os.path.join(
+    #     config.dataset, "cross_validation.csv"), index=False)
     nf_preds = nf_preds.reset_index()
     # save the model
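
For context, nf.cross_validation is the neuralforecast backtesting API; this hunk stops persisting its output, even though the fallback path in model_losss later in this diff still loads cross_validation.csv. A minimal, self-contained sketch of the call follows; the NHITS model, daily frequency, and window sizes are illustrative assumptions, not values from this commit:

    import pandas as pd
    from neuralforecast import NeuralForecast
    from neuralforecast.models import NHITS

    # toy daily series in the long format neuralforecast expects
    df_train = pd.DataFrame({
        'unique_id': ['Brent'] * 200,
        'ds': pd.date_range('2024-01-01', periods=200, freq='D'),
        'y': [float(i) for i in range(200)],
    })
    nf = NeuralForecast(models=[NHITS(h=5, input_size=20, max_steps=50)], freq='D')
    # n_windows=None because test_size is given explicitly
    nf_preds = nf.cross_validation(df=df_train, val_size=20,
                                   test_size=20, n_windows=None)
    # nf_preds carries a 'cutoff' column marking each backtest window's origin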
@@ -498,7 +498,6 @@ def ex_Model_Juxiting(df, horizon, input_size, train_steps, val_check_steps, ear
     return
-
 # Yong'an environment forecast evaluation index
 @exception_logger
 def model_losss_yongan(sqlitedb, end_time, table_name_prefix):
@@ -965,7 +964,7 @@ def model_losss(sqlitedb, end_time):
     try:
         df_combined = sqlitedb.select_data(
             'accuracy', where_condition=f"created_dt <= '{end_time}'")
-        if len(df_combined) < 100:
+        if len(df_combined) < 10000:
             len(df_combined) + ''
         if df_combined['y'].isnull().sum() / len(df_combined) > 0.8:
             len(df_combined) + ''
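
The bare len(df_combined) + '' expressions above are not dead code: adding an int to a str raises TypeError on purpose, so a too-small or mostly-null accuracy table bails out of the try block into the CSV fallback shown in the next hunk (the visible lines do not show which exception type the real except clause catches). A runnable sketch of that control flow, with hypothetical stand-ins for the DB and CSV loaders:

    import pandas as pd

    def query_accuracy_table():
        # stand-in for sqlitedb.select_data('accuracy', ...)
        return pd.DataFrame({'y': [1.0, None, None]})

    def load_cross_validation_csv():
        # stand-in for loadcsv(.../cross_validation.csv)
        return pd.DataFrame({'y': [1.0, 2.0, 3.0]})

    def load_accuracy():
        try:
            df = query_accuracy_table()
            if len(df) < 10000:                # too few rows to trust the DB copy
                len(df) + ''                   # int + str -> TypeError, on purpose
            if df['y'].isnull().sum() / len(df) > 0.8:
                len(df) + ''                   # mostly-null target: same escape hatch
        except TypeError:
            df = load_cross_validation_csv()   # fall back to the CSV path
        return df

    print(load_accuracy())  # prints the CSV stand-in: the DB copy has only 3 rows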
@@ -973,7 +972,7 @@ def model_losss(sqlitedb, end_time):
         df_combined = loadcsv(os.path.join(
             config.dataset, "cross_validation.csv"))
         df_combined = dateConvert(df_combined)
-        df_combined['CREAT_DATE'] = df_combined['cutoff']
+        df_combined['CREAT_DATE'] = df_combined['ds']
     df_combined4 = df_combined.copy()  # back up df_combined; needed later for plotting
     # drop columns with more than 80% missing values
     config.logger.info(df_combined.shape)
@@ -985,12 +984,12 @@ def model_losss(sqlitedb, end_time):
     # cast the remaining columns to numeric types
     df_combined = df_combined.astype({col: 'float32' for col in df_combined.columns if col not in [
         'CREAT_DATE', 'ds', 'created_dt', 'cutoff']})
     # use groupby with transform to get each group's max cutoff and store it in a new column
-    df_combined['max_cutoff'] = df_combined.groupby(
-        'ds')['CREAT_DATE'].transform('max')
+    df_combined['max_cutoff'] = df_combined.groupby(
+        'ds')['cutoff'].transform('max')
     # then keep only the rows where cutoff equals max_cutoff: the latest row in each group, other columns preserved
-    df_combined = df_combined[df_combined['CREAT_DATE']
-                              == df_combined['max_cutoff']]
+    df_combined = df_combined[df_combined['cutoff']
+                              == df_combined['max_cutoff']]
     # drop the model-generated cutoff column
     df_combined.drop(columns=['CREAT_DATE', 'max_cutoff', 'created_dt', 'min_within_quantile', 'max_within_quantile',
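
The substance of this last hunk is switching the groupby/filter pair from CREAT_DATE to cutoff: for each forecast date ds, only the predictions made at the most recent backtest cutoff survive. A self-contained illustration of the pattern (the NHITS column is a hypothetical model output):

    import pandas as pd

    df = pd.DataFrame({
        'ds':     ['2025-05-26', '2025-05-26', '2025-05-27', '2025-05-27'],
        'cutoff': ['2025-05-23', '2025-05-25', '2025-05-23', '2025-05-25'],
        'NHITS':  [81.2, 81.5, 82.0, 82.3],  # hypothetical predictions
    })
    # max cutoff per forecast date, broadcast back onto every row
    df['max_cutoff'] = df.groupby('ds')['cutoff'].transform('max')
    # keep only rows produced by the latest cutoff for their ds
    latest = df[df['cutoff'] == df['max_cutoff']].drop(columns=['max_cutoff'])
    print(latest)  # one row per ds, each from cutoff 2025-05-25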