Compare commits

...

2 Commits

Author   SHA1        Message                                                              Date
workpc   db21b28268  Merge branch 'main' of https://gitea.jayhgq.cn/liurui/PriceForecast  2025-05-29 13:51:14 +08:00
workpc   cf8614da3f  Adjust test-set validation logic                                     2025-05-29 13:51:09 +08:00
2 changed files with 21 additions and 22 deletions


@@ -451,22 +451,22 @@ def predict_main():
     model_results3 = model_losss(sqlitedb, end_time=end_time)
     logger.info('训练数据绘图end')
-    # Model report
-    logger.info('制作报告ing')
-    title = f'{settings}--{end_time}-预测报告'  # report title
-    reportname = f'Brent原油大模型日度预测--{end_time}.pdf'  # report file name
-    reportname = reportname.replace(':', '-')  # replace colons
-    brent_export_pdf(dataset=dataset,
-                     num_models=5 if is_fivemodels else 22, time=end_time,
-                     reportname=reportname,
-                     inputsize=global_config['horizon'],
-                     sqlitedb=sqlitedb
-                     ),
+    # # Model report
+    # logger.info('制作报告ing')
+    # title = f'{settings}--{end_time}-预测报告'  # report title
+    # reportname = f'Brent原油大模型日度预测--{end_time}.pdf'  # report file name
+    # reportname = reportname.replace(':', '-')  # replace colons
+    # brent_export_pdf(dataset=dataset,
+    #                  num_models=5 if is_fivemodels else 22, time=end_time,
+    #                  reportname=reportname,
+    #                  inputsize=global_config['horizon'],
+    #                  sqlitedb=sqlitedb
+    #                  ),
-    logger.info('制作报告end')
-    logger.info('模型训练完成')
+    # logger.info('制作报告end')
+    # logger.info('模型训练完成')
-    push_market_value()
+    # push_market_value()
     # # LSTM univariate model
     # ex_Lstm(df,input_seq_len=input_size,output_seq_len=horizon,is_debug=is_debug,dataset=dataset)
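The net effect of this hunk is that report generation (brent_export_pdf) and push_market_value are switched off by commenting the calls out. A minimal sketch, using hypothetical ENABLE_REPORT / ENABLE_MARKET_PUSH flags that are not in this repo, of gating the same steps through configuration instead of comments:

```python
# Hypothetical flags -- not part of this repository's config.
ENABLE_REPORT = False       # flip on outside of test runs
ENABLE_MARKET_PUSH = False  # flip on outside of test runs


def run_optional_steps(make_report, push_value):
    """Run the post-training steps this commit disables, if enabled.

    `make_report` would wrap the brent_export_pdf(...) call and
    `push_value` would wrap push_market_value() in the real pipeline.
    """
    if ENABLE_REPORT:
        make_report()
    if ENABLE_MARKET_PUSH:
        push_value()
```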


@@ -193,8 +193,8 @@ def ex_Model(df, horizon, input_size, train_steps, val_check_steps, early_stop_p
     # Model cross-validation
     nf_preds = nf.cross_validation(
         df=df_train, val_size=val_size, test_size=test_size, n_windows=None)
-    nf_preds.to_csv(os.path.join(
-        config.dataset, "cross_validation.csv"), index=False)
+    # nf_preds.to_csv(os.path.join(
+    #     config.dataset, "cross_validation.csv"), index=False)
     nf_preds = nf_preds.reset_index()
     # Save model
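This hunk stops persisting the cross-validation output to cross_validation.csv. For reference, a minimal self-contained sketch of the kind of call being made here, assuming the `neuralforecast` library (which the `nf.cross_validation` signature suggests); the model choice, window sizes, and toy data are illustrative, not this repo's configuration:

```python
import pandas as pd
from neuralforecast import NeuralForecast
from neuralforecast.models import NHITS

# Toy single-series frame in the unique_id / ds / y layout neuralforecast expects.
df_train = pd.DataFrame({
    'unique_id': 'Brent',
    'ds': pd.date_range('2024-01-01', periods=200, freq='D'),
    'y': range(200),
})

nf = NeuralForecast(models=[NHITS(h=5, input_size=20, max_steps=10)], freq='D')

# With n_windows=None the windows are derived from test_size; the result
# contains unique_id, ds, cutoff, y and one column per model.
nf_preds = nf.cross_validation(df=df_train, val_size=20, test_size=40, n_windows=None)
print(nf_preds.columns.tolist())
```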
@@ -498,7 +498,6 @@ def ex_Model_Juxiting(df, horizon, input_size, train_steps, val_check_steps, ear
     return


# Yong'an environment forecast evaluation index
@exception_logger
def model_losss_yongan(sqlitedb, end_time, table_name_prefix):
@@ -965,7 +964,7 @@ def model_losss(sqlitedb, end_time):
     try:
         df_combined = sqlitedb.select_data(
             'accuracy', where_condition=f"created_dt <= '{end_time}'")
-        if len(df_combined) < 100:
+        if len(df_combined) < 10000:
             len(df_combined) + ''
         if df_combined['y'].isnull().sum() / len(df_combined) > 0.8:
             len(df_combined) + ''
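The `len(df_combined) + ''` lines add an int to a str, deliberately raising a TypeError so control falls into the surrounding except block, which loads the CSV fallback shown in the next hunk. Raising the row-count threshold from 100 to 10000 makes that fallback trigger far more often, consistent with the commit message. A sketch of the same control flow with an explicit raise instead of the int-plus-str trick; `load_from_db` and `load_from_csv` are hypothetical stand-ins for sqlitedb.select_data and loadcsv:

```python
def load_accuracy(end_time):
    try:
        df = load_from_db(end_time)              # hypothetical stand-in
        if len(df) < 10000:                      # threshold this commit raised from 100
            raise ValueError('too few rows in the accuracy table')
        if df['y'].isnull().sum() / len(df) > 0.8:
            raise ValueError('y column is more than 80% null')
    except Exception:
        # Fall back to the cross-validation export when DB data is unusable.
        df = load_from_csv('cross_validation.csv')  # hypothetical stand-in
    return df
```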
@@ -973,7 +972,7 @@ def model_losss(sqlitedb, end_time):
         df_combined = loadcsv(os.path.join(
             config.dataset, "cross_validation.csv"))
         df_combined = dateConvert(df_combined)
-        df_combined['CREAT_DATE'] = df_combined['cutoff']
+        df_combined['CREAT_DATE'] = df_combined['ds']
     df_combined4 = df_combined.copy()  # back up df_combined; needed later for plotting
     # Drop columns with more than 80% missing values
     config.logger.info(df_combined.shape)
@@ -985,12 +984,12 @@ def model_losss(sqlitedb, end_time):
     # Convert the other columns to numeric types
     df_combined = df_combined.astype({col: 'float32' for col in df_combined.columns if col not in [
         'CREAT_DATE', 'ds', 'created_dt', 'cutoff']})
     # Use groupby with transform to get each group's max cutoff and store it in a new column
     df_combined['max_cutoff'] = df_combined.groupby(
-        'ds')['CREAT_DATE'].transform('max')
+        'ds')['cutoff'].transform('max')
     # Then keep only the rows where cutoff equals max_cutoff, i.e. each group's most recent row, preserving the other columns
-    df_combined = df_combined[df_combined['CREAT_DATE']
+    df_combined = df_combined[df_combined['cutoff']
                               == df_combined['max_cutoff']]
     # Drop the model-generated cutoff columns
     df_combined.drop(columns=['CREAT_DATE', 'max_cutoff', 'created_dt', 'min_within_quantile', 'max_within_quantile',
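The last two hunks switch the latest-window filter from CREAT_DATE to cutoff: for each forecast date ds, only the predictions produced at the most recent cutoff are kept. A self-contained pandas sketch of that keep-latest-cutoff-per-ds pattern on toy data (the NHITS column is just an example model column):

```python
import pandas as pd

df = pd.DataFrame({
    'ds':     ['2025-05-28', '2025-05-28', '2025-05-29', '2025-05-29'],
    'cutoff': ['2025-05-26', '2025-05-27', '2025-05-26', '2025-05-27'],
    'NHITS':  [80.1, 80.4, 79.9, 80.2],
})

# transform('max') broadcasts each ds group's max cutoff back onto its rows...
df['max_cutoff'] = df.groupby('ds')['cutoff'].transform('max')

# ...so a boolean mask keeps only the newest forecast for each ds.
latest = df[df['cutoff'] == df['max_cutoff']].drop(columns=['max_cutoff'])
print(latest)
```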