Adjust test set validation logic
commit cf8614da3f
parent fc979d26ff
@@ -451,22 +451,22 @@ def predict_main():
     model_results3 = model_losss(sqlitedb, end_time=end_time)
     logger.info('训练数据绘图end')

-    # 模型报告
-    logger.info('制作报告ing')
-    title = f'{settings}--{end_time}-预测报告'  # 报告标题
-    reportname = f'Brent原油大模型日度预测--{end_time}.pdf'  # 报告文件名
-    reportname = reportname.replace(':', '-')  # 替换冒号
-    brent_export_pdf(dataset=dataset,
-                     num_models=5 if is_fivemodels else 22, time=end_time,
-                     reportname=reportname,
-                     inputsize=global_config['horizon'],
-                     sqlitedb=sqlitedb
-                     ),
+    # # 模型报告
+    # logger.info('制作报告ing')
+    # title = f'{settings}--{end_time}-预测报告'  # 报告标题
+    # reportname = f'Brent原油大模型日度预测--{end_time}.pdf'  # 报告文件名
+    # reportname = reportname.replace(':', '-')  # 替换冒号
+    # brent_export_pdf(dataset=dataset,
+    #                  num_models=5 if is_fivemodels else 22, time=end_time,
+    #                  reportname=reportname,
+    #                  inputsize=global_config['horizon'],
+    #                  sqlitedb=sqlitedb
+    #                  ),

-    logger.info('制作报告end')
-    logger.info('模型训练完成')
+    # logger.info('制作报告end')
+    # logger.info('模型训练完成')

-    push_market_value()
+    # push_market_value()

     # # LSTM 单变量模型
     # ex_Lstm(df,input_seq_len=input_size,output_seq_len=horizon,is_debug=is_debug,dataset=dataset)
@@ -193,8 +193,8 @@ def ex_Model(df, horizon, input_size, train_steps, val_check_steps, early_stop_p
         # 模型交叉验证
         nf_preds = nf.cross_validation(
             df=df_train, val_size=val_size, test_size=test_size, n_windows=None)
-        nf_preds.to_csv(os.path.join(
-            config.dataset, "cross_validation.csv"), index=False)
+        # nf_preds.to_csv(os.path.join(
+        #     config.dataset, "cross_validation.csv"), index=False)

         nf_preds = nf_preds.reset_index()
         # 保存模型
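Note on the hunk above: `nf.cross_validation` is NeuralForecast's rolling-origin evaluation, and this commit stops persisting its output to cross_validation.csv; the fallback branch of model_losss further down still reads that file, so it will pick up whatever copy was written previously. A minimal self-contained sketch of the call follows; the NHITS model and the window sizes are illustrative assumptions, not this repo's settings.

import pandas as pd
from neuralforecast import NeuralForecast
from neuralforecast.models import NHITS

# Toy daily series; the real code passes its prepared df_train instead.
df_train = pd.DataFrame({
    'unique_id': 'Brent',
    'ds': pd.date_range('2023-01-01', periods=200, freq='D'),
    'y': [70 + 0.1 * i for i in range(200)],
})
nf = NeuralForecast(models=[NHITS(h=5, input_size=30, max_steps=20)], freq='D')
# With n_windows=None the evaluation span comes from test_size; the result
# holds ds, cutoff, y and one prediction column per model.
nf_preds = nf.cross_validation(df=df_train, val_size=20, test_size=40,
                               n_windows=None)
print(nf_preds.head())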
@@ -498,7 +498,6 @@ def ex_Model_Juxiting(df, horizon, input_size, train_steps, val_check_steps, ear
     return


-
 # 雍安环境预测评估指数
 @exception_logger
 def model_losss_yongan(sqlitedb, end_time, table_name_prefix):
@@ -965,7 +964,7 @@ def model_losss(sqlitedb, end_time):
     try:
         df_combined = sqlitedb.select_data(
             'accuracy', where_condition=f"created_dt <= '{end_time}'")
-        if len(df_combined) < 100:
+        if len(df_combined) < 10000:
            len(df_combined) + ''
         if df_combined['y'].isnull().sum() / len(df_combined) > 0.8:
            len(df_combined) + ''
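The odd-looking `len(df_combined) + ''` above is a deliberate failure: adding an int to a str raises TypeError, which the enclosing try/except turns into a jump to the fallback branch that loads cross_validation.csv (next hunk). Raising the row threshold from 100 to 10000 therefore makes the CSV fallback the common path. A runnable sketch of the pattern, with hypothetical stand-in frames:

import pandas as pd

def pick_source(df_db: pd.DataFrame, df_csv: pd.DataFrame) -> pd.DataFrame:
    """Return the DB frame if it looks trustworthy, else the CSV frame."""
    try:
        if len(df_db) < 10000:   # DB result considered too small to trust
            len(df_db) + ''      # int + str raises TypeError on purpose
        return df_db
    except TypeError:
        return df_csv            # mirrors falling back to cross_validation.csv

small_db = pd.DataFrame({'y': [1.0, 2.0]})
big_csv = pd.DataFrame({'y': range(20000)})
assert pick_source(small_db, big_csv) is big_csv   # sparse DB -> fallback
assert pick_source(big_csv, small_db) is big_csv   # big frame passes the gate

An explicit raise ValueError('accuracy table too small') would express the same intent without relying on an operand-type error.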
@@ -973,7 +972,7 @@ def model_losss(sqlitedb, end_time):
         df_combined = loadcsv(os.path.join(
             config.dataset, "cross_validation.csv"))
         df_combined = dateConvert(df_combined)
-        df_combined['CREAT_DATE'] = df_combined['cutoff']
+        df_combined['CREAT_DATE'] = df_combined['ds']
     df_combined4 = df_combined.copy()  # 备份df_combined,后面画图需要
     # 删除缺失值大于80%的列
     config.logger.info(df_combined.shape)
@@ -985,12 +984,12 @@ def model_losss(sqlitedb, end_time):
     # 其他列转为数值类型
     df_combined = df_combined.astype({col: 'float32' for col in df_combined.columns if col not in [
                                      'CREAT_DATE', 'ds', 'created_dt', 'cutoff']})
-    # 使用 groupby 和 transform 结合 lambda 函数来获取每个分组中 cutoff 的最小值,并创建一个新的列来存储这个最大值
+    # 使用 groupby 和 transform 结合 lambda 函数来获取每个分组中 cutoff 的最大值,并创建一个新的列来存储这个最大值
     df_combined['max_cutoff'] = df_combined.groupby(
-        'ds')['CREAT_DATE'].transform('max')
+        'ds')['cutoff'].transform('max')

     # 然后筛选出那些 cutoff 等于 max_cutoff 的行,这样就得到了每个分组中 cutoff 最大的行,并保留了其他列
-    df_combined = df_combined[df_combined['CREAT_DATE']
+    df_combined = df_combined[df_combined['cutoff']
                               == df_combined['max_cutoff']]
     # 删除模型生成的cutoff列
     df_combined.drop(columns=['CREAT_DATE', 'max_cutoff', 'created_dt', 'min_within_quantile', 'max_within_quantile',
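The last hunk fixes the stale-window filter: the per-`ds` maximum is now taken over the model-generated `cutoff` column instead of `CREAT_DATE`, so for each forecast date only the rows from the most recent training cutoff survive (the comment's 最小值 "minimum" was also corrected to 最大值 "maximum"). A toy illustration of the transform('max') idiom, with invented values:

import pandas as pd

# Invented rows: two cutoffs forecasting the same target dates.
df = pd.DataFrame({
    'ds':     ['2024-06-03', '2024-06-03', '2024-06-04', '2024-06-04'],
    'cutoff': ['2024-05-31', '2024-06-02', '2024-06-01', '2024-06-02'],
    'NHITS':  [76.2, 76.8, 77.1, 77.4],
})
# transform('max') broadcasts each ds-group's latest cutoff onto its rows...
df['max_cutoff'] = df.groupby('ds')['cutoff'].transform('max')
# ...so a boolean mask keeps exactly the newest forecast per ds.
latest = df[df['cutoff'] == df['max_cutoff']]
print(latest[['ds', 'cutoff', 'NHITS']])   # one row per forecast date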