Adjust test set validation logic
commit cf8614da3f
parent fc979d26ff
@@ -451,22 +451,22 @@ def predict_main():
     model_results3 = model_losss(sqlitedb, end_time=end_time)
     logger.info('训练数据绘图end')

-    # 模型报告
-    logger.info('制作报告ing')
-    title = f'{settings}--{end_time}-预测报告'  # 报告标题
-    reportname = f'Brent原油大模型日度预测--{end_time}.pdf'  # 报告文件名
-    reportname = reportname.replace(':', '-')  # 替换冒号
-    brent_export_pdf(dataset=dataset,
-                     num_models=5 if is_fivemodels else 22, time=end_time,
-                     reportname=reportname,
-                     inputsize=global_config['horizon'],
-                     sqlitedb=sqlitedb
-                     ),
+    # # 模型报告
+    # logger.info('制作报告ing')
+    # title = f'{settings}--{end_time}-预测报告'  # 报告标题
+    # reportname = f'Brent原油大模型日度预测--{end_time}.pdf'  # 报告文件名
+    # reportname = reportname.replace(':', '-')  # 替换冒号
+    # brent_export_pdf(dataset=dataset,
+    #                  num_models=5 if is_fivemodels else 22, time=end_time,
+    #                  reportname=reportname,
+    #                  inputsize=global_config['horizon'],
+    #                  sqlitedb=sqlitedb
+    #                  ),

-    logger.info('制作报告end')
-    logger.info('模型训练完成')
+    # logger.info('制作报告end')
+    # logger.info('模型训练完成')

-    push_market_value()
+    # push_market_value()

     # # LSTM 单变量模型
     # ex_Lstm(df,input_seq_len=input_size,output_seq_len=horizon,is_debug=is_debug,dataset=dataset)
@@ -193,8 +193,8 @@ def ex_Model(df, horizon, input_size, train_steps, val_check_steps, early_stop_p
         # 模型交叉验证
         nf_preds = nf.cross_validation(
             df=df_train, val_size=val_size, test_size=test_size, n_windows=None)
-        nf_preds.to_csv(os.path.join(
-            config.dataset, "cross_validation.csv"), index=False)
+        # nf_preds.to_csv(os.path.join(
+        #     config.dataset, "cross_validation.csv"), index=False)

         nf_preds = nf_preds.reset_index()
         # 保存模型
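Note on the hunk above: `nf.cross_validation` is NeuralForecast's rolling-origin evaluation, and this commit stops persisting its output to cross_validation.csv; the fallback branch of model_losss further down still reads that file, so it will pick up whatever copy was written previously. A minimal self-contained sketch of the call follows; the NHITS model and the window sizes are illustrative assumptions, not this repo's settings.

import pandas as pd
from neuralforecast import NeuralForecast
from neuralforecast.models import NHITS

# Toy daily series; the real code passes its prepared df_train instead.
df_train = pd.DataFrame({
    'unique_id': 'Brent',
    'ds': pd.date_range('2023-01-01', periods=200, freq='D'),
    'y': [70 + 0.1 * i for i in range(200)],
})
nf = NeuralForecast(models=[NHITS(h=5, input_size=30, max_steps=20)], freq='D')
# With n_windows=None the evaluation span comes from test_size; the result
# holds ds, cutoff, y and one prediction column per model.
nf_preds = nf.cross_validation(df=df_train, val_size=20, test_size=40,
                               n_windows=None)
print(nf_preds.head())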
@@ -498,7 +498,6 @@ def ex_Model_Juxiting(df, horizon, input_size, train_steps, val_check_steps, ear
     return


-
 # 雍安环境预测评估指数
 @exception_logger
 def model_losss_yongan(sqlitedb, end_time, table_name_prefix):
@@ -965,7 +964,7 @@ def model_losss(sqlitedb, end_time):
     try:
         df_combined = sqlitedb.select_data(
             'accuracy', where_condition=f"created_dt <= '{end_time}'")
-        if len(df_combined) < 100:
+        if len(df_combined) < 10000:
            len(df_combined) + ''
         if df_combined['y'].isnull().sum() / len(df_combined) > 0.8:
            len(df_combined) + ''
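The odd-looking `len(df_combined) + ''` above is a deliberate failure: adding an int to a str raises TypeError, which the enclosing try/except turns into a jump to the fallback branch that loads cross_validation.csv (next hunk). Raising the row threshold from 100 to 10000 therefore makes the CSV fallback the common path. A runnable sketch of the pattern, with hypothetical stand-in frames:

import pandas as pd

def pick_source(df_db: pd.DataFrame, df_csv: pd.DataFrame) -> pd.DataFrame:
    """Return the DB frame if it looks trustworthy, else the CSV frame."""
    try:
        if len(df_db) < 10000:   # DB result considered too small to trust
            len(df_db) + ''      # int + str raises TypeError on purpose
        return df_db
    except TypeError:
        return df_csv            # mirrors falling back to cross_validation.csv

small_db = pd.DataFrame({'y': [1.0, 2.0]})
big_csv = pd.DataFrame({'y': range(20000)})
assert pick_source(small_db, big_csv) is big_csv   # sparse DB -> fallback
assert pick_source(big_csv, small_db) is big_csv   # big frame passes the gate

An explicit raise ValueError('accuracy table too small') would express the same intent without relying on an operand-type error.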
@@ -973,7 +972,7 @@ def model_losss(sqlitedb, end_time):
         df_combined = loadcsv(os.path.join(
             config.dataset, "cross_validation.csv"))
         df_combined = dateConvert(df_combined)
-        df_combined['CREAT_DATE'] = df_combined['cutoff']
+        df_combined['CREAT_DATE'] = df_combined['ds']
     df_combined4 = df_combined.copy()  # 备份df_combined,后面画图需要
     # 删除缺失值大于80%的列
     config.logger.info(df_combined.shape)
@@ -985,12 +984,12 @@ def model_losss(sqlitedb, end_time):
     # 其他列转为数值类型
     df_combined = df_combined.astype({col: 'float32' for col in df_combined.columns if col not in [
                                      'CREAT_DATE', 'ds', 'created_dt', 'cutoff']})
-    # 使用 groupby 和 transform 结合 lambda 函数来获取每个分组中 cutoff 的最小值,并创建一个新的列来存储这个最大值
+    # 使用 groupby 和 transform 结合 lambda 函数来获取每个分组中 cutoff 的最大值,并创建一个新的列来存储这个最大值
     df_combined['max_cutoff'] = df_combined.groupby(
-        'ds')['CREAT_DATE'].transform('max')
+        'ds')['cutoff'].transform('max')

     # 然后筛选出那些 cutoff 等于 max_cutoff 的行,这样就得到了每个分组中 cutoff 最大的行,并保留了其他列
-    df_combined = df_combined[df_combined['CREAT_DATE']
+    df_combined = df_combined[df_combined['cutoff']
                               == df_combined['max_cutoff']]
     # 删除模型生成的cutoff列
     df_combined.drop(columns=['CREAT_DATE', 'max_cutoff', 'created_dt', 'min_within_quantile', 'max_within_quantile',
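The last hunk fixes the stale-window filter: the per-`ds` maximum is now taken over the model-generated `cutoff` column instead of `CREAT_DATE`, so for each forecast date only the rows from the most recent training cutoff survive (the comment's 最小值 "minimum" was also corrected to 最大值 "maximum"). A toy illustration of the transform('max') idiom, with invented values:

import pandas as pd

# Invented rows: two cutoffs forecasting the same target dates.
df = pd.DataFrame({
    'ds':     ['2024-06-03', '2024-06-03', '2024-06-04', '2024-06-04'],
    'cutoff': ['2024-05-31', '2024-06-02', '2024-06-01', '2024-06-02'],
    'NHITS':  [76.2, 76.8, 77.1, 77.4],
})
# transform('max') broadcasts each ds-group's latest cutoff onto its rows...
df['max_cutoff'] = df.groupby('ds')['cutoff'].transform('max')
# ...so a boolean mask keeps exactly the newest forecast per ds.
latest = df[df['cutoff'] == df['max_cutoff']]
print(latest[['ds', 'cutoff', 'NHITS']])   # one row per forecast date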