Adjust test-set validation logic
commit cf8614da3f
parent fc979d26ff
@@ -451,22 +451,22 @@ def predict_main():
     model_results3 = model_losss(sqlitedb, end_time=end_time)
     logger.info('训练数据绘图end')
 
-    # Model report
-    logger.info('制作报告ing')
-    title = f'{settings}--{end_time}-预测报告'  # report title
-    reportname = f'Brent原油大模型日度预测--{end_time}.pdf'  # report filename
-    reportname = reportname.replace(':', '-')  # replace colons
-    brent_export_pdf(dataset=dataset,
-                     num_models=5 if is_fivemodels else 22, time=end_time,
-                     reportname=reportname,
-                     inputsize=global_config['horizon'],
-                     sqlitedb=sqlitedb
-                     ),
+    # # Model report
+    # logger.info('制作报告ing')
+    # title = f'{settings}--{end_time}-预测报告'  # report title
+    # reportname = f'Brent原油大模型日度预测--{end_time}.pdf'  # report filename
+    # reportname = reportname.replace(':', '-')  # replace colons
+    # brent_export_pdf(dataset=dataset,
+    #                  num_models=5 if is_fivemodels else 22, time=end_time,
+    #                  reportname=reportname,
+    #                  inputsize=global_config['horizon'],
+    #                  sqlitedb=sqlitedb
+    #                  ),
 
-    logger.info('制作报告end')
-    logger.info('模型训练完成')
+    # logger.info('制作报告end')
+    # logger.info('模型训练完成')
 
-    push_market_value()
+    # push_market_value()
 
     # # LSTM univariate model
     # ex_Lstm(df,input_seq_len=input_size,output_seq_len=horizon,is_debug=is_debug,dataset=dataset)
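This hunk turns off the PDF report and the market-value push by commenting the lines out; note that the trailing `),` in the original made the `brent_export_pdf(...)` call the sole element of a discarded tuple, which is harmless but misleading. A minimal sketch of the same on/off switch behind a config flag instead, assuming a hypothetical `make_report` key in `global_config` (all other names come from the surrounding function):

    if global_config.get('make_report', False):  # hypothetical flag, not in this repo
        logger.info('制作报告ing')
        reportname = f'Brent原油大模型日度预测--{end_time}.pdf'.replace(':', '-')
        brent_export_pdf(dataset=dataset,
                         num_models=5 if is_fivemodels else 22, time=end_time,
                         reportname=reportname,
                         inputsize=global_config['horizon'],
                         sqlitedb=sqlitedb)  # no stray trailing comma
        logger.info('制作报告end')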
@@ -193,8 +193,8 @@ def ex_Model(df, horizon, input_size, train_steps, val_check_steps, early_stop_p
         # model cross-validation
         nf_preds = nf.cross_validation(
             df=df_train, val_size=val_size, test_size=test_size, n_windows=None)
-        nf_preds.to_csv(os.path.join(
-            config.dataset, "cross_validation.csv"), index=False)
+        # nf_preds.to_csv(os.path.join(
+        #     config.dataset, "cross_validation.csv"), index=False)
 
         nf_preds = nf_preds.reset_index()
         # save the model
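With the CSV dump disabled, later steps read cross-validation results from the `accuracy` table instead (see the `model_losss` hunks below); the on-disk file remains only as a fallback. For reference, an illustrative, self-contained sketch of the `cross_validation` call used above; the toy series and the NHITS settings are assumptions, not taken from this repo:

    import pandas as pd
    from neuralforecast import NeuralForecast
    from neuralforecast.models import NHITS

    df_train = pd.DataFrame({
        'unique_id': 'Brent',
        'ds': pd.date_range('2024-01-01', periods=200, freq='D'),
        'y': [float(i) for i in range(200)],
    })
    nf = NeuralForecast(models=[NHITS(h=5, input_size=30, max_steps=10)], freq='D')
    # With test_size given, n_windows must be None. The result has one row per
    # (ds, cutoff) pair: `cutoff` is the end of each training window, `y` the
    # actual value, plus one prediction column per model (here 'NHITS').
    nf_preds = nf.cross_validation(df=df_train, val_size=5,
                                   test_size=10, n_windows=None)
    # Some neuralforecast versions return unique_id as the index, hence the
    # reset_index() in the code above.
    print(nf_preds.reset_index().head())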
@@ -498,7 +498,6 @@ def ex_Model_Juxiting(df, horizon, input_size, train_steps, val_check_steps, ear
     return
 
 
-
 # Yongan environment prediction evaluation index
 @exception_logger
 def model_losss_yongan(sqlitedb, end_time, table_name_prefix):
@@ -965,7 +964,7 @@ def model_losss(sqlitedb, end_time):
     try:
         df_combined = sqlitedb.select_data(
             'accuracy', where_condition=f"created_dt <= '{end_time}'")
-        if len(df_combined) < 100:
+        if len(df_combined) < 10000:
             len(df_combined) + ''
         if df_combined['y'].isnull().sum() / len(df_combined) > 0.8:
             len(df_combined) + ''
@@ -973,7 +972,7 @@ def model_losss(sqlitedb, end_time):
         df_combined = loadcsv(os.path.join(
             config.dataset, "cross_validation.csv"))
         df_combined = dateConvert(df_combined)
-        df_combined['CREAT_DATE'] = df_combined['cutoff']
+        df_combined['CREAT_DATE'] = df_combined['ds']
     df_combined4 = df_combined.copy()  # back up df_combined; needed later for plotting
     # drop columns with more than 80% missing values
     config.logger.info(df_combined.shape)
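In the hunk above, `len(df_combined) + ''` adds an int to a str, so its only job is to raise TypeError and drop control into this `except` branch, which reloads `cross_validation.csv`; raising the row threshold from 100 to 10000 makes the database path apply only when the accuracy table is well populated. A sketch of the same control flow written explicitly; `MIN_ROWS` is an illustrative name, and the helpers (`sqlitedb.select_data`, `loadcsv`, `dateConvert`) are those used above:

    MIN_ROWS = 10000  # the 10000-row threshold from the hunk above
    try:
        df_combined = sqlitedb.select_data(
            'accuracy', where_condition=f"created_dt <= '{end_time}'")
        if len(df_combined) < MIN_ROWS:
            raise ValueError(f'accuracy table too small: {len(df_combined)} rows')
        if df_combined['y'].isnull().sum() / len(df_combined) > 0.8:
            raise ValueError('y column of accuracy table is more than 80% null')
    except Exception:
        # fall back to the on-disk cross-validation results
        df_combined = loadcsv(os.path.join(config.dataset, 'cross_validation.csv'))
        df_combined = dateConvert(df_combined)
        df_combined['CREAT_DATE'] = df_combined['ds']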
@@ -985,12 +984,12 @@ def model_losss(sqlitedb, end_time):
     # convert the remaining columns to numeric types
     df_combined = df_combined.astype({col: 'float32' for col in df_combined.columns if col not in [
                                      'CREAT_DATE', 'ds', 'created_dt', 'cutoff']})
-    # use groupby and transform to get the minimum cutoff in each group, and store this maximum in a new column
+    # use groupby and transform to get the maximum cutoff in each group, and store this maximum in a new column
     df_combined['max_cutoff'] = df_combined.groupby(
-        'ds')['CREAT_DATE'].transform('max')
+        'ds')['cutoff'].transform('max')
 
     # then keep only the rows where cutoff equals max_cutoff, i.e. the row with the largest cutoff in each group, other columns preserved
-    df_combined = df_combined[df_combined['CREAT_DATE']
+    df_combined = df_combined[df_combined['cutoff']
                               == df_combined['max_cutoff']]
     # drop the model-generated cutoff columns
     df_combined.drop(columns=['CREAT_DATE', 'max_cutoff', 'created_dt', 'min_within_quantile', 'max_within_quantile',
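The dedup now keys on the right column: `max_cutoff` becomes the per-`ds` maximum of `cutoff` (the training-window end produced by cross-validation) rather than of `CREAT_DATE`, so for each forecast date only the prediction made from the most recent training window survives. A toy illustration with made-up numbers:

    import pandas as pd

    # Two forecasts per date, from training windows ending at cutoff 1 and 2;
    # only the cutoff-2 (most recent) rows should remain.
    df = pd.DataFrame({
        'ds':     ['d1', 'd1', 'd2', 'd2'],
        'cutoff': [1, 2, 1, 2],
        'NHITS':  [10.0, 11.0, 20.0, 21.0],
    })
    df['max_cutoff'] = df.groupby('ds')['cutoff'].transform('max')
    latest = df[df['cutoff'] == df['max_cutoff']]
    print(latest[['ds', 'cutoff', 'NHITS']])  # rows where cutoff == 2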