Use prediction results to determine the best model over the last 20 trading days

commit f4eea44c8c (parent dbaf99fe38)
Author: workpc
Date: 2024-11-07 10:10:37 +08:00
3 changed files with 57 additions and 57 deletions


@@ -210,7 +210,7 @@ upload_data = {
 ### 开关
 is_train = True # 是否训练
-is_debug = True # 是否调试
+is_debug = False # 是否调试
 is_eta = False # 是否使用eta接口
 is_timefurture = True # 是否使用时间特征
 is_fivemodels = False # 是否使用之前保存的最佳的5个模型
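
For readers skimming the diff, the toggles after this commit amount to the following. A minimal self-contained sketch; the dataclass grouping is illustrative only, since the config file itself keeps these as plain module-level assignments:

    from dataclasses import dataclass

    @dataclass
    class RunFlags:
        # Values as set by this commit in the hunk above.
        is_train: bool = True         # train the models on this run
        is_debug: bool = False        # debug mode is switched off here
        is_eta: bool = False          # do not pull data through the eta API
        is_timefurture: bool = True   # add time-derived features
        is_fivemodels: bool = False   # do not reuse the previously saved best 5 models

    flags = RunFlags()
    print(flags)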

main.py

@@ -39,8 +39,8 @@ def predict_main():
         edbbusinessurl=edbbusinessurl,
     )
-    df_zhibiaoshuju,df_zhibiaoliebiao = etadata.get_eta_api_yuanyou_data(data_set=data_set,dataset=dataset) # 原始数据,未处理
-    # df_zhibiaoshuju,df_zhibiaoliebiao = etadata.get_eta_api_pp_data(data_set=data_set,dataset=dataset) # 原始数据,未处理
+    # df_zhibiaoshuju,df_zhibiaoliebiao = etadata.get_eta_api_yuanyou_data(data_set=data_set,dataset=dataset) # 原始数据,未处理
+    df_zhibiaoshuju,df_zhibiaoliebiao = etadata.get_eta_api_pp_data(data_set=data_set,dataset=dataset) # 原始数据,未处理
     # 数据处理
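
This hunk switches the active data source from the crude-oil loader (get_eta_api_yuanyou_data) to the PP loader (get_eta_api_pp_data) by commenting one line and uncommenting the other. A hedged sketch of the same switch driven by a variable, assuming only what the hunk shows (both loaders share the signature above, and etadata, data_set, dataset are in scope); the loader map and the product_type name are illustrative, not part of the repo:

    # Sketch only: pick the loader by key instead of editing comments.
    loaders = {
        'yuanyou': etadata.get_eta_api_yuanyou_data,  # crude-oil indicators
        'pp': etadata.get_eta_api_pp_data,            # PP indicators (active after this commit)
    }
    product_type = 'pp'
    df_zhibiaoshuju, df_zhibiaoliebiao = loaders[product_type](
        data_set=data_set, dataset=dataset)  # raw, unprocessed data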
@@ -118,36 +118,36 @@ def predict_main():
     row,col = df.shape
     now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
-    # ex_Model(df,
-    #         horizon=horizon,
-    #         input_size=input_size,
-    #         train_steps=train_steps,
-    #         val_check_steps=val_check_steps,
-    #         early_stop_patience_steps=early_stop_patience_steps,
-    #         is_debug=is_debug,
-    #         dataset=dataset,
-    #         is_train=is_train,
-    #         is_fivemodels=is_fivemodels,
-    #         val_size=val_size,
-    #         test_size=test_size,
-    #         settings=settings,
-    #         now=now,
-    #         etadata = etadata,
-    #         modelsindex = modelsindex,
-    #         data = data,
-    #         is_eta=is_eta,
-    #         )
+    ex_Model(df,
+            horizon=horizon,
+            input_size=input_size,
+            train_steps=train_steps,
+            val_check_steps=val_check_steps,
+            early_stop_patience_steps=early_stop_patience_steps,
+            is_debug=is_debug,
+            dataset=dataset,
+            is_train=is_train,
+            is_fivemodels=is_fivemodels,
+            val_size=val_size,
+            test_size=test_size,
+            settings=settings,
+            now=now,
+            etadata = etadata,
+            modelsindex = modelsindex,
+            data = data,
+            is_eta=is_eta,
+            )
     # # 模型评估
     model_results3 = model_losss_juxiting(sqlitedb)
     # 模型报告
-    # title = f'{settings}--{now}-预测报告' # 报告标题
-    # brent_export_pdf(dataset=dataset,num_models = 5 if is_fivemodels else 22,time=end_time,
-    #                 reportname=reportname,sqlitedb=sqlitedb),
-    # # pp_export_pdf(dataset=dataset,num_models = 5 if is_fivemodels else 22,time=end_time,
-    # #                 reportname=reportname),
-    # logger.info('模型训练完成')
+    title = f'{settings}--{now}-预测报告' # 报告标题
+    brent_export_pdf(dataset=dataset,num_models = 5 if is_fivemodels else 22,time=end_time,
+                    reportname=reportname,sqlitedb=sqlitedb),
+    # pp_export_pdf(dataset=dataset,num_models = 5 if is_fivemodels else 22,time=end_time,
+    #                 reportname=reportname),
+    logger.info('模型训练完成')
     # tansuanli_export_pdf(dataset=dataset,num_models = 5 if is_fivemodels else 22,end_time=end_time,reportname=reportname)
@@ -170,7 +170,7 @@ def predict_main():
         file=max(glob.glob(os.path.join(dataset,'*.pdf')), key=os.path.getctime),
         ssl=ssl,
     )
-    # m.send_mail()
+    m.send_mail()
 
 if __name__ == '__main__':
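
Re-enabling m.send_mail() means a mail or SMTP failure can now abort the run after the report has already been generated. A hedged sketch of one way to isolate that step (m and logger come from the surrounding code; the try/except wrapper is not in the repo):

    # Sketch only: keep a mail failure from crashing the finished pipeline.
    try:
        m.send_mail()
    except Exception as e:
        # The report PDF is already on disk, so just record the failure.
        logger.error(f'sending the report mail failed: {e}')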


@@ -510,33 +510,11 @@ def model_losss_juxiting(sqlitedb):
     # 使用最佳五个模型进行绘图
-    best_models = pd.read_csv(os.path.join(dataset,'best_modelnames.txt'),header=None).values.flatten().tolist()
-    def find_min_max_within_quantile(row):
-        row = row[best_models]
-        q10 = row.min()
-        q90 = row.max()
-        # 获取 row行10%分位值对应的模型名称
-        min_model = row[row == q10].idxmin()
-        max_model = row[row == q90].idxmin()
-        # # 判断flot值是否为空值
-        # if pd.isna(q10) or pd.isna(q90):
-        return pd.Series([q10, q90,min_model,max_model], index=['min_within_quantile','max_within_quantile','min_model','max_model'])
-    # 遍历行
-    df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)
-    df_combined = df_combined.round(4)
-    print(df_combined3)
-    # 通道使用预测模型的80%置信度
+    # best_models = pd.read_csv(os.path.join(dataset,'best_modelnames.txt'),header=None).values.flatten().tolist()
     # def find_min_max_within_quantile(row):
-    #     row.drop(['ds','y'], inplace=True)
-    #     # 获取分位数10%和90%的值
-    #     q10 = row.quantile(0.1)
-    #     q90 = row.quantile(0.9)
+    #     row = row[best_models]
+    #     q10 = row.min()
+    #     q90 = row.max()
     #     # 获取 row行10%分位值对应的模型名称
     #     min_model = row[row == q10].idxmin()
     #     max_model = row[row == q90].idxmin()
@@ -549,6 +527,28 @@ def model_losss_juxiting(sqlitedb):
     # df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)
     # df_combined = df_combined.round(4)
     # print(df_combined3)
+    # 通道使用预测模型的80%置信度
+    def find_min_max_within_quantile(row):
+        row.drop(['ds','y'], inplace=True)
+        # 获取分位数10%和90%的值
+        q10 = row.quantile(0.1)
+        q90 = row.quantile(0.9)
+        # 获取 row行10%分位值对应的模型名称
+        min_model = row[row == q10].index[0]
+        max_model = row[row == q90].index[0]
+        # # 判断flot值是否为空值
+        # if pd.isna(q10) or pd.isna(q90):
+        return pd.Series([q10, q90,min_model,max_model], index=['min_within_quantile','max_within_quantile','min_model','max_model'])
+    # 遍历行
+    df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)
+    df_combined = df_combined.round(4)
+    print(df_combined3)
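
One note on the re-enabled 80%-confidence channel above: with the default linear interpolation, row.quantile(0.1) is usually a value that lies between two model predictions, so the exact-equality lookup row[row == q10].index[0] can hit an empty selection and raise IndexError unless the quantile happens to coincide with a prediction. A self-contained sketch of mapping each quantile back to the nearest concrete model instead (toy data; the model names are made up):

    import pandas as pd

    # Toy row: predictions from a few models for one trading day.
    row = pd.Series({'NHITS': 71.2, 'TFT': 72.5, 'PatchTST': 70.8, 'TSMixer': 73.1})

    q10 = row.quantile(0.1)   # 10% quantile of the predictions (interpolated)
    q90 = row.quantile(0.9)   # 90% quantile

    # Nearest actual model to each quantile, instead of an exact-equality match.
    min_model = (row - q10).abs().idxmin()
    max_model = (row - q90).abs().idxmin()

    print(q10, q90, min_model, max_model)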
@@ -679,8 +679,8 @@ def model_losss_juxiting(sqlitedb):
     # 最多频率的模型名称
-    min_model_max_frequency_model = df_combined3['min_model'].value_counts().idxmax()
-    max_model_max_frequency_model = df_combined3['max_model'].value_counts().idxmax()
+    min_model_max_frequency_model = df_combined3['min_model'].tail(20).value_counts().idxmax()
+    max_model_max_frequency_model = df_combined3['max_model'].tail(20).value_counts().idxmax()
     df_predict['min_model'] = min_model_max_frequency_model
     df_predict['max_model'] = max_model_max_frequency_model
     df_predict['min_within_quantile'] = df_predict[min_model_max_frequency_model]
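
This last hunk is the change the commit title refers to: the representative 'min' and 'max' models for the prediction channel are now chosen by how often each model marked the channel bound over only the last 20 trading days of back-test rows, rather than over the whole history. A self-contained sketch of what tail(20).value_counts().idxmax() selects (the data is made up):

    import pandas as pd

    # Made-up history of which model sat on the channel bottom each back-test day.
    min_model = pd.Series(['NHITS'] * 30 + ['TFT'] * 15 + ['PatchTST'] * 5)

    # Before this commit: most frequent model over the whole history.
    overall_best = min_model.value_counts().idxmax()          # 'NHITS'

    # After this commit: most frequent model over the last 20 trading days,
    # so a recently dominant model wins even if it is rarer overall.
    recent_best = min_model.tail(20).value_counts().idxmax()  # 'TFT'

    print(overall_best, recent_best)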