预测结果判断近20个交易日的最佳模型

This commit is contained in:
workpc 2024-11-07 10:10:37 +08:00
parent dbaf99fe38
commit f4eea44c8c
3 changed files with 57 additions and 57 deletions

View File

@ -210,7 +210,7 @@ upload_data = {
### 开关
is_train = True # 是否训练
is_debug = True # 是否调试
is_debug = False # 是否调试
is_eta = False # 是否使用eta接口
is_timefurture = True # 是否使用时间特征
is_fivemodels = False # 是否使用之前保存的最佳的5个模型

56
main.py
View File

@ -39,8 +39,8 @@ def predict_main():
edbbusinessurl=edbbusinessurl,
)
df_zhibiaoshuju,df_zhibiaoliebiao = etadata.get_eta_api_yuanyou_data(data_set=data_set,dataset=dataset) # 原始数据,未处理
# df_zhibiaoshuju,df_zhibiaoliebiao = etadata.get_eta_api_pp_data(data_set=data_set,dataset=dataset) # 原始数据,未处理
# df_zhibiaoshuju,df_zhibiaoliebiao = etadata.get_eta_api_yuanyou_data(data_set=data_set,dataset=dataset) # 原始数据,未处理
df_zhibiaoshuju,df_zhibiaoliebiao = etadata.get_eta_api_pp_data(data_set=data_set,dataset=dataset) # 原始数据,未处理
# 数据处理
@ -118,36 +118,36 @@ def predict_main():
row,col = df.shape
now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
# ex_Model(df,
# horizon=horizon,
# input_size=input_size,
# train_steps=train_steps,
# val_check_steps=val_check_steps,
# early_stop_patience_steps=early_stop_patience_steps,
# is_debug=is_debug,
# dataset=dataset,
# is_train=is_train,
# is_fivemodels=is_fivemodels,
# val_size=val_size,
# test_size=test_size,
# settings=settings,
# now=now,
# etadata = etadata,
# modelsindex = modelsindex,
# data = data,
# is_eta=is_eta,
# )
ex_Model(df,
horizon=horizon,
input_size=input_size,
train_steps=train_steps,
val_check_steps=val_check_steps,
early_stop_patience_steps=early_stop_patience_steps,
is_debug=is_debug,
dataset=dataset,
is_train=is_train,
is_fivemodels=is_fivemodels,
val_size=val_size,
test_size=test_size,
settings=settings,
now=now,
etadata = etadata,
modelsindex = modelsindex,
data = data,
is_eta=is_eta,
)
# # 模型评估
model_results3 = model_losss_juxiting(sqlitedb)
# 模型报告
# title = f'{settings}--{now}-预测报告' # 报告标题
# brent_export_pdf(dataset=dataset,num_models = 5 if is_fivemodels else 22,time=end_time,
# reportname=reportname,sqlitedb=sqlitedb),
# # pp_export_pdf(dataset=dataset,num_models = 5 if is_fivemodels else 22,time=end_time,
# # reportname=reportname),
# logger.info('模型训练完成')
title = f'{settings}--{now}-预测报告' # 报告标题
brent_export_pdf(dataset=dataset,num_models = 5 if is_fivemodels else 22,time=end_time,
reportname=reportname,sqlitedb=sqlitedb),
# pp_export_pdf(dataset=dataset,num_models = 5 if is_fivemodels else 22,time=end_time,
# reportname=reportname),
logger.info('模型训练完成')
# tansuanli_export_pdf(dataset=dataset,num_models = 5 if is_fivemodels else 22,end_time=end_time,reportname=reportname)
@ -170,7 +170,7 @@ def predict_main():
file=max(glob.glob(os.path.join(dataset,'*.pdf')), key=os.path.getctime),
ssl=ssl,
)
# m.send_mail()
m.send_mail()
if __name__ == '__main__':

View File

@ -510,33 +510,11 @@ def model_losss_juxiting(sqlitedb):
# 使用最佳五个模型进行绘图
best_models = pd.read_csv(os.path.join(dataset,'best_modelnames.txt'),header=None).values.flatten().tolist()
def find_min_max_within_quantile(row):
    """Return the lowest and highest prediction among the best models.

    Args:
        row: one row of the combined prediction frame, as handed over by
            ``DataFrame.apply(..., axis=1)``.

    Returns:
        pandas.Series indexed by 'min_within_quantile',
        'max_within_quantile', 'min_model' and 'max_model': the smallest
        and largest value among the ``best_models`` columns, followed by
        the label of the column holding each of them.
    """
    # Only the columns listed in best_models (enclosing scope) take part.
    candidates = row[best_models]
    lowest = candidates.min()
    highest = candidates.max()
    # Label of the first column whose value equals each extreme.
    lowest_model = candidates[candidates == lowest].idxmin()
    highest_model = candidates[candidates == highest].idxmin()
    labels = ['min_within_quantile', 'max_within_quantile', 'min_model', 'max_model']
    return pd.Series([lowest, highest, lowest_model, highest_model], index=labels)
# 遍历行
df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)
df_combined = df_combined.round(4)
print(df_combined3)
# 通道使用预测模型的80%置信度
# best_models = pd.read_csv(os.path.join(dataset,'best_modelnames.txt'),header=None).values.flatten().tolist()
# def find_min_max_within_quantile(row):
# row.drop(['ds','y'], inplace=True)
# # 获取分位数10%和90%的值
# q10 = row.quantile(0.1)
# q90 = row.quantile(0.9)
# row = row[best_models]
# q10 = row.min()
# q90 = row.max()
# # 获取 row行10%分位值对应的模型名称
# min_model = row[row == q10].idxmin()
# max_model = row[row == q90].idxmin()
@ -553,6 +531,28 @@ def model_losss_juxiting(sqlitedb):
# 通道使用预测模型的80%置信度
def find_min_max_within_quantile(row):
    """Return the 10%-90% quantile band of one row of model predictions.

    Args:
        row: one row of the combined prediction frame, as handed over by
            ``DataFrame.apply(..., axis=1)``. It must contain the labels
            'ds' and 'y'; every other entry is treated as a model
            prediction.

    Returns:
        pandas.Series indexed by 'min_within_quantile',
        'max_within_quantile', 'min_model' and 'max_model': the 10% and
        90% quantiles of the predictions, followed by the label of the
        model whose prediction lies closest to each quantile. The model
        labels are missing (None/NaN) when the quantiles cannot be
        computed.

    Raises:
        KeyError: if 'ds' or 'y' is not present in ``row``.
    """
    # Drop into a new Series instead of in place so the caller's row is
    # not mutated, and coerce to float because a row taken from a
    # mixed-type frame has object dtype.
    preds = pd.to_numeric(row.drop(labels=['ds', 'y']), errors='coerce')
    # 10% and 90% quantiles across the model predictions.
    q10 = preds.quantile(0.1)
    q90 = preds.quantile(0.9)
    if pd.isna(q10) or pd.isna(q90):
        # No usable prediction in this row: there is no model to name.
        min_model = None
        max_model = None
    else:
        # Quantiles are interpolated and usually equal none of the
        # predictions, so an exact-match lookup comes back empty. Take
        # the nearest model instead; when an exact match exists it is
        # the nearest one, and idxmin keeps the first such label.
        min_model = (preds - q10).abs().idxmin()
        max_model = (preds - q90).abs().idxmin()
    return pd.Series([q10, q90,min_model,max_model], index=['min_within_quantile','max_within_quantile','min_model','max_model'])
# 遍历行
df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)
df_combined = df_combined.round(4)
print(df_combined3)
# # 计算波动率
# df_combined3['volatility'] = df_combined3['y'].pct_change().round(4)
# # 计算近60日的波动率 10% 90%分位数
@ -679,8 +679,8 @@ def model_losss_juxiting(sqlitedb):
# 最多频率的模型名称
min_model_max_frequency_model = df_combined3['min_model'].value_counts().idxmax()
max_model_max_frequency_model = df_combined3['max_model'].value_counts().idxmax()
min_model_max_frequency_model = df_combined3['min_model'].tail(20).value_counts().idxmax()
max_model_max_frequency_model = df_combined3['max_model'].tail(20).value_counts().idxmax()
df_predict['min_model'] = min_model_max_frequency_model
df_predict['max_model'] = max_model_max_frequency_model
df_predict['min_within_quantile'] = df_predict[min_model_max_frequency_model]