更新聚烯烃画图,模型取80%置信度

This commit is contained in:
liurui 2024-11-07 16:21:48 +08:00
parent c8320bf849
commit 005c6c97e7
5 changed files with 60 additions and 49 deletions

View File

@ -178,7 +178,7 @@ is_update_report = False # 是否上传报告
# 数据截取日期
end_time = '' # 数据截取日期
end_time = '2024-10-28' # 数据截取日期
delweekenday = True
is_corr = False # 特征是否参与滞后领先提升相关系数
add_kdj = False # 是否添加kdj指标

Binary file not shown.

View File

@ -20,8 +20,8 @@ plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签
plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号
from datetime import timedelta
# from config_jingbo import *
from config_juxiting import *
from config_jingbo import *
# from config_juxiting import *
from sklearn import metrics
from reportlab.pdfbase import pdfmetrics # 注册字体
from reportlab.pdfbase.ttfonts import TTFont # 字体类

83
main.py
View File

@ -1,7 +1,7 @@
# 读取配置
# from config_jingbo import *
from config_jingbo import *
# from config_tansuanli import *
from config_juxiting import *
# from config_juxiting import *
from lib.dataread import *
from lib.tools import *
from models.nerulforcastmodels import ex_Model,model_losss,brent_export_pdf,tansuanli_export_pdf,pp_export_pdf,model_losss_juxiting
@ -78,26 +78,31 @@ def predict_main():
is_weekday = datetime.datetime.now().weekday() == 3
if is_weekday:
logger.info('今天是周一,更新预测模型')
# 计算最近20天预测残差最低的模型名称
try:
# 计算最近20天预测残差最低的模型名称
model_results = sqlitedb.select_data('trueandpredict',order_by = "ds DESC",limit = "20")
model_results = model_results.dropna()
modelnames = model_results.columns.to_list()[2:]
for col in model_results[modelnames].select_dtypes(include=['object']).columns:
model_results[col] = model_results[col].astype(np.float32)
# 计算每个预测值与真实值之间的偏差率
for model in modelnames:
model_results[f'{model}_abs_error_rate'] = abs(model_results['y'] - model_results[model]) / model_results['y']
model_results = sqlitedb.select_data('trueandpredict',order_by = "ds DESC",limit = "20")
model_results = model_results.dropna()
modelnames = model_results.columns.to_list()[2:]
for col in model_results[modelnames].select_dtypes(include=['object']).columns:
model_results[col] = model_results[col].astype(np.float32)
# 计算每个预测值与真实值之间的偏差率
for model in modelnames:
model_results[f'{model}_abs_error_rate'] = abs(model_results['y'] - model_results[model]) / model_results['y']
# 获取每行对应的最小偏差率值
min_abs_error_rate_values = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].min(), axis=1)
# 获取每行对应的最小偏差率值对应的列名
min_abs_error_rate_column_name = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].idxmin(), axis=1)
# 将列名索引转换为列名
min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(lambda x: x.split('_')[0])
# 取出现次数最多的模型名称
most_common_model = min_abs_error_rate_column_name.value_counts().idxmax()
logger.info(f"最近20天预测残差最低的模型名称{most_common_model}")
# 获取每行对应的最小偏差率值
min_abs_error_rate_values = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].min(), axis=1)
# 获取每行对应的最小偏差率值对应的列名
min_abs_error_rate_column_name = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].idxmin(), axis=1)
# 将列名索引转换为列名
min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(lambda x: x.split('_')[0])
# 取出现次数最多的模型名称
most_common_model = min_abs_error_rate_column_name.value_counts().idxmax()
logger.info(f"最近20天预测残差最低的模型名称{most_common_model}")
except Exception as e:
logger.error(f"最近20天预测残差最低的模型名称计算失败{e}")
# 取txt中的模型名称
most_common_model = pd.read_csv(os.path.join(dataset,'best_modelnames.txt'),header=None).values.flatten().tolist()[0]
# 保存结果到数据库
@ -118,25 +123,25 @@ def predict_main():
row,col = df.shape
now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
# ex_Model(df,
# horizon=horizon,
# input_size=input_size,
# train_steps=train_steps,
# val_check_steps=val_check_steps,
# early_stop_patience_steps=early_stop_patience_steps,
# is_debug=is_debug,
# dataset=dataset,
# is_train=is_train,
# is_fivemodels=is_fivemodels,
# val_size=val_size,
# test_size=test_size,
# settings=settings,
# now=now,
# etadata = etadata,
# modelsindex = modelsindex,
# data = data,
# is_eta=is_eta,
# )
ex_Model(df,
horizon=horizon,
input_size=input_size,
train_steps=train_steps,
val_check_steps=val_check_steps,
early_stop_patience_steps=early_stop_patience_steps,
is_debug=is_debug,
dataset=dataset,
is_train=is_train,
is_fivemodels=is_fivemodels,
val_size=val_size,
test_size=test_size,
settings=settings,
now=now,
etadata = etadata,
modelsindex = modelsindex,
data = data,
is_eta=is_eta,
)
logger.info('模型训练完成')

View File

@ -540,7 +540,7 @@ def model_losss_juxiting(sqlitedb):
row = row[best_models]
q10 = row.min()
q90 = row.max()
# 获取 row行10%分位值对应的模型名称
# 获取 row行最大最小值模型名称
min_model = row[row == q10].idxmin()
max_model = row[row == q90].idxmin()
@ -559,12 +559,18 @@ def model_losss_juxiting(sqlitedb):
# # 通道使用预测模型的80%置信度
# def find_min_max_within_quantile(row):
# row.drop(['ds','y'], inplace=True)
# # 获取分位数10%和90%的值
# q10 = row.quantile(0.1)
# q90 = row.quantile(0.9)
# # 获取 row行10%分位值对应的模型名称
# min_model = row[row == q10].index[0]
# max_model = row[row == q90].index[0]
# row = row.astype(float).round(2)
# row_sorted = row.sort_values(ascending=True).reset_index(drop=True)
# # 计算 10% 和 90% 位置的索引
# index_10 = int(len(row_sorted) * 0.1)
# index_90 = int(len(row_sorted) * 0.9)
# q10 = row_sorted[index_10]
# q90 = row_sorted[index_90]
# # 获取模型名称
# min_model = row[row == q10].idxmin()
# max_model = row[row == q90].idxmin()
# # # 判断float值是否为空值
# # if pd.isna(q10) or pd.isna(q90):