根据模型预测结果,对比真实值分组取出最大最小值
This commit is contained in:
parent
005c6c97e7
commit
83589bed46
@ -178,7 +178,7 @@ is_update_report = False # 是否上传报告
|
||||
|
||||
|
||||
# 数据截取日期
|
||||
end_time = '2024-10-28' # 数据截取日期
|
||||
end_time = '2024-10-29' # 数据截取日期
|
||||
delweekenday = True
|
||||
is_corr = False # 特征是否参与滞后领先提升相关系数
|
||||
add_kdj = False # 是否添加kdj指标
|
||||
|
Binary file not shown.
33705
debugdemo.ipynb
33705
debugdemo.ipynb
File diff suppressed because it is too large
Load Diff
6
main.py
6
main.py
@ -39,8 +39,8 @@ def predict_main():
|
||||
edbbusinessurl=edbbusinessurl,
|
||||
)
|
||||
|
||||
# df_zhibiaoshuju,df_zhibiaoliebiao = etadata.get_eta_api_yuanyou_data(data_set=data_set,dataset=dataset) # 原始数据,未处理
|
||||
df_zhibiaoshuju,df_zhibiaoliebiao = etadata.get_eta_api_pp_data(data_set=data_set,dataset=dataset) # 原始数据,未处理
|
||||
df_zhibiaoshuju,df_zhibiaoliebiao = etadata.get_eta_api_yuanyou_data(data_set=data_set,dataset=dataset) # 原始数据,未处理
|
||||
# df_zhibiaoshuju,df_zhibiaoliebiao = etadata.get_eta_api_pp_data(data_set=data_set,dataset=dataset) # 原始数据,未处理
|
||||
|
||||
|
||||
# 数据处理
|
||||
@ -75,7 +75,7 @@ def predict_main():
|
||||
|
||||
import datetime
|
||||
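# Check whether today is the weekly model-update day. NOTE(review): datetime.weekday() returns 0 for Monday, but the comparison below uses 3/4 (Thursday/Friday) while the comment and log message say Monday - confirm the intended day.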
|
||||
is_weekday = datetime.datetime.now().weekday() == 3
|
||||
is_weekday = datetime.datetime.now().weekday() == 4
|
||||
if is_weekday:
|
||||
logger.info('今天是周一,更新预测模型')
|
||||
try:
|
||||
|
@ -289,7 +289,7 @@ def model_losss(sqlitedb):
|
||||
df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)
|
||||
|
||||
# 去除有空值的行
|
||||
df_combined3.dropna(inplace=True)
|
||||
# df_combined3.dropna(inplace=True)
|
||||
# 保存到数据库
|
||||
df_combined3.to_sql('testandpredict_groupby', sqlitedb.connection, if_exists='replace', index=False)
|
||||
df_combined3.to_csv(os.path.join(dataset,"testandpredict_groupby.csv"),index=False)
|
||||
@ -503,7 +503,7 @@ def model_losss_juxiting(sqlitedb):
|
||||
# 删除模型生成的cutoff列
|
||||
df_combined.drop(columns=['cutoff', 'max_cutoff'], inplace=True)
|
||||
# 获取模型名称
|
||||
modelnames = df_combined.columns.to_list()[2:]
|
||||
modelnames = df_combined.columns.to_list()[1:]
|
||||
if 'y' in modelnames:
|
||||
modelnames.remove('y')
|
||||
df_combined3 = df_combined.copy() # 备份df_combined,后面画图需要
|
||||
@ -534,26 +534,101 @@ def model_losss_juxiting(sqlitedb):
|
||||
with open(os.path.join(dataset,"best_modelnames.txt"), 'w') as f:
|
||||
f.write(','.join(modelnames) + '\n')
|
||||
|
||||
# 使用最佳五个模型进行绘图
|
||||
best_models = pd.read_csv(os.path.join(dataset,'best_modelnames.txt'),header=None).values.flatten().tolist()
|
||||
|
||||
# 根据真实值y确定最大最小值,去掉最高最低的预测值
|
||||
import heapq # 使用堆来找到最大和最小的值
|
||||
def find_min_max_within_quantile(row):
    """Pick the lower/upper channel bounds for one row of predictions.

    The model predictions in ``row`` are split into those below and those
    above the true value ``row['y']``. The most extreme prediction on each
    side is discarded, so the bounds are the second-smallest value below the
    truth and the second-largest value above it.

    Fallbacks when a side has too few predictions:

    * exactly one prediction above the truth: use it as the upper bound;
    * no prediction above the truth: use the largest prediction below it;
    * fewer than two predictions below the truth: use the smallest
      prediction above it, or the single prediction below if none is above;
    * no prediction differs from the truth (for example ``y`` is NaN):
      return NaN bounds and ``None`` model names instead of raising.

    Args:
        row: a ``pd.Series`` holding ``'ds'``, ``'y'`` and one numeric entry
            per model. The passed Series is not modified.

    Returns:
        ``pd.Series`` indexed by ``min_within_quantile``,
        ``max_within_quantile``, ``min_model`` and ``max_model``.

    Raises:
        KeyError: if ``'ds'`` or ``'y'`` is missing from ``row``.
    """
    result_index = ['min_within_quantile', 'max_within_quantile', 'min_model', 'max_model']

    true_value = row['y']
    # Drop on a copy: an in-place drop would mutate the Series handed in by
    # DataFrame.apply.
    preds = row.drop(labels=['ds', 'y']).astype(float).round(2)

    # Sorted ascending; NaN predictions (or a NaN true value) compare False
    # on both tests and therefore land in neither group.
    below = preds[preds < true_value].sort_values()
    above = preds[preds > true_value].sort_values()

    if below.empty and above.empty:
        # Nothing to bound with; the previous heap-based code raised
        # IndexError here.
        return pd.Series([float('nan'), float('nan'), None, None], index=result_index)

    # Upper bound: second-largest prediction above the truth.
    if len(above) >= 2:
        q90 = above.iloc[-2]
    elif len(above) == 1:
        q90 = above.iloc[0]
    else:
        # The previous code read min_heap[-1], which is only an arbitrary
        # heap leaf; the largest value below the truth is used explicitly.
        q90 = below.iloc[-1]

    # Lower bound: second-smallest prediction below the truth.
    if len(below) >= 2:
        q10 = below.iloc[1]
    elif len(above) > 0:
        # The previous code read -max_heap[-1] (an arbitrary heap leaf); the
        # smallest value above the truth is used explicitly.
        q10 = above.iloc[0]
    else:
        q10 = below.iloc[0]

    # Name of the first model whose (rounded) prediction equals each bound.
    min_model = preds.index[preds == q10][0]
    max_model = preds.index[preds == q90][0]

    # Scale the bounds.
    # NOTE(review): both bounds are multiplied by 0.99, which also lowers the
    # upper bound - confirm whether the upper bound should be widened instead.
    q10 = q10 * 0.99
    q90 = q90 * 0.99

    # logging expects a format string followed by its arguments; passing the
    # values positionally without placeholders triggers a logging error.
    logger.info('min_model=%s q10=%s max_model=%s q90=%s', min_model, q10, max_model, q90)

    return pd.Series([q10, q90, min_model, max_model], index=result_index)
|
||||
# # 遍历行
|
||||
df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)
|
||||
df_combined = df_combined.round(4)
|
||||
print(df_combined3)
|
||||
|
||||
# 使用最佳五个模型进行绘图
|
||||
# best_models = pd.read_csv(os.path.join(dataset,'best_modelnames.txt'),header=None).values.flatten().tolist()
|
||||
# def find_min_max_within_quantile(row):
|
||||
# row = row[best_models]
|
||||
# q10 = row.min()
|
||||
# q90 = row.max()
|
||||
# # 获取 row行最大最小值模型名称
|
||||
# min_model = row[row == q10].idxmin()
|
||||
# max_model = row[row == q90].idxmin()
|
||||
|
||||
# # # 判断float值是否为空值
|
||||
# # if pd.isna(q10) or pd.isna(q90):
|
||||
# return pd.Series([q10, q90,min_model,max_model], index=['min_within_quantile','max_within_quantile','min_model','max_model'])
|
||||
|
||||
# # 遍历行
|
||||
# df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)
|
||||
# df_combined = df_combined.round(4)
|
||||
# print(df_combined3)
|
||||
|
||||
# # 通道使用模型评估前80%作为置信度
|
||||
# def find_min_max_within_quantile(row):
|
||||
# row.drop(['ds','y'], inplace=True)
|
||||
# row = row.astype(float).round(2)
|
||||
|
||||
# row_sorted = row
|
||||
# # 计算 10% 和 90% 位置的索引
|
||||
# index_10 = 0
|
||||
# index_90 = int(len(row_sorted) * 0.8)
|
||||
# q10 = row_sorted[index_10]
|
||||
# q90 = row_sorted[index_90]
|
||||
# # 获取模型名称
|
||||
# min_model = row[row == q10].idxmin()
|
||||
# max_model = row[row == q90].idxmin()
|
||||
|
||||
|
||||
# # # 判断float值是否为空值
|
||||
# # if pd.isna(q10) or pd.isna(q90):
|
||||
# return pd.Series([q10, q90,min_model,max_model], index=['min_within_quantile','max_within_quantile','min_model','max_model'])
|
||||
|
||||
# # 重新排列
|
||||
# df_combined3 = df_combined3[['ds','y'] + allmodelnames]
|
||||
# # 遍历行
|
||||
# df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)
|
||||
# df_combined = df_combined.round(4)
|
||||
# print(df_combined3)
|
||||
|
||||
|
||||
# # 通道使用预测模型的80%置信度
|
||||
@ -641,7 +716,7 @@ def model_losss_juxiting(sqlitedb):
|
||||
plt.rcParams['font.sans-serif'] = ['SimHei']
|
||||
plt.figure(figsize=(15, 10))
|
||||
# 设置有5个子图的画布
|
||||
for n,model in enumerate(modelnames):
|
||||
for n,model in enumerate(modelnames[:5]):
|
||||
plt.subplot(3, 2, n+1)
|
||||
plt.plot(df_combined3['ds'], df_combined3['y'], label='真实值')
|
||||
plt.plot(df_combined3['ds'], df_combined3[model], label=model)
|
||||
@ -671,11 +746,11 @@ def model_losss_juxiting(sqlitedb):
|
||||
if not sqlitedb.check_table_exists('trueandpredict'):
|
||||
first_row.to_sql('trueandpredict',sqlitedb.connection,index=False)
|
||||
else:
|
||||
for col in first_row.columns:
|
||||
sqlitedb.add_column_if_not_exists('trueandpredict',col,'TEXT')
|
||||
for row in first_row.itertuples(index=False):
|
||||
row_dict = row._asdict()
|
||||
columns=row_dict.keys()
|
||||
for col in columns:
|
||||
sqlitedb.add_column_if_not_exists('trueandpredict',col,'TEXT')
|
||||
check_query = sqlitedb.select_data('trueandpredict',where_condition = f"ds = '{row.ds}'")
|
||||
if len(check_query) > 0:
|
||||
set_clause = ", ".join([f"{key} = '{value}'" for key, value in row_dict.items()])
|
||||
@ -688,6 +763,10 @@ def model_losss_juxiting(sqlitedb):
|
||||
# 最多频率的模型名称
|
||||
min_model_max_frequency_model = df_combined3['min_model'].tail(20).value_counts().idxmax()
|
||||
max_model_max_frequency_model = df_combined3['max_model'].tail(20).value_counts().idxmax()
|
||||
if min_model_max_frequency_model == max_model_max_frequency_model:
|
||||
# 取20天第二多的模型
|
||||
max_model_max_frequency_model = df_combined3['max_model'].tail(20).value_counts().nlargest(2).index[1]
|
||||
|
||||
df_predict['min_model'] = min_model_max_frequency_model
|
||||
df_predict['max_model'] = max_model_max_frequency_model
|
||||
df_predict['min_within_quantile'] = df_predict[min_model_max_frequency_model]
|
||||
|
Loading…
Reference in New Issue
Block a user