Compare commits
No commits in common. "e39d90a503f234122db4bc5ac1aae7b8576fe66e" and "f10d04e661cd9ecfe726167e7fa117b840a650eb" have entirely different histories.
e39d90a503
...
f10d04e661
Binary file not shown.
33705
debugdemo.ipynb
33705
debugdemo.ipynb
File diff suppressed because it is too large
Load Diff
@ -289,7 +289,7 @@ def model_losss(sqlitedb):
|
||||
df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)
|
||||
|
||||
# 去除有空值的行
|
||||
# df_combined3.dropna(inplace=True)
|
||||
df_combined3.dropna(inplace=True)
|
||||
# 保存到数据库
|
||||
df_combined3.to_sql('testandpredict_groupby', sqlitedb.connection, if_exists='replace', index=False)
|
||||
df_combined3.to_csv(os.path.join(dataset,"testandpredict_groupby.csv"),index=False)
|
||||
@ -503,7 +503,7 @@ def model_losss_juxiting(sqlitedb):
|
||||
# 删除模型生成的cutoff列
|
||||
df_combined.drop(columns=['cutoff', 'max_cutoff'], inplace=True)
|
||||
# 获取模型名称
|
||||
modelnames = df_combined.columns.to_list()[1:]
|
||||
modelnames = df_combined.columns.to_list()[2:]
|
||||
if 'y' in modelnames:
|
||||
modelnames.remove('y')
|
||||
df_combined3 = df_combined.copy() # 备份df_combined,后面画图需要
|
||||
@ -534,101 +534,26 @@ def model_losss_juxiting(sqlitedb):
|
||||
with open(os.path.join(dataset,"best_modelnames.txt"), 'w') as f:
|
||||
f.write(','.join(modelnames) + '\n')
|
||||
|
||||
|
||||
# 根据真实值y确定最大最小值,去掉最高最低的预测值
|
||||
import heapq # 使用堆来找到最大和最小的值
|
||||
# 使用最佳五个模型进行绘图
|
||||
best_models = pd.read_csv(os.path.join(dataset,'best_modelnames.txt'),header=None).values.flatten().tolist()
|
||||
def find_min_max_within_quantile(row):
|
||||
true_value = row['y']
|
||||
row.drop(['ds','y'], inplace=True)
|
||||
row = row.astype(float).round(2)
|
||||
|
||||
max_heap = []
|
||||
min_heap = []
|
||||
for col in row.index:
|
||||
# 对比真实值进行分类
|
||||
if row[col] < true_value:
|
||||
heapq.heappush(min_heap, row[col])
|
||||
elif row[col] > true_value:
|
||||
heapq.heappush(max_heap, -row[col]) # 使用负号来实现最大堆
|
||||
|
||||
if len(max_heap) == 1:
|
||||
max_y = max_heap[0]
|
||||
elif len(max_heap) == 0:
|
||||
max_y = -min_heap[-1]
|
||||
else:
|
||||
max_y = heapq.nsmallest(2, max_heap)[1]
|
||||
|
||||
if len(min_heap) < 2 :
|
||||
min_y = -max_heap[-1]
|
||||
else:
|
||||
min_y = heapq.nsmallest(2, min_heap)[-1]
|
||||
|
||||
|
||||
# 获取最大和最小的值
|
||||
q10 = min_y
|
||||
q90 = -max_y
|
||||
|
||||
# 获取最大和最小的模型名称
|
||||
row = row[best_models]
|
||||
q10 = row.min()
|
||||
q90 = row.max()
|
||||
# 获取 row行最大最小值模型名称
|
||||
min_model = row[row == q10].idxmin()
|
||||
max_model = row[row == q90].idxmax()
|
||||
max_model = row[row == q90].idxmin()
|
||||
|
||||
# # 判断flot值是否为空值
|
||||
# if pd.isna(q10) or pd.isna(q90):
|
||||
return pd.Series([q10, q90,min_model,max_model], index=['min_within_quantile','max_within_quantile','min_model','max_model'])
|
||||
|
||||
# 设置上下界比例
|
||||
q10 = q10 * 0.99
|
||||
q90 = q90 * 0.99
|
||||
|
||||
logger.info(min_model,q10,max_model,q90)
|
||||
|
||||
return pd.Series([q10, q90, min_model, max_model], index=['min_within_quantile', 'max_within_quantile', 'min_model', 'max_model'])
|
||||
# # 遍历行
|
||||
# 遍历行
|
||||
df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)
|
||||
df_combined = df_combined.round(4)
|
||||
print(df_combined3)
|
||||
|
||||
# 使用最佳五个模型进行绘图
|
||||
# best_models = pd.read_csv(os.path.join(dataset,'best_modelnames.txt'),header=None).values.flatten().tolist()
|
||||
# def find_min_max_within_quantile(row):
|
||||
# row = row[best_models]
|
||||
# q10 = row.min()
|
||||
# q90 = row.max()
|
||||
# # 获取 row行最大最小值模型名称
|
||||
# min_model = row[row == q10].idxmin()
|
||||
# max_model = row[row == q90].idxmin()
|
||||
|
||||
# # # 判断flot值是否为空值
|
||||
# # if pd.isna(q10) or pd.isna(q90):
|
||||
# return pd.Series([q10, q90,min_model,max_model], index=['min_within_quantile','max_within_quantile','min_model','max_model'])
|
||||
|
||||
# # 遍历行
|
||||
# df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)
|
||||
# df_combined = df_combined.round(4)
|
||||
# print(df_combined3)
|
||||
|
||||
# # 通道使用模型评估前80%作为置信度
|
||||
# def find_min_max_within_quantile(row):
|
||||
# row.drop(['ds','y'], inplace=True)
|
||||
# row = row.astype(float).round(2)
|
||||
|
||||
# row_sorted = row
|
||||
# # 计算 10% 和 90% 位置的索引
|
||||
# index_10 = 0
|
||||
# index_90 = int(len(row_sorted) * 0.8)
|
||||
# q10 = row_sorted[index_10]
|
||||
# q90 = row_sorted[index_90]
|
||||
# # 获取模型名称
|
||||
# min_model = row[row == q10].idxmin()
|
||||
# max_model = row[row == q90].idxmin()
|
||||
|
||||
|
||||
# # # 判断flot值是否为空值
|
||||
# # if pd.isna(q10) or pd.isna(q90):
|
||||
# return pd.Series([q10, q90,min_model,max_model], index=['min_within_quantile','max_within_quantile','min_model','max_model'])
|
||||
|
||||
# # 重新排列
|
||||
# df_combined3 = df_combined3[['ds','y'] + allmodelnames]
|
||||
# # 遍历行
|
||||
# df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)
|
||||
# df_combined = df_combined.round(4)
|
||||
# print(df_combined3)
|
||||
|
||||
|
||||
# # 通道使用预测模型的80%置信度
|
||||
@ -716,7 +641,7 @@ def model_losss_juxiting(sqlitedb):
|
||||
plt.rcParams['font.sans-serif'] = ['SimHei']
|
||||
plt.figure(figsize=(15, 10))
|
||||
# 设置有5个子图的画布
|
||||
for n,model in enumerate(modelnames[:5]):
|
||||
for n,model in enumerate(modelnames):
|
||||
plt.subplot(3, 2, n+1)
|
||||
plt.plot(df_combined3['ds'], df_combined3['y'], label='真实值')
|
||||
plt.plot(df_combined3['ds'], df_combined3[model], label=model)
|
||||
@ -746,11 +671,11 @@ def model_losss_juxiting(sqlitedb):
|
||||
if not sqlitedb.check_table_exists('trueandpredict'):
|
||||
first_row.to_sql('trueandpredict',sqlitedb.connection,index=False)
|
||||
else:
|
||||
for col in first_row.columns:
|
||||
sqlitedb.add_column_if_not_exists('trueandpredict',col,'TEXT')
|
||||
for row in first_row.itertuples(index=False):
|
||||
row_dict = row._asdict()
|
||||
columns=row_dict.keys()
|
||||
for col in columns:
|
||||
sqlitedb.add_column_if_not_exists('trueandpredict',col,'TEXT')
|
||||
check_query = sqlitedb.select_data('trueandpredict',where_condition = f"ds = '{row.ds}'")
|
||||
if len(check_query) > 0:
|
||||
set_clause = ", ".join([f"{key} = '{value}'" for key, value in row_dict.items()])
|
||||
@ -763,10 +688,6 @@ def model_losss_juxiting(sqlitedb):
|
||||
# 最多频率的模型名称
|
||||
min_model_max_frequency_model = df_combined3['min_model'].tail(20).value_counts().idxmax()
|
||||
max_model_max_frequency_model = df_combined3['max_model'].tail(20).value_counts().idxmax()
|
||||
if min_model_max_frequency_model == max_model_max_frequency_model:
|
||||
# 取20天第二多的模型
|
||||
max_model_max_frequency_model = df_combined3['max_model'].tail(20).value_counts().nlargest(2).index[1]
|
||||
|
||||
df_predict['min_model'] = min_model_max_frequency_model
|
||||
df_predict['max_model'] = max_model_max_frequency_model
|
||||
df_predict['min_within_quantile'] = df_predict[min_model_max_frequency_model]
|
||||
|
Loading…
Reference in New Issue
Block a user