3 changed files with 475 additions and 33343 deletions
--- a/dataset/jbsh_yuanyou.db
+++ b/dataset/jbsh_yuanyou.db
--- a/debugdemo.ipynb
+++ b/debugdemo.ipynb
--- a/models/nerulforcastmodels.py
+++ b/models/nerulforcastmodels.py
@@ -289,7 +289,7 @@ def model_losss(sqlitedb):
    df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)

    # 去除有空值的行
-    # df_combined3.dropna(inplace=True)
+    df_combined3.dropna(inplace=True)
    # 保存到数据库
    df_combined3.to_sql('testandpredict_groupby', sqlitedb.connection, if_exists='replace', index=False)
    df_combined3.to_csv(os.path.join(dataset,"testandpredict_groupby.csv"),index=False)
@@ -503,7 +503,7 @@ def model_losss_juxiting(sqlitedb):
    # 删除模型生成的cutoff列
    df_combined.drop(columns=['cutoff', 'max_cutoff'], inplace=True)
    # 获取模型名称
-    modelnames  = df_combined.columns.to_list()[1:] 
+    modelnames  = df_combined.columns.to_list()[2:] 
    if 'y' in modelnames:
        modelnames.remove('y')
    df_combined3 = df_combined.copy()  # 备份df_combined,后面画图需要
@@ -534,101 +534,26 @@ def model_losss_juxiting(sqlitedb):
    with open(os.path.join(dataset,"best_modelnames.txt"), 'w') as f:
        f.write(','.join(modelnames) + '\n')

-    
-    # 根据真实值y确定最大最小值,去掉最高最低的预测值
-    import heapq          # 使用堆来找到最大和最小的值
+    # 使用最佳五个模型进行绘图
+    best_models = pd.read_csv(os.path.join(dataset,'best_modelnames.txt'),header=None).values.flatten().tolist()
    def find_min_max_within_quantile(row):
-        true_value = row['y']
-        row.drop(['ds','y'], inplace=True)
-        row = row.astype(float).round(2)
-
-        max_heap = []
-        min_heap = []
-        for col in row.index:
-            # 对比真实值进行分类
-            if row[col] < true_value:
-                heapq.heappush(min_heap, row[col])
-            elif row[col] > true_value:
-                heapq.heappush(max_heap, -row[col])  # 使用负号来实现最大堆
-
-        if len(max_heap) == 1:
-            max_y = max_heap[0]
-        elif len(max_heap) == 0:
-            max_y = -min_heap[-1]
-        else:
-            max_y = heapq.nsmallest(2, max_heap)[1]
-
-        if len(min_heap) < 2 :
-            min_y = -max_heap[-1]
-        else:
-            min_y = heapq.nsmallest(2, min_heap)[-1]
-
-
-        # 获取最大和最小的值
-        q10 = min_y 
-        q90 = -max_y
-
-        # 获取最大和最小的模型名称
+        row = row[best_models]
+        q10 = row.min()
+        q90 = row.max()
+        # 获取 row行最大最小值模型名称
        min_model = row[row == q10].idxmin()
-        max_model = row[row == q90].idxmax()
+        max_model = row[row == q90].idxmin()
+        
+        # # 判断float值是否为空值
+        # if pd.isna(q10) or pd.isna(q90):
+        return pd.Series([q10, q90,min_model,max_model], index=['min_within_quantile','max_within_quantile','min_model','max_model'])

-        # 设置上下界比例
-        q10 = q10 * 0.99
-        q90 = q90 * 0.99
-
-        logger.info(min_model,q10,max_model,q90)
-
-        return pd.Series([q10, q90, min_model, max_model], index=['min_within_quantile', 'max_within_quantile', 'min_model', 'max_model'])
-    # # 遍历行
+    # 遍历行
    df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)
    df_combined = df_combined.round(4)
    print(df_combined3)

-    # 使用最佳五个模型进行绘图
-    # best_models = pd.read_csv(os.path.join(dataset,'best_modelnames.txt'),header=None).values.flatten().tolist()
-    # def find_min_max_within_quantile(row):
-    #     row = row[best_models]
-    #     q10 = row.min()
-    #     q90 = row.max()
-    #     # 获取 row行最大最小值模型名称
-    #     min_model = row[row == q10].idxmin()
-    #     max_model = row[row == q90].idxmin()
-        
-    #     # # 判断flot值是否为空值
-    #     # if pd.isna(q10) or pd.isna(q90):
-    #     return pd.Series([q10, q90,min_model,max_model], index=['min_within_quantile','max_within_quantile','min_model','max_model'])

-    # # 遍历行
-    # df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)
-    # df_combined = df_combined.round(4)
-    # print(df_combined3)
-
-    # # 通道使用模型评估前80%作为置信度
-    # def find_min_max_within_quantile(row):
-    #     row.drop(['ds','y'], inplace=True)
-    #     row = row.astype(float).round(2)
-
-    #     row_sorted = row
-    #     # 计算 10% 和 90% 位置的索引
-    #     index_10 = 0
-    #     index_90 = int(len(row_sorted) * 0.8)
-    #     q10 = row_sorted[index_10]
-    #     q90 = row_sorted[index_90]
-    #     # 获取模型名称
-    #     min_model = row[row == q10].idxmin()
-    #     max_model = row[row == q90].idxmin()
-
-        
-    #     # # 判断flot值是否为空值
-    #     # if pd.isna(q10) or pd.isna(q90):
-    #     return pd.Series([q10, q90,min_model,max_model], index=['min_within_quantile','max_within_quantile','min_model','max_model'])
-
-    # # 重新排列
-    # df_combined3 = df_combined3[['ds','y'] + allmodelnames]
-    # # 遍历行
-    # df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)
-    # df_combined = df_combined.round(4)
-    # print(df_combined3)


    # # 通道使用预测模型的80%置信度
@@ -716,7 +641,7 @@ def model_losss_juxiting(sqlitedb):
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.figure(figsize=(15, 10))
    # 设置有5个子图的画布
-    for n,model in enumerate(modelnames[:5]):
+    for n,model in enumerate(modelnames):
        plt.subplot(3, 2, n+1)
        plt.plot(df_combined3['ds'], df_combined3['y'], label='真实值')
        plt.plot(df_combined3['ds'], df_combined3[model], label=model)
@@ -746,11 +671,11 @@ def model_losss_juxiting(sqlitedb):
    if not sqlitedb.check_table_exists('trueandpredict'):
        first_row.to_sql('trueandpredict',sqlitedb.connection,index=False)
    else:
-        for col in first_row.columns:
-                sqlitedb.add_column_if_not_exists('trueandpredict',col,'TEXT')
        for row in first_row.itertuples(index=False):
            row_dict = row._asdict()
            columns=row_dict.keys()
+            for col in columns:
+                sqlitedb.add_column_if_not_exists('trueandpredict',col,'TEXT')
            check_query = sqlitedb.select_data('trueandpredict',where_condition = f"ds = '{row.ds}'")
            if len(check_query) > 0:
                set_clause = ", ".join([f"{key} = '{value}'" for key, value in row_dict.items()])
@@ -763,10 +688,6 @@ def model_losss_juxiting(sqlitedb):
    # 最多频率的模型名称
    min_model_max_frequency_model = df_combined3['min_model'].tail(20).value_counts().idxmax()
    max_model_max_frequency_model = df_combined3['max_model'].tail(20).value_counts().idxmax()
-    if min_model_max_frequency_model == max_model_max_frequency_model:
-        # 取20天第二多的模型
-        max_model_max_frequency_model = df_combined3['max_model'].tail(20).value_counts().nlargest(2).index[1]
-
    df_predict['min_model'] = min_model_max_frequency_model
    df_predict['max_model'] = max_model_max_frequency_model
    df_predict['min_within_quantile'] = df_predict[min_model_max_frequency_model]