Compare commits
	
		
			2 Commits
		
	
	
		
			f10d04e661
			...
			e39d90a503
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | e39d90a503 | ||
|  | 83589bed46 | ||
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										33705
									
								
								debugdemo.ipynb
									
									
									
									
									
								
							
							
						
						
									
										33705
									
								
								debugdemo.ipynb
									
									
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -289,7 +289,7 @@ def model_losss(sqlitedb): | ||||
|     df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1) | ||||
| 
 | ||||
|     # 去除有空值的行 | ||||
|     df_combined3.dropna(inplace=True) | ||||
|     # df_combined3.dropna(inplace=True) | ||||
|     # 保存到数据库 | ||||
|     df_combined3.to_sql('testandpredict_groupby', sqlitedb.connection, if_exists='replace', index=False) | ||||
|     df_combined3.to_csv(os.path.join(dataset,"testandpredict_groupby.csv"),index=False) | ||||
| @@ -503,7 +503,7 @@ def model_losss_juxiting(sqlitedb): | ||||
|     # 删除模型生成的cutoff列 | ||||
|     df_combined.drop(columns=['cutoff', 'max_cutoff'], inplace=True) | ||||
|     # 获取模型名称 | ||||
|     modelnames  = df_combined.columns.to_list()[2:]  | ||||
|     modelnames  = df_combined.columns.to_list()[1:]  | ||||
|     if 'y' in modelnames: | ||||
|         modelnames.remove('y') | ||||
|     df_combined3 = df_combined.copy()  # 备份df_combined,后面画图需要 | ||||
| @@ -534,26 +534,101 @@ def model_losss_juxiting(sqlitedb): | ||||
|     with open(os.path.join(dataset,"best_modelnames.txt"), 'w') as f: | ||||
|         f.write(','.join(modelnames) + '\n') | ||||
| 
 | ||||
|     # 使用最佳五个模型进行绘图 | ||||
|     best_models = pd.read_csv(os.path.join(dataset,'best_modelnames.txt'),header=None).values.flatten().tolist() | ||||
|      | ||||
|     # 根据真实值y确定最大最小值,去掉最高最低的预测值 | ||||
|     import heapq          # 使用堆来找到最大和最小的值 | ||||
|     def find_min_max_within_quantile(row): | ||||
|         row = row[best_models] | ||||
|         q10 = row.min() | ||||
|         q90 = row.max() | ||||
|         # 获取 row行最大最小值模型名称 | ||||
|         min_model = row[row == q10].idxmin() | ||||
|         max_model = row[row == q90].idxmin() | ||||
|          | ||||
|         # # 判断flot值是否为空值 | ||||
|         # if pd.isna(q10) or pd.isna(q90): | ||||
|         return pd.Series([q10, q90,min_model,max_model], index=['min_within_quantile','max_within_quantile','min_model','max_model']) | ||||
|         true_value = row['y'] | ||||
|         row.drop(['ds','y'], inplace=True) | ||||
|         row = row.astype(float).round(2) | ||||
| 
 | ||||
|     # 遍历行 | ||||
|         max_heap = [] | ||||
|         min_heap = [] | ||||
|         for col in row.index: | ||||
|             # 对比真实值进行分类 | ||||
|             if row[col] < true_value: | ||||
|                 heapq.heappush(min_heap, row[col]) | ||||
|             elif row[col] > true_value: | ||||
|                 heapq.heappush(max_heap, -row[col])  # 使用负号来实现最大堆 | ||||
| 
 | ||||
|         if len(max_heap) == 1: | ||||
|             max_y = max_heap[0] | ||||
|         elif len(max_heap) == 0: | ||||
|             max_y = -min_heap[-1] | ||||
|         else: | ||||
|             max_y = heapq.nsmallest(2, max_heap)[1] | ||||
| 
 | ||||
|         if len(min_heap) < 2 : | ||||
|             min_y = -max_heap[-1] | ||||
|         else: | ||||
|             min_y = heapq.nsmallest(2, min_heap)[-1] | ||||
| 
 | ||||
| 
 | ||||
|         # 获取最大和最小的值 | ||||
|         q10 = min_y  | ||||
|         q90 = -max_y | ||||
| 
 | ||||
|         # 获取最大和最小的模型名称 | ||||
|         min_model = row[row == q10].idxmin() | ||||
|         max_model = row[row == q90].idxmax() | ||||
| 
 | ||||
|         # 设置上下界比例 | ||||
|         q10 = q10 * 0.99 | ||||
|         q90 = q90 * 0.99 | ||||
| 
 | ||||
|         logger.info(min_model,q10,max_model,q90) | ||||
| 
 | ||||
|         return pd.Series([q10, q90, min_model, max_model], index=['min_within_quantile', 'max_within_quantile', 'min_model', 'max_model']) | ||||
|     # # 遍历行 | ||||
|     df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1) | ||||
|     df_combined = df_combined.round(4) | ||||
|     print(df_combined3) | ||||
| 
 | ||||
|     # 使用最佳五个模型进行绘图 | ||||
|     # best_models = pd.read_csv(os.path.join(dataset,'best_modelnames.txt'),header=None).values.flatten().tolist() | ||||
|     # def find_min_max_within_quantile(row): | ||||
|     #     row = row[best_models] | ||||
|     #     q10 = row.min() | ||||
|     #     q90 = row.max() | ||||
|     #     # 获取 row行最大最小值模型名称 | ||||
|     #     min_model = row[row == q10].idxmin() | ||||
|     #     max_model = row[row == q90].idxmin() | ||||
|          | ||||
|     #     # # 判断flot值是否为空值 | ||||
|     #     # if pd.isna(q10) or pd.isna(q90): | ||||
|     #     return pd.Series([q10, q90,min_model,max_model], index=['min_within_quantile','max_within_quantile','min_model','max_model']) | ||||
| 
 | ||||
|     # # 遍历行 | ||||
|     # df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1) | ||||
|     # df_combined = df_combined.round(4) | ||||
|     # print(df_combined3) | ||||
| 
 | ||||
|     # # 通道使用模型评估前80%作为置信度 | ||||
|     # def find_min_max_within_quantile(row): | ||||
|     #     row.drop(['ds','y'], inplace=True) | ||||
|     #     row = row.astype(float).round(2) | ||||
| 
 | ||||
|     #     row_sorted = row | ||||
|     #     # 计算 10% 和 90% 位置的索引 | ||||
|     #     index_10 = 0 | ||||
|     #     index_90 = int(len(row_sorted) * 0.8) | ||||
|     #     q10 = row_sorted[index_10] | ||||
|     #     q90 = row_sorted[index_90] | ||||
|     #     # 获取模型名称 | ||||
|     #     min_model = row[row == q10].idxmin() | ||||
|     #     max_model = row[row == q90].idxmin() | ||||
| 
 | ||||
|          | ||||
|     #     # # 判断flot值是否为空值 | ||||
|     #     # if pd.isna(q10) or pd.isna(q90): | ||||
|     #     return pd.Series([q10, q90,min_model,max_model], index=['min_within_quantile','max_within_quantile','min_model','max_model']) | ||||
| 
 | ||||
|     # # 重新排列 | ||||
|     # df_combined3 = df_combined3[['ds','y'] + allmodelnames] | ||||
|     # # 遍历行 | ||||
|     # df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1) | ||||
|     # df_combined = df_combined.round(4) | ||||
|     # print(df_combined3) | ||||
| 
 | ||||
| 
 | ||||
|     # # 通道使用预测模型的80%置信度 | ||||
| @@ -641,7 +716,7 @@ def model_losss_juxiting(sqlitedb): | ||||
|     plt.rcParams['font.sans-serif'] = ['SimHei'] | ||||
|     plt.figure(figsize=(15, 10)) | ||||
|     # 设置有5个子图的画布 | ||||
|     for n,model in enumerate(modelnames): | ||||
|     for n,model in enumerate(modelnames[:5]): | ||||
|         plt.subplot(3, 2, n+1) | ||||
|         plt.plot(df_combined3['ds'], df_combined3['y'], label='真实值') | ||||
|         plt.plot(df_combined3['ds'], df_combined3[model], label=model) | ||||
| @@ -671,11 +746,11 @@ def model_losss_juxiting(sqlitedb): | ||||
|     if not sqlitedb.check_table_exists('trueandpredict'): | ||||
|         first_row.to_sql('trueandpredict',sqlitedb.connection,index=False) | ||||
|     else: | ||||
|         for col in first_row.columns: | ||||
|                 sqlitedb.add_column_if_not_exists('trueandpredict',col,'TEXT') | ||||
|         for row in first_row.itertuples(index=False): | ||||
|             row_dict = row._asdict() | ||||
|             columns=row_dict.keys() | ||||
|             for col in columns: | ||||
|                 sqlitedb.add_column_if_not_exists('trueandpredict',col,'TEXT') | ||||
|             check_query = sqlitedb.select_data('trueandpredict',where_condition = f"ds = '{row.ds}'") | ||||
|             if len(check_query) > 0: | ||||
|                 set_clause = ", ".join([f"{key} = '{value}'" for key, value in row_dict.items()]) | ||||
| @@ -688,6 +763,10 @@ def model_losss_juxiting(sqlitedb): | ||||
|     # 最多频率的模型名称 | ||||
|     min_model_max_frequency_model = df_combined3['min_model'].tail(20).value_counts().idxmax() | ||||
|     max_model_max_frequency_model = df_combined3['max_model'].tail(20).value_counts().idxmax() | ||||
|     if min_model_max_frequency_model == max_model_max_frequency_model: | ||||
|         # 取20天第二多的模型 | ||||
|         max_model_max_frequency_model = df_combined3['max_model'].tail(20).value_counts().nlargest(2).index[1] | ||||
| 
 | ||||
|     df_predict['min_model'] = min_model_max_frequency_model | ||||
|     df_predict['max_model'] = max_model_max_frequency_model | ||||
|     df_predict['min_within_quantile'] = df_predict[min_model_max_frequency_model] | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user