diff --git a/config_jingbo.py b/config_jingbo.py index 38ba7fd..2f45bcc 100644 --- a/config_jingbo.py +++ b/config_jingbo.py @@ -178,7 +178,7 @@ is_update_report = False # 是否上传报告 # 数据截取日期 -end_time = '' # 数据截取日期 +end_time = '2024-10-28' # 数据截取日期 delweekenday = True is_corr = False # 特征是否参与滞后领先提升相关系数 add_kdj = False # 是否添加kdj指标 diff --git a/dataset/jbsh_yuanyou.db b/dataset/jbsh_yuanyou.db index d5cfb0d..a6eddaf 100644 Binary files a/dataset/jbsh_yuanyou.db and b/dataset/jbsh_yuanyou.db differ diff --git a/lib/dataread.py b/lib/dataread.py index 1320b42..e110328 100644 --- a/lib/dataread.py +++ b/lib/dataread.py @@ -20,8 +20,8 @@ plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签 plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号 from datetime import timedelta -# from config_jingbo import * -from config_juxiting import * +from config_jingbo import * +# from config_juxiting import * from sklearn import metrics from reportlab.pdfbase import pdfmetrics # 注册字体 from reportlab.pdfbase.ttfonts import TTFont # 字体类 diff --git a/main.py b/main.py index 05d09b9..66e1b0d 100644 --- a/main.py +++ b/main.py @@ -1,7 +1,7 @@ # 读取配置 -# from config_jingbo import * +from config_jingbo import * # from config_tansuanli import * -from config_juxiting import * +# from config_juxiting import * from lib.dataread import * from lib.tools import * from models.nerulforcastmodels import ex_Model,model_losss,brent_export_pdf,tansuanli_export_pdf,pp_export_pdf,model_losss_juxiting @@ -78,26 +78,31 @@ def predict_main(): is_weekday = datetime.datetime.now().weekday() == 3 if is_weekday: logger.info('今天是周一,更新预测模型') - # 计算最近20天预测残差最低的模型名称 + try: + # 计算最近20天预测残差最低的模型名称 - model_results = sqlitedb.select_data('trueandpredict',order_by = "ds DESC",limit = "20") - model_results = model_results.dropna() - modelnames = model_results.columns.to_list()[2:] - for col in model_results[modelnames].select_dtypes(include=['object']).columns: - model_results[col] = model_results[col].astype(np.float32) - # 计算每个预测值与真实值之间的偏差率 - for model in modelnames: - model_results[f'{model}_abs_error_rate'] = abs(model_results['y'] - model_results[model]) / model_results['y'] + model_results = sqlitedb.select_data('trueandpredict',order_by = "ds DESC",limit = "20") + model_results = model_results.dropna() + modelnames = model_results.columns.to_list()[2:] + for col in model_results[modelnames].select_dtypes(include=['object']).columns: + model_results[col] = model_results[col].astype(np.float32) + # 计算每个预测值与真实值之间的偏差率 + for model in modelnames: + model_results[f'{model}_abs_error_rate'] = abs(model_results['y'] - model_results[model]) / model_results['y'] - # 获取每行对应的最小偏差率值 - min_abs_error_rate_values = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].min(), axis=1) - # 获取每行对应的最小偏差率值对应的列名 - min_abs_error_rate_column_name = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].idxmin(), axis=1) - # 将列名索引转换为列名 - min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(lambda x: x.split('_')[0]) - # 取出现次数最多的模型名称 - most_common_model = min_abs_error_rate_column_name.value_counts().idxmax() - logger.info(f"最近20天预测残差最低的模型名称:{most_common_model}") + # 获取每行对应的最小偏差率值 + min_abs_error_rate_values = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].min(), axis=1) + # 获取每行对应的最小偏差率值对应的列名 + min_abs_error_rate_column_name = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].idxmin(), axis=1) + # 将列名索引转换为列名 + min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(lambda x: x.split('_')[0]) + # 取出现次数最多的模型名称 + most_common_model = min_abs_error_rate_column_name.value_counts().idxmax() + logger.info(f"最近20天预测残差最低的模型名称:{most_common_model}") + except Exception as e: + logger.error(f"最近20天预测残差最低的模型名称计算失败:{e}") + # 取txt中的模型名称 + most_common_model = pd.read_csv(os.path.join(dataset,'best_modelnames.txt'),header=None).values.flatten().tolist()[0] # 保存结果到数据库 @@ -118,25 +123,25 @@ def predict_main(): row,col = df.shape now = datetime.datetime.now().strftime('%Y%m%d%H%M%S') - # ex_Model(df, - # horizon=horizon, - # input_size=input_size, - # train_steps=train_steps, - # val_check_steps=val_check_steps, - # early_stop_patience_steps=early_stop_patience_steps, - # is_debug=is_debug, - # dataset=dataset, - # is_train=is_train, - # is_fivemodels=is_fivemodels, - # val_size=val_size, - # test_size=test_size, - # settings=settings, - # now=now, - # etadata = etadata, - # modelsindex = modelsindex, - # data = data, - # is_eta=is_eta, - # ) + ex_Model(df, + horizon=horizon, + input_size=input_size, + train_steps=train_steps, + val_check_steps=val_check_steps, + early_stop_patience_steps=early_stop_patience_steps, + is_debug=is_debug, + dataset=dataset, + is_train=is_train, + is_fivemodels=is_fivemodels, + val_size=val_size, + test_size=test_size, + settings=settings, + now=now, + etadata = etadata, + modelsindex = modelsindex, + data = data, + is_eta=is_eta, + ) logger.info('模型训练完成') diff --git a/models/nerulforcastmodels.py b/models/nerulforcastmodels.py index e26296e..7bd0429 100644 --- a/models/nerulforcastmodels.py +++ b/models/nerulforcastmodels.py @@ -540,7 +540,7 @@ def model_losss_juxiting(sqlitedb): row = row[best_models] q10 = row.min() q90 = row.max() - # 获取 row行10%分位值对应的模型名称 + # 获取 row行最大最小值模型名称 min_model = row[row == q10].idxmin() max_model = row[row == q90].idxmin() @@ -559,12 +559,18 @@ def model_losss_juxiting(sqlitedb): # # 通道使用预测模型的80%置信度 # def find_min_max_within_quantile(row): # row.drop(['ds','y'], inplace=True) - # # 获取分位数10%和90%的值 - # q10 = row.quantile(0.1) - # q90 = row.quantile(0.9) - # # 获取 row行10%分位值对应的模型名称 - # min_model = row[row == q10].index[0] - # max_model = row[row == q90].index[0] + # row = row.astype(float).round(2) + + # row_sorted = row.sort_values(ascending=True).reset_index(drop=True) + # # 计算 10% 和 90% 位置的索引 + # index_10 = int(len(row_sorted) * 0.1) + # index_90 = int(len(row_sorted) * 0.9) + # q10 = row_sorted[index_10] + # q90 = row_sorted[index_90] + # # 获取模型名称 + # min_model = row[row == q10].idxmin() + # max_model = row[row == q90].idxmin() + # # # 判断flot值是否为空值 # # if pd.isna(q10) or pd.isna(q90):