From af408caef12864c30985f787d072dfbe796bbc36 Mon Sep 17 00:00:00 2001 From: liurui Date: Mon, 9 Dec 2024 15:07:51 +0800 Subject: [PATCH] =?UTF-8?q?=E5=87=86=E7=A1=AE=E7=8E=87=E8=AE=A1=E7=AE=97?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E5=85=A5=E5=BA=93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config_jingbo.py | 4 +- main_yuanyou.py | 94 ++++++++++++++++++------------------ models/nerulforcastmodels.py | 30 +++++++----- 3 files changed, 68 insertions(+), 60 deletions(-) diff --git a/config_jingbo.py b/config_jingbo.py index 13a4349..d2cdeb4 100644 --- a/config_jingbo.py +++ b/config_jingbo.py @@ -197,10 +197,10 @@ warning_data = { ### 开关 is_train = True # 是否训练 -is_debug = False # 是否调试 +is_debug = True # 是否调试 is_eta = False # 是否使用eta接口 is_timefurture = True # 是否使用时间特征 -is_fivemodels = False # 是否使用之前保存的最佳的5个模型 +is_fivemodels = True # 是否使用之前保存的最佳的5个模型 is_edbcode = True # 特征使用edbcoding列表中的 is_edbnamelist = False # 自定义特征,对应上面的edbnamelist is_update_eta = False # 预测结果上传到eta diff --git a/main_yuanyou.py b/main_yuanyou.py index 8900fad..a1616ea 100644 --- a/main_yuanyou.py +++ b/main_yuanyou.py @@ -109,34 +109,34 @@ def predict_main(): import datetime # 判断当前日期是不是周一 is_weekday = datetime.datetime.now().weekday() == 0 - if is_weekday: - logger.info('今天是周一,更新预测模型') - # 计算最近60天预测残差最低的模型名称 - model_results = sqlitedb.select_data('trueandpredict', order_by="ds DESC", limit="60") - # 删除空值率为40%以上的列 - if len(model_results) > 10: - model_results = model_results.dropna(thresh=len(model_results)*0.6,axis=1) - # 删除空行 - model_results = model_results.dropna() - modelnames = model_results.columns.to_list()[2:] - for col in model_results[modelnames].select_dtypes(include=['object']).columns: - model_results[col] = model_results[col].astype(np.float32) - # 计算每个预测值与真实值之间的偏差率 - for model in modelnames: - model_results[f'{model}_abs_error_rate'] = abs(model_results['y'] - model_results[model]) / model_results['y'] - # 获取每行对应的最小偏差率值 - 
min_abs_error_rate_values = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].min(), axis=1) - # 获取每行对应的最小偏差率值对应的列名 - min_abs_error_rate_column_name = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].idxmin(), axis=1) - # 将列名索引转换为列名 - min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(lambda x: x.split('_')[0]) - # 取出现次数最多的模型名称 - most_common_model = min_abs_error_rate_column_name.value_counts().idxmax() - logger.info(f"最近60天预测残差最低的模型名称:{most_common_model}") - # 保存结果到数据库 - if not sqlitedb.check_table_exists('most_model'): - sqlitedb.create_table('most_model', columns="ds datetime, most_common_model TEXT") - sqlitedb.insert_data('most_model', (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), most_common_model,), columns=('ds', 'most_common_model',)) + # if is_weekday: + # logger.info('今天是周一,更新预测模型') + # # 计算最近60天预测残差最低的模型名称 + # model_results = sqlitedb.select_data('trueandpredict', order_by="ds DESC", limit="60") + # # 删除空值率为40%以上的列 + # if len(model_results) > 10: + # model_results = model_results.dropna(thresh=len(model_results)*0.6,axis=1) + # # 删除空行 + # model_results = model_results.dropna() + # modelnames = model_results.columns.to_list()[2:] + # for col in model_results[modelnames].select_dtypes(include=['object']).columns: + # model_results[col] = model_results[col].astype(np.float32) + # # 计算每个预测值与真实值之间的偏差率 + # for model in modelnames: + # model_results[f'{model}_abs_error_rate'] = abs(model_results['y'] - model_results[model]) / model_results['y'] + # # 获取每行对应的最小偏差率值 + # min_abs_error_rate_values = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].min(), axis=1) + # # 获取每行对应的最小偏差率值对应的列名 + # min_abs_error_rate_column_name = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].idxmin(), axis=1) + # # 将列名索引转换为列名 + # min_abs_error_rate_column_name = 
min_abs_error_rate_column_name.map(lambda x: x.split('_')[0]) + # # 取出现次数最多的模型名称 + # most_common_model = min_abs_error_rate_column_name.value_counts().idxmax() + # logger.info(f"最近60天预测残差最低的模型名称:{most_common_model}") + # # 保存结果到数据库 + # if not sqlitedb.check_table_exists('most_model'): + # sqlitedb.create_table('most_model', columns="ds datetime, most_common_model TEXT") + # sqlitedb.insert_data('most_model', (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), most_common_model,), columns=('ds', 'most_common_model',)) try: if is_weekday: @@ -172,25 +172,25 @@ def predict_main(): row, col = df.shape now = datetime.datetime.now().strftime('%Y%m%d%H%M%S') - ex_Model(df, - horizon=horizon, - input_size=input_size, - train_steps=train_steps, - val_check_steps=val_check_steps, - early_stop_patience_steps=early_stop_patience_steps, - is_debug=is_debug, - dataset=dataset, - is_train=is_train, - is_fivemodels=is_fivemodels, - val_size=val_size, - test_size=test_size, - settings=settings, - now=now, - etadata=etadata, - modelsindex=modelsindex, - data=data, - is_eta=is_eta, - ) + # ex_Model(df, + # horizon=horizon, + # input_size=input_size, + # train_steps=train_steps, + # val_check_steps=val_check_steps, + # early_stop_patience_steps=early_stop_patience_steps, + # is_debug=is_debug, + # dataset=dataset, + # is_train=is_train, + # is_fivemodels=is_fivemodels, + # val_size=val_size, + # test_size=test_size, + # settings=settings, + # now=now, + # etadata=etadata, + # modelsindex=modelsindex, + # data=data, + # is_eta=is_eta, + # ) logger.info('模型训练完成') diff --git a/models/nerulforcastmodels.py b/models/nerulforcastmodels.py index d201f7e..1cee451 100644 --- a/models/nerulforcastmodels.py +++ b/models/nerulforcastmodels.py @@ -201,18 +201,7 @@ def ex_Model(df,horizon,input_size,train_steps,val_check_steps,early_stop_patien df_predict.to_csv(os.path.join(dataset,"predict.csv"),index=False) df_predict2 = df_predict.copy() - # 保存到数据库 - if not 
sqlitedb.check_table_exists('accuracy'): - sqlitedb.create_table('accuracy', columns="id int,PREDICT_DATE datetime,CREAT_DATE datetime, MIN_PRICE TEXT,MAX_PRICE TEXT,HIGH_PRICE TEXT,LOW_PRICE TEXT,RIGHT_ROTE ") - existing_data = sqlitedb.execute_query("SELECT * FROM accuracy") - if not existing_data.empty: - max_id = existing_data['id'].astype(int).max() - df_predict2['id'] = range(max_id + 1, max_id + 1 + len(df_predict2)) - else: - df_predict2['id'] = range(1, 1 + len(df_predict2)) - df_predict2.to_sql(table_name, con=sqlitedb.connect, if_exists='append', index=False) - # 把预测值上传到eta if is_update_eta: dates = df_predict['ds'].dt.strftime('%Y-%m-%d') @@ -384,6 +373,25 @@ def model_losss(sqlitedb): df_predict2['ds'] = pd.to_datetime(df_predict2['ds']) df_predict2['ds'] = df_predict2['ds'].dt.strftime('%Y-%m-%d 00:00:00') + # 保存到数据库 + if not sqlitedb.check_table_exists('accuracy'): + sqlitedb.create_table('accuracy', columns="id int,PREDICT_DATE datetime,CREAT_DATE datetime, MIN_PRICE TEXT,MAX_PRICE TEXT,HIGH_PRICE TEXT,LOW_PRICE TEXT,RIGHT_ROTE ") + existing_data = sqlitedb.select_data(table_name = "accuracy") + if not existing_data.empty: + max_id = existing_data['id'].astype(int).max() + df_predict2['id'] = range(max_id + 1, max_id + 1 + len(df_predict2)) + else: + df_predict2['id'] = range(1, 1 + len(df_predict2)) + df_predict2['CREAT_DATE'] = datetime.datetime.now().strftime('%Y-%m-%d') + df_predict2['PREDICT_DATE'] = df_predict2['ds'] + df_predict2['MIN_PRICE'] = df_predict2['min_within_quantile'] + df_predict2['MAX_PRICE'] = df_predict2['max_within_quantile'] + print(df_predict2.columns) + df_predict2 = df_predict2[['id','PREDICT_DATE','CREAT_DATE','MIN_PRICE','MAX_PRICE']] + df_predict2.to_sql(table_name, con=sqlitedb.connection, if_exists='append', index=False) + + + def _add_abs_error_rate(): # 计算每个预测值与真实值之间的偏差率