准确率计算数据入库

This commit is contained in:
liurui 2024-12-09 15:07:51 +08:00
parent 363da754b2
commit af408caef1
3 changed files with 68 additions and 60 deletions

View File

@ -197,10 +197,10 @@ warning_data = {
### 开关
is_train = True # 是否训练
is_debug = False # 是否调试
is_debug = True # 是否调试
is_eta = False # 是否使用eta接口
is_timefurture = True # 是否使用时间特征
is_fivemodels = False # 是否使用之前保存的最佳的5个模型
is_fivemodels = True # 是否使用之前保存的最佳的5个模型
is_edbcode = True # 特征使用edbcoding列表中的
is_edbnamelist = False # 自定义特征对应上面的edbnamelist
is_update_eta = False # 预测结果上传到eta

View File

@ -109,34 +109,34 @@ def predict_main():
import datetime
# 判断当前日期是不是周一
is_weekday = datetime.datetime.now().weekday() == 0
if is_weekday:
logger.info('今天是周一,更新预测模型')
# 计算最近60天预测残差最低的模型名称
model_results = sqlitedb.select_data('trueandpredict', order_by="ds DESC", limit="60")
# 删除空值率为40%以上的列
if len(model_results) > 10:
model_results = model_results.dropna(thresh=len(model_results)*0.6,axis=1)
# 删除空行
model_results = model_results.dropna()
modelnames = model_results.columns.to_list()[2:]
for col in model_results[modelnames].select_dtypes(include=['object']).columns:
model_results[col] = model_results[col].astype(np.float32)
# 计算每个预测值与真实值之间的偏差率
for model in modelnames:
model_results[f'{model}_abs_error_rate'] = abs(model_results['y'] - model_results[model]) / model_results['y']
# 获取每行对应的最小偏差率值
min_abs_error_rate_values = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].min(), axis=1)
# 获取每行对应的最小偏差率值对应的列名
min_abs_error_rate_column_name = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].idxmin(), axis=1)
# 将列名索引转换为列名
min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(lambda x: x.split('_')[0])
# 取出现次数最多的模型名称
most_common_model = min_abs_error_rate_column_name.value_counts().idxmax()
logger.info(f"最近60天预测残差最低的模型名称{most_common_model}")
# 保存结果到数据库
if not sqlitedb.check_table_exists('most_model'):
sqlitedb.create_table('most_model', columns="ds datetime, most_common_model TEXT")
sqlitedb.insert_data('most_model', (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), most_common_model,), columns=('ds', 'most_common_model',))
# if is_weekday:
# logger.info('今天是周一,更新预测模型')
# # 计算最近60天预测残差最低的模型名称
# model_results = sqlitedb.select_data('trueandpredict', order_by="ds DESC", limit="60")
# # 删除空值率为40%以上的列
# if len(model_results) > 10:
# model_results = model_results.dropna(thresh=len(model_results)*0.6,axis=1)
# # 删除空行
# model_results = model_results.dropna()
# modelnames = model_results.columns.to_list()[2:]
# for col in model_results[modelnames].select_dtypes(include=['object']).columns:
# model_results[col] = model_results[col].astype(np.float32)
# # 计算每个预测值与真实值之间的偏差率
# for model in modelnames:
# model_results[f'{model}_abs_error_rate'] = abs(model_results['y'] - model_results[model]) / model_results['y']
# # 获取每行对应的最小偏差率值
# min_abs_error_rate_values = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].min(), axis=1)
# # 获取每行对应的最小偏差率值对应的列名
# min_abs_error_rate_column_name = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].idxmin(), axis=1)
# # 将列名索引转换为列名
# min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(lambda x: x.split('_')[0])
# # 取出现次数最多的模型名称
# most_common_model = min_abs_error_rate_column_name.value_counts().idxmax()
# logger.info(f"最近60天预测残差最低的模型名称{most_common_model}")
# # 保存结果到数据库
# if not sqlitedb.check_table_exists('most_model'):
# sqlitedb.create_table('most_model', columns="ds datetime, most_common_model TEXT")
# sqlitedb.insert_data('most_model', (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), most_common_model,), columns=('ds', 'most_common_model',))
try:
if is_weekday:
@ -172,25 +172,25 @@ def predict_main():
row, col = df.shape
now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
ex_Model(df,
horizon=horizon,
input_size=input_size,
train_steps=train_steps,
val_check_steps=val_check_steps,
early_stop_patience_steps=early_stop_patience_steps,
is_debug=is_debug,
dataset=dataset,
is_train=is_train,
is_fivemodels=is_fivemodels,
val_size=val_size,
test_size=test_size,
settings=settings,
now=now,
etadata=etadata,
modelsindex=modelsindex,
data=data,
is_eta=is_eta,
)
# ex_Model(df,
# horizon=horizon,
# input_size=input_size,
# train_steps=train_steps,
# val_check_steps=val_check_steps,
# early_stop_patience_steps=early_stop_patience_steps,
# is_debug=is_debug,
# dataset=dataset,
# is_train=is_train,
# is_fivemodels=is_fivemodels,
# val_size=val_size,
# test_size=test_size,
# settings=settings,
# now=now,
# etadata=etadata,
# modelsindex=modelsindex,
# data=data,
# is_eta=is_eta,
# )
logger.info('模型训练完成')

View File

@ -201,18 +201,7 @@ def ex_Model(df,horizon,input_size,train_steps,val_check_steps,early_stop_patien
df_predict.to_csv(os.path.join(dataset,"predict.csv"),index=False)
df_predict2 = df_predict.copy()
# 保存到数据库
if not sqlitedb.check_table_exists('accuracy'):
sqlitedb.create_table('accuracy', columns="id int,PREDICT_DATE datetime,CREAT_DATE datetime, MIN_PRICE TEXT,MAX_PRICE TEXT,HIGH_PRICE TEXT,LOW_PRICE TEXT,RIGHT_ROTE ")
existing_data = sqlitedb.execute_query("SELECT * FROM accuracy")
if not existing_data.empty:
max_id = existing_data['id'].astype(int).max()
df_predict2['id'] = range(max_id + 1, max_id + 1 + len(df_predict2))
else:
df_predict2['id'] = range(1, 1 + len(df_predict2))
df_predict2.to_sql(table_name, con=sqlitedb.connect, if_exists='append', index=False)
# 把预测值上传到eta
if is_update_eta:
dates = df_predict['ds'].dt.strftime('%Y-%m-%d')
@ -384,6 +373,25 @@ def model_losss(sqlitedb):
df_predict2['ds'] = pd.to_datetime(df_predict2['ds'])
df_predict2['ds'] = df_predict2['ds'].dt.strftime('%Y-%m-%d 00:00:00')
# 保存到数据库
if not sqlitedb.check_table_exists('accuracy'):
sqlitedb.create_table('accuracy', columns="id int,PREDICT_DATE datetime,CREAT_DATE datetime, MIN_PRICE TEXT,MAX_PRICE TEXT,HIGH_PRICE TEXT,LOW_PRICE TEXT,RIGHT_ROTE ")
existing_data = sqlitedb.select_data(table_name = "accuracy")
if not existing_data.empty:
max_id = existing_data['id'].astype(int).max()
df_predict2['id'] = range(max_id + 1, max_id + 1 + len(df_predict2))
else:
df_predict2['id'] = range(1, 1 + len(df_predict2))
df_predict2['CREAT_DATE'] = datetime.datetime.now().strftime('%Y-%m-%d')
df_predict2['PREDICT_DATE'] = df_predict2['ds']
df_predict2['MIN_PRICE'] = df_predict2['min_within_quantile']
df_predict2['MAX_PRICE'] = df_predict2['max_within_quantile']
print(df_predict2.clumns)
df_predict2 = df_predict2[['id','PREDICT_DATE','CREAT_DATE','MIN_PRICE','MAX_PRICE']]
df_predict2.to_sql(table_name, con=sqlitedb.connection, if_exists='append', index=False)
def _add_abs_error_rate():
# 计算每个预测值与真实值之间的偏差率