准确率计算数据入库
This commit is contained in:
parent
363da754b2
commit
af408caef1
@ -197,10 +197,10 @@ warning_data = {
|
||||
|
||||
### 开关
|
||||
is_train = True # 是否训练
|
||||
is_debug = False # 是否调试
|
||||
is_debug = True # 是否调试
|
||||
is_eta = False # 是否使用eta接口
|
||||
is_timefurture = True # 是否使用时间特征
|
||||
is_fivemodels = False # 是否使用之前保存的最佳的5个模型
|
||||
is_fivemodels = True # 是否使用之前保存的最佳的5个模型
|
||||
is_edbcode = True # 特征使用edbcoding列表中的
|
||||
is_edbnamelist = False # 自定义特征,对应上面的edbnamelist
|
||||
is_update_eta = False # 预测结果上传到eta
|
||||
|
@ -109,34 +109,34 @@ def predict_main():
|
||||
import datetime
|
||||
# 判断当前日期是不是周一
|
||||
is_weekday = datetime.datetime.now().weekday() == 0
|
||||
if is_weekday:
|
||||
logger.info('今天是周一,更新预测模型')
|
||||
# 计算最近60天预测残差最低的模型名称
|
||||
model_results = sqlitedb.select_data('trueandpredict', order_by="ds DESC", limit="60")
|
||||
# 删除空值率为40%以上的列
|
||||
if len(model_results) > 10:
|
||||
model_results = model_results.dropna(thresh=len(model_results)*0.6,axis=1)
|
||||
# 删除空行
|
||||
model_results = model_results.dropna()
|
||||
modelnames = model_results.columns.to_list()[2:]
|
||||
for col in model_results[modelnames].select_dtypes(include=['object']).columns:
|
||||
model_results[col] = model_results[col].astype(np.float32)
|
||||
# 计算每个预测值与真实值之间的偏差率
|
||||
for model in modelnames:
|
||||
model_results[f'{model}_abs_error_rate'] = abs(model_results['y'] - model_results[model]) / model_results['y']
|
||||
# 获取每行对应的最小偏差率值
|
||||
min_abs_error_rate_values = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].min(), axis=1)
|
||||
# 获取每行对应的最小偏差率值对应的列名
|
||||
min_abs_error_rate_column_name = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].idxmin(), axis=1)
|
||||
# 将列名索引转换为列名
|
||||
min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(lambda x: x.split('_')[0])
|
||||
# 取出现次数最多的模型名称
|
||||
most_common_model = min_abs_error_rate_column_name.value_counts().idxmax()
|
||||
logger.info(f"最近60天预测残差最低的模型名称:{most_common_model}")
|
||||
# 保存结果到数据库
|
||||
if not sqlitedb.check_table_exists('most_model'):
|
||||
sqlitedb.create_table('most_model', columns="ds datetime, most_common_model TEXT")
|
||||
sqlitedb.insert_data('most_model', (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), most_common_model,), columns=('ds', 'most_common_model',))
|
||||
# if is_weekday:
|
||||
# logger.info('今天是周一,更新预测模型')
|
||||
# # 计算最近60天预测残差最低的模型名称
|
||||
# model_results = sqlitedb.select_data('trueandpredict', order_by="ds DESC", limit="60")
|
||||
# # 删除空值率为40%以上的列
|
||||
# if len(model_results) > 10:
|
||||
# model_results = model_results.dropna(thresh=len(model_results)*0.6,axis=1)
|
||||
# # 删除空行
|
||||
# model_results = model_results.dropna()
|
||||
# modelnames = model_results.columns.to_list()[2:]
|
||||
# for col in model_results[modelnames].select_dtypes(include=['object']).columns:
|
||||
# model_results[col] = model_results[col].astype(np.float32)
|
||||
# # 计算每个预测值与真实值之间的偏差率
|
||||
# for model in modelnames:
|
||||
# model_results[f'{model}_abs_error_rate'] = abs(model_results['y'] - model_results[model]) / model_results['y']
|
||||
# # 获取每行对应的最小偏差率值
|
||||
# min_abs_error_rate_values = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].min(), axis=1)
|
||||
# # 获取每行对应的最小偏差率值对应的列名
|
||||
# min_abs_error_rate_column_name = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].idxmin(), axis=1)
|
||||
# # 将列名索引转换为列名
|
||||
# min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(lambda x: x.split('_')[0])
|
||||
# # 取出现次数最多的模型名称
|
||||
# most_common_model = min_abs_error_rate_column_name.value_counts().idxmax()
|
||||
# logger.info(f"最近60天预测残差最低的模型名称:{most_common_model}")
|
||||
# # 保存结果到数据库
|
||||
# if not sqlitedb.check_table_exists('most_model'):
|
||||
# sqlitedb.create_table('most_model', columns="ds datetime, most_common_model TEXT")
|
||||
# sqlitedb.insert_data('most_model', (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), most_common_model,), columns=('ds', 'most_common_model',))
|
||||
|
||||
try:
|
||||
if is_weekday:
|
||||
@ -172,25 +172,25 @@ def predict_main():
|
||||
row, col = df.shape
|
||||
|
||||
now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
|
||||
ex_Model(df,
|
||||
horizon=horizon,
|
||||
input_size=input_size,
|
||||
train_steps=train_steps,
|
||||
val_check_steps=val_check_steps,
|
||||
early_stop_patience_steps=early_stop_patience_steps,
|
||||
is_debug=is_debug,
|
||||
dataset=dataset,
|
||||
is_train=is_train,
|
||||
is_fivemodels=is_fivemodels,
|
||||
val_size=val_size,
|
||||
test_size=test_size,
|
||||
settings=settings,
|
||||
now=now,
|
||||
etadata=etadata,
|
||||
modelsindex=modelsindex,
|
||||
data=data,
|
||||
is_eta=is_eta,
|
||||
)
|
||||
# ex_Model(df,
|
||||
# horizon=horizon,
|
||||
# input_size=input_size,
|
||||
# train_steps=train_steps,
|
||||
# val_check_steps=val_check_steps,
|
||||
# early_stop_patience_steps=early_stop_patience_steps,
|
||||
# is_debug=is_debug,
|
||||
# dataset=dataset,
|
||||
# is_train=is_train,
|
||||
# is_fivemodels=is_fivemodels,
|
||||
# val_size=val_size,
|
||||
# test_size=test_size,
|
||||
# settings=settings,
|
||||
# now=now,
|
||||
# etadata=etadata,
|
||||
# modelsindex=modelsindex,
|
||||
# data=data,
|
||||
# is_eta=is_eta,
|
||||
# )
|
||||
|
||||
|
||||
logger.info('模型训练完成')
|
||||
|
@ -201,17 +201,6 @@ def ex_Model(df,horizon,input_size,train_steps,val_check_steps,early_stop_patien
|
||||
df_predict.to_csv(os.path.join(dataset,"predict.csv"),index=False)
|
||||
|
||||
df_predict2 = df_predict.copy()
|
||||
# 保存到数据库
|
||||
if not sqlitedb.check_table_exists('accuracy'):
|
||||
sqlitedb.create_table('accuracy', columns="id int,PREDICT_DATE datetime,CREAT_DATE datetime, MIN_PRICE TEXT,MAX_PRICE TEXT,HIGH_PRICE TEXT,LOW_PRICE TEXT,RIGHT_ROTE ")
|
||||
existing_data = sqlitedb.execute_query("SELECT * FROM accuracy")
|
||||
if not existing_data.empty:
|
||||
max_id = existing_data['id'].astype(int).max()
|
||||
df_predict2['id'] = range(max_id + 1, max_id + 1 + len(df_predict2))
|
||||
else:
|
||||
df_predict2['id'] = range(1, 1 + len(df_predict2))
|
||||
df_predict2.to_sql(table_name, con=sqlitedb.connect, if_exists='append', index=False)
|
||||
|
||||
|
||||
# 把预测值上传到eta
|
||||
if is_update_eta:
|
||||
@ -384,6 +373,25 @@ def model_losss(sqlitedb):
|
||||
df_predict2['ds'] = pd.to_datetime(df_predict2['ds'])
|
||||
df_predict2['ds'] = df_predict2['ds'].dt.strftime('%Y-%m-%d 00:00:00')
|
||||
|
||||
# 保存到数据库
|
||||
if not sqlitedb.check_table_exists('accuracy'):
|
||||
sqlitedb.create_table('accuracy', columns="id int,PREDICT_DATE datetime,CREAT_DATE datetime, MIN_PRICE TEXT,MAX_PRICE TEXT,HIGH_PRICE TEXT,LOW_PRICE TEXT,RIGHT_ROTE ")
|
||||
existing_data = sqlitedb.select_data(table_name = "accuracy")
|
||||
if not existing_data.empty:
|
||||
max_id = existing_data['id'].astype(int).max()
|
||||
df_predict2['id'] = range(max_id + 1, max_id + 1 + len(df_predict2))
|
||||
else:
|
||||
df_predict2['id'] = range(1, 1 + len(df_predict2))
|
||||
df_predict2['CREAT_DATE'] = datetime.datetime.now().strftime('%Y-%m-%d')
|
||||
df_predict2['PREDICT_DATE'] = df_predict2['ds']
|
||||
df_predict2['MIN_PRICE'] = df_predict2['min_within_quantile']
|
||||
df_predict2['MAX_PRICE'] = df_predict2['max_within_quantile']
|
||||
print(df_predict2.columns)
|
||||
df_predict2 = df_predict2[['id','PREDICT_DATE','CREAT_DATE','MIN_PRICE','MAX_PRICE']]
|
||||
df_predict2.to_sql(table_name, con=sqlitedb.connection, if_exists='append', index=False)
|
||||
|
||||
|
||||
|
||||
|
||||
def _add_abs_error_rate():
|
||||
# 计算每个预测值与真实值之间的偏差率
|
||||
|
Loading…
Reference in New Issue
Block a user