预测表添加y值
This commit is contained in:
parent
1255dec24e
commit
bfb981d486
@ -173,7 +173,7 @@ login_pushreport_url = "http://192.168.100.53:8080/jingbo-dev/api/server/login"
|
|||||||
upload_url = "http://192.168.100.53:8080/jingbo-dev/api/analysis/reportInfo/researchUploadReportSave"
|
upload_url = "http://192.168.100.53:8080/jingbo-dev/api/analysis/reportInfo/researchUploadReportSave"
|
||||||
# upload_url = "http://192.168.100.109:8080/jingbo/api/analysis/reportInfo/researchUploadReportSave" # zhaoqiwei
|
# upload_url = "http://192.168.100.109:8080/jingbo/api/analysis/reportInfo/researchUploadReportSave" # zhaoqiwei
|
||||||
upload_warning_url = "http://192.168.100.53:8080/jingbo-dev/api/basicBuiness/crudeOilWarning/save"
|
upload_warning_url = "http://192.168.100.53:8080/jingbo-dev/api/basicBuiness/crudeOilWarning/save"
|
||||||
|
query_data_list_item_nos_url = "http://192.168.100.53:8080/jingbo-dev/api/warehouse/dwDataItem/queryDataListItemNos"
|
||||||
|
|
||||||
login_data = {
|
login_data = {
|
||||||
"data": {
|
"data": {
|
||||||
@ -213,6 +213,18 @@ warning_data = {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
query_data_list_item_nos_data = {
|
||||||
|
"funcModule":'数据项管理',
|
||||||
|
"funcOperation":'查询数据项编码',
|
||||||
|
"data":{
|
||||||
|
"dataItemNoList":['Brent活跃合约',''],
|
||||||
|
"dateEnd":'',
|
||||||
|
"dateStart":'2023-01-01'
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
# 北京环境数据库
|
# 北京环境数据库
|
||||||
host = '192.168.101.27'
|
host = '192.168.101.27'
|
||||||
port = 3306
|
port = 3306
|
||||||
|
@ -107,6 +107,20 @@ def predict_main():
|
|||||||
continue
|
continue
|
||||||
sqlitedb.insert_data('trueandpredict', tuple(row_dict.values()), columns=row_dict.keys())
|
sqlitedb.insert_data('trueandpredict', tuple(row_dict.values()), columns=row_dict.keys())
|
||||||
|
|
||||||
|
# 更新accuracy表的y值
|
||||||
|
if not sqlitedb.check_table_exists('accuracy'):
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
update_y = sqlitedb.select_data('accuracy',where_condition="y is null")
|
||||||
|
if len(update_y) > 0:
|
||||||
|
logger.info('更新accuracy表的y值')
|
||||||
|
# 找到update_y 中ds且df中的y的行
|
||||||
|
update_y = update_y[update_y['ds']<=end_time]
|
||||||
|
for row in update_y.itertuples(index=False):
|
||||||
|
row_dict = row._asdict()
|
||||||
|
yy = df[df['ds']==row_dict['ds']]['y'].values[0]
|
||||||
|
sqlitedb.update_data('accuracy', f"y = {yy}", where_condition=f"ds = '{row_dict['ds']}'")
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
# 判断当前日期是不是周一
|
# 判断当前日期是不是周一
|
||||||
is_weekday = datetime.datetime.now().weekday() == 0
|
is_weekday = datetime.datetime.now().weekday() == 0
|
||||||
@ -243,7 +257,7 @@ if __name__ == '__main__':
|
|||||||
global end_time
|
global end_time
|
||||||
is_on = True
|
is_on = True
|
||||||
# 遍历2024-11-25 到 2024-12-3 之间的工作日日期
|
# 遍历2024-11-25 到 2024-12-3 之间的工作日日期
|
||||||
for i_time in pd.date_range('2024-10-07', '2024-12-16', freq='B'):
|
for i_time in pd.date_range('2024-10-29', '2024-12-16', freq='B'):
|
||||||
end_time = i_time.strftime('%Y-%m-%d')
|
end_time = i_time.strftime('%Y-%m-%d')
|
||||||
predict_main()
|
predict_main()
|
||||||
if is_on:
|
if is_on:
|
||||||
|
@ -401,8 +401,6 @@ def model_losss(sqlitedb,end_time):
|
|||||||
else:
|
else:
|
||||||
return pd.Series([row[index-1], row[index+1]], index=['min_price','max_price'])
|
return pd.Series([row[index-1], row[index+1]], index=['min_price','max_price'])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def find_most_common_model():
|
def find_most_common_model():
|
||||||
# 最多频率的模型名称
|
# 最多频率的模型名称
|
||||||
min_model_max_frequency_model = df_combined3['min_model'].tail(60).value_counts().idxmax()
|
min_model_max_frequency_model = df_combined3['min_model'].tail(60).value_counts().idxmax()
|
||||||
@ -445,17 +443,7 @@ def model_losss(sqlitedb,end_time):
|
|||||||
df_predict2[common_columns].to_sql("accuracy", con=sqlitedb.connection, if_exists='append', index=False)
|
df_predict2[common_columns].to_sql("accuracy", con=sqlitedb.connection, if_exists='append', index=False)
|
||||||
except:
|
except:
|
||||||
df_predict2.to_sql("accuracy", con=sqlitedb.connection, if_exists='append', index=False)
|
df_predict2.to_sql("accuracy", con=sqlitedb.connection, if_exists='append', index=False)
|
||||||
|
|
||||||
# 更新accuracy表中的y值
|
|
||||||
update_y = sqlitedb.select_data(table_name = "accuracy",where_condition='y is null')
|
|
||||||
if len(update_y) > 0:
|
|
||||||
df_combined4 = df_combined3[(df_combined3['ds'].isin(update_y['ds'])) & (df_combined3['y'].notnull())]
|
|
||||||
if len(df_combined4) > 0:
|
|
||||||
for index, row in df_combined4.iterrows():
|
|
||||||
try:
|
|
||||||
sqlitedb.update_data('accuracy',f"y = {row['y']}",f"ds = '{row['ds']}'")
|
|
||||||
except:
|
|
||||||
logger.error(f'更新accuracy表中的y值失败,row={row}')
|
|
||||||
# 上周准确率计算
|
# 上周准确率计算
|
||||||
predict_y = sqlitedb.select_data(table_name = "accuracy")
|
predict_y = sqlitedb.select_data(table_name = "accuracy")
|
||||||
# ids = predict_y[predict_y['min_price'].isnull()]['id'].tolist()
|
# ids = predict_y[predict_y['min_price'].isnull()]['id'].tolist()
|
||||||
@ -479,6 +467,8 @@ def model_losss(sqlitedb,end_time):
|
|||||||
sqlitedb.update_data('accuracy',f"min_price = {row['min_price'].values[0]},max_price = {row['max_price'].values[0]}",f"id = {id}")
|
sqlitedb.update_data('accuracy',f"min_price = {row['min_price'].values[0]},max_price = {row['max_price'].values[0]}",f"id = {id}")
|
||||||
except:
|
except:
|
||||||
logger.error(f'更新accuracy表中的min_price,max_price值失败,row={row}')
|
logger.error(f'更新accuracy表中的min_price,max_price值失败,row={row}')
|
||||||
|
|
||||||
|
|
||||||
# 拼接市场最高最低价
|
# 拼接市场最高最低价
|
||||||
xlsfilename = os.path.join(dataset,'数据项下载.xls')
|
xlsfilename = os.path.join(dataset,'数据项下载.xls')
|
||||||
df2 = pd.read_excel(xlsfilename)[5:]
|
df2 = pd.read_excel(xlsfilename)[5:]
|
||||||
@ -496,6 +486,7 @@ def model_losss(sqlitedb,end_time):
|
|||||||
else:
|
else:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
# 定义一个函数来计算准确率
|
||||||
# 比较真实最高最低,和预测最高最低 计算准确率
|
# 比较真实最高最低,和预测最高最低 计算准确率
|
||||||
def calculate_accuracy(row):
|
def calculate_accuracy(row):
|
||||||
# 全子集情况:
|
# 全子集情况:
|
||||||
@ -527,430 +518,26 @@ def model_losss(sqlitedb,end_time):
|
|||||||
create_dates = [date.strftime('%Y-%m-%d') for date in up_week_dates[4:-3]]
|
create_dates = [date.strftime('%Y-%m-%d') for date in up_week_dates[4:-3]]
|
||||||
ds_dates = [date.strftime('%Y-%m-%d') for date in up_week_dates[-7:-2]]
|
ds_dates = [date.strftime('%Y-%m-%d') for date in up_week_dates[-7:-2]]
|
||||||
return create_dates,ds_dates
|
return create_dates,ds_dates
|
||||||
|
|
||||||
create_dates,ds_dates = get_week_date(end_time)
|
|
||||||
# 计算准确率并保存结果
|
# 计算准确率并保存结果
|
||||||
def _get_accuracy_rate(df,create_dates,ds_dates):
|
def _get_accuracy_rate(df,create_dates,ds_dates,endtime):
|
||||||
df3 = df.copy()
|
df3 = df.copy()
|
||||||
df3 = df3[df3['CREAT_DATE'].isin(create_dates)]
|
df3 = df3[df3['CREAT_DATE'].isin(create_dates)]
|
||||||
df3 = df3[df3['ds'].isin(ds_dates)]
|
df3 = df3[df3['ds'].isin(ds_dates)]
|
||||||
accuracy_rote = 0
|
accuracy_rote = 0
|
||||||
for i,group in df3.groupby('CREAT_DATE'):
|
for i,group in df3.groupby('CREAT_DATE'):
|
||||||
accuracy_rote += (group['ACCURACY'].sum()/len(group))*weight_dict[len(group)-1]
|
# print('日期:',i)
|
||||||
df4 = pd.DataFrame(columns=['开始日期','结束日期','准确率'])
|
# print(group)
|
||||||
df4.loc[len(df4)] = {'开始日期':ds_dates[0],'结束日期':ds_dates[-1],'准确率':accuracy_rote}
|
# print('权重:',weight_dict[len(group)-1])
|
||||||
df4.to_sql("accuracy_rote", con=sqlitedb.connection, if_exists='append', index=False)
|
# print('准确率:',(group['ACCURACY'].sum()/len(group))*weight_dict[len(group)-1])
|
||||||
# return df4
|
|
||||||
|
|
||||||
_get_accuracy_rate(df,create_dates,ds_dates)
|
|
||||||
|
|
||||||
def _add_abs_error_rate():
|
|
||||||
# 计算每个预测值与真实值之间的偏差率
|
|
||||||
for model in allmodelnames:
|
|
||||||
df_combined3[f'{model}_abs_error_rate'] = abs(df_combined3['y'] - df_combined3[model]) / df_combined3['y']
|
|
||||||
|
|
||||||
# 获取每行对应的最小偏差率值
|
|
||||||
min_abs_error_rate_values = df_combined3.apply(lambda row: row[[f'{model}_abs_error_rate' for model in allmodelnames]].min(), axis=1)
|
|
||||||
# 获取每行对应的最小偏差率值对应的列名
|
|
||||||
min_abs_error_rate_column_name = df_combined3.apply(lambda row: row[[f'{model}_abs_error_rate' for model in allmodelnames]].idxmin(), axis=1)
|
|
||||||
# 将列名索引转换为列名
|
|
||||||
min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(lambda x: x.split('_')[0])
|
|
||||||
# 获取最小偏差率对应的模型的预测值
|
|
||||||
min_abs_error_rate_predictions = df_combined3.apply(lambda row: row[min_abs_error_rate_column_name[row.name]], axis=1)
|
|
||||||
# 将最小偏差率对应的模型的预测值添加到DataFrame中
|
|
||||||
df_combined3['min_abs_error_rate_prediction'] = min_abs_error_rate_predictions
|
|
||||||
df_combined3['min_abs_error_rate_column_name'] = min_abs_error_rate_column_name
|
|
||||||
# _add_abs_error_rate()
|
|
||||||
|
|
||||||
# 判断 df 的数值列转为float
|
|
||||||
for col in df_combined3.columns:
|
|
||||||
try:
|
|
||||||
if col != 'ds':
|
|
||||||
df_combined3[col] = df_combined3[col].astype(float)
|
|
||||||
df_combined3[col] = df_combined3[col].round(2)
|
|
||||||
except ValueError:
|
|
||||||
pass
|
|
||||||
df_combined3.to_csv(os.path.join(dataset,"testandpredict_groupby.csv"),index=False)
|
|
||||||
|
|
||||||
|
|
||||||
# 历史价格+预测价格
|
|
||||||
sqlitedb.drop_table('testandpredict_groupby')
|
|
||||||
df_combined3.to_sql('testandpredict_groupby',sqlitedb.connection,index=False)
|
|
||||||
# 新增均值列
|
|
||||||
df_combined3['mean'] = df_combined3[modelnames].mean(axis=1)
|
|
||||||
|
|
||||||
def _plt_predict_ture(df):
|
|
||||||
lens = df.shape[0] if df.shape[0] < 180 else 90
|
|
||||||
df = df[-lens:] # 取180个数据点画图
|
|
||||||
# 历史价格
|
|
||||||
plt.figure(figsize=(20, 10))
|
|
||||||
plt.plot(df['ds'], df['y'], label='真实值')
|
|
||||||
# 均值线
|
|
||||||
plt.plot(df['ds'], df['mean'], color='r', linestyle='--', label='前五模型预测均值')
|
|
||||||
# 颜色填充
|
|
||||||
plt.fill_between(df['ds'], df['max_within_quantile'], df['min_within_quantile'], alpha=0.2)
|
|
||||||
markers = ['o', 's', '^', 'D', 'v', '*', 'p', 'h', 'H', '+', 'x', 'd']
|
|
||||||
random_marker = random.choice(markers)
|
|
||||||
for model in modelnames:
|
|
||||||
# for model in ['BiTCN','RNN']:
|
|
||||||
plt.plot(df['ds'][-horizon:], df[model][-horizon:], label=model,marker=random_marker)
|
|
||||||
# plt.plot(df_combined3['ds'], df_combined3['min_abs_error_rate_prediction'], label='最小绝对误差', linestyle='--', color='orange')
|
|
||||||
# 网格
|
|
||||||
plt.grid(True)
|
|
||||||
# 显示历史值
|
|
||||||
for i, j in zip(df['ds'], df['y']):
|
|
||||||
plt.text(i, j, str(j), ha='center', va='bottom')
|
|
||||||
|
|
||||||
# for model in most_model:
|
|
||||||
# plt.plot(df['ds'], df[model], label=model,marker='o')
|
|
||||||
# 当前日期画竖虚线
|
|
||||||
plt.axvline(x=df['ds'].iloc[-horizon], color='r', linestyle='--')
|
|
||||||
plt.legend()
|
|
||||||
plt.xlabel('日期')
|
|
||||||
plt.ylabel('价格')
|
|
||||||
|
|
||||||
plt.savefig(os.path.join(dataset,'历史价格-预测值.png'), bbox_inches='tight')
|
|
||||||
plt.close()
|
|
||||||
|
|
||||||
def _plt_predict_table(df):
|
|
||||||
# 预测值表格
|
|
||||||
fig, ax = plt.subplots(figsize=(20, 6))
|
|
||||||
ax.axis('off') # 关闭坐标轴
|
|
||||||
# 数值保留2位小数
|
|
||||||
df = df.round(2)
|
|
||||||
df = df[-horizon:]
|
|
||||||
df['Day'] = [f'Day_{i}' for i in range(1,horizon+1)]
|
|
||||||
# Day列放到最前面
|
|
||||||
df = df[['Day'] + list(df.columns[:-1])]
|
|
||||||
table = ax.table(cellText=df.values, colLabels=df.columns, loc='center')
|
|
||||||
#加宽表格
|
|
||||||
table.auto_set_font_size(False)
|
|
||||||
table.set_fontsize(10)
|
|
||||||
|
|
||||||
# 设置表格样式,列数据最小的用绿色标识
|
|
||||||
plt.savefig(os.path.join(dataset,'预测值表格.png'), bbox_inches='tight')
|
|
||||||
plt.close()
|
|
||||||
|
|
||||||
def _plt_model_results3():
|
|
||||||
# 可视化评估结果
|
|
||||||
plt.rcParams['font.sans-serif'] = ['SimHei']
|
|
||||||
fig, ax = plt.subplots(figsize=(20, 10))
|
|
||||||
ax.axis('off') # 关闭坐标轴
|
|
||||||
table = ax.table(cellText=model_results3.values, colLabels=model_results3.columns, loc='center')
|
|
||||||
# 加宽表格
|
|
||||||
table.auto_set_font_size(False)
|
|
||||||
table.set_fontsize(10)
|
|
||||||
|
|
||||||
# 设置表格样式,列数据最小的用绿色标识
|
|
||||||
plt.savefig(os.path.join(dataset,'模型评估.png'), bbox_inches='tight')
|
|
||||||
plt.close()
|
|
||||||
|
|
||||||
_plt_predict_ture(df_combined3)
|
|
||||||
_plt_predict_table(df_combined3)
|
|
||||||
_plt_model_results3()
|
|
||||||
|
|
||||||
return model_results3
|
|
||||||
|
|
||||||
# 原油计算预测评估指数
|
|
||||||
@exception_logger
|
|
||||||
def model_losss_bak(sqlitedb,end_time):
|
|
||||||
global dataset
|
|
||||||
global rote
|
|
||||||
most_model = [sqlitedb.select_data('most_model',columns=['most_common_model'],order_by='ds desc',limit=1).values[0][0]]
|
|
||||||
most_model_name = most_model[0]
|
|
||||||
|
|
||||||
# 预测数据处理 predict
|
|
||||||
df_combined = loadcsv(os.path.join(dataset,"cross_validation.csv"))
|
|
||||||
df_combined = dateConvert(df_combined)
|
|
||||||
# 删除空列
|
|
||||||
df_combined.dropna(axis=1,inplace=True)
|
|
||||||
# 删除缺失值,预测过程不能有缺失值
|
|
||||||
df_combined.dropna(inplace=True)
|
|
||||||
# 其他列转为数值类型
|
|
||||||
df_combined = df_combined.astype({col: 'float32' for col in df_combined.columns if col not in ['cutoff','ds'] })
|
|
||||||
# 使用 groupby 和 transform 结合 lambda 函数来获取每个分组中 cutoff 的最小值,并创建一个新的列来存储这个最大值
|
|
||||||
df_combined['max_cutoff'] = df_combined.groupby('ds')['cutoff'].transform('max')
|
|
||||||
|
|
||||||
# 然后筛选出那些 cutoff 等于 max_cutoff 的行,这样就得到了每个分组中 cutoff 最大的行,并保留了其他列
|
|
||||||
df_combined = df_combined[df_combined['cutoff'] == df_combined['max_cutoff']]
|
|
||||||
# 删除模型生成的cutoff列
|
|
||||||
df_combined.drop(columns=['cutoff', 'max_cutoff'], inplace=True)
|
|
||||||
# 获取模型名称
|
|
||||||
modelnames = df_combined.columns.to_list()[1:]
|
|
||||||
if 'y' in modelnames:
|
|
||||||
modelnames.remove('y')
|
|
||||||
df_combined3 = df_combined.copy() # 备份df_combined,后面画图需要
|
|
||||||
|
|
||||||
|
|
||||||
# 空的列表存储每个模型的MSE、RMSE、MAE、MAPE、SMAPE
|
|
||||||
cellText = []
|
|
||||||
|
|
||||||
# 遍历模型名称,计算模型评估指标
|
|
||||||
for model in modelnames:
|
|
||||||
modelmse = mse(df_combined['y'], df_combined[model])
|
|
||||||
modelrmse = rmse(df_combined['y'], df_combined[model])
|
|
||||||
modelmae = mae(df_combined['y'], df_combined[model])
|
|
||||||
# modelmape = mape(df_combined['y'], df_combined[model])
|
|
||||||
# modelsmape = smape(df_combined['y'], df_combined[model])
|
|
||||||
# modelr2 = r2_score(df_combined['y'], df_combined[model])
|
|
||||||
cellText.append([model,round(modelmse, 3), round(modelrmse, 3), round(modelmae, 3)])
|
|
||||||
|
|
||||||
model_results3 = pd.DataFrame(cellText,columns=['模型(Model)','平均平方误差(MSE)', '均方根误差(RMSE)', '平均绝对误差(MAE)'])
|
|
||||||
# 按MSE降序排列
|
|
||||||
model_results3 = model_results3.sort_values(by='平均平方误差(MSE)', ascending=True)
|
|
||||||
model_results3.to_csv(os.path.join(dataset,"model_evaluation.csv"),index=False)
|
|
||||||
modelnames = model_results3['模型(Model)'].tolist()
|
|
||||||
allmodelnames = modelnames.copy()
|
|
||||||
# 保存5个最佳模型的名称
|
|
||||||
if len(modelnames) > 5:
|
|
||||||
modelnames = modelnames[0:5]
|
|
||||||
if is_fivemodels:
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
with open(os.path.join(dataset,"best_modelnames.txt"), 'w') as f:
|
|
||||||
f.write(','.join(modelnames) + '\n')
|
|
||||||
|
|
||||||
# 预测值与真实值对比图
|
|
||||||
plt.rcParams['font.sans-serif'] = ['SimHei']
|
|
||||||
plt.figure(figsize=(15, 10))
|
|
||||||
for n,model in enumerate(modelnames[:5]):
|
|
||||||
plt.subplot(3, 2, n+1)
|
|
||||||
plt.plot(df_combined3['ds'], df_combined3['y'], label='真实值')
|
|
||||||
plt.plot(df_combined3['ds'], df_combined3[model], label=model)
|
|
||||||
plt.legend()
|
|
||||||
plt.xlabel('日期')
|
|
||||||
plt.ylabel('价格')
|
|
||||||
plt.title(model+'拟合')
|
|
||||||
plt.subplots_adjust(hspace=0.5)
|
|
||||||
plt.savefig(os.path.join(dataset,'预测值与真实值对比图.png'), bbox_inches='tight')
|
|
||||||
plt.close()
|
|
||||||
|
|
||||||
|
|
||||||
# # 历史数据+预测数据
|
|
||||||
# # 拼接未来时间预测
|
|
||||||
df_predict = pd.read_csv(os.path.join(dataset,'predict.csv'))
|
|
||||||
df_predict.drop('unique_id',inplace=True,axis=1)
|
|
||||||
df_predict.dropna(axis=1,inplace=True)
|
|
||||||
|
|
||||||
try:
|
|
||||||
df_predict['ds'] = pd.to_datetime(df_predict['ds'],format=r'%Y-%m-%d')
|
|
||||||
except ValueError :
|
|
||||||
df_predict['ds'] = pd.to_datetime(df_predict['ds'],format=r'%Y/%m/%d')
|
|
||||||
|
|
||||||
# def first_row_to_database(df):
|
|
||||||
# # # 取第一行数据存储到数据库中
|
|
||||||
# first_row = df.head(1)
|
|
||||||
# first_row['ds'] = first_row['ds'].dt.strftime('%Y-%m-%d 00:00:00')
|
|
||||||
# # 将预测结果保存到数据库
|
|
||||||
# if not sqlitedb.check_table_exists('trueandpredict'):
|
|
||||||
# first_row.to_sql('trueandpredict',sqlitedb.connection,index=False)
|
|
||||||
# else:
|
|
||||||
# for col in first_row.columns:
|
|
||||||
# sqlitedb.add_column_if_not_exists('trueandpredict',col,'TEXT')
|
|
||||||
# for row in first_row.itertuples(index=False):
|
|
||||||
# row_dict = row._asdict()
|
|
||||||
# columns=row_dict.keys()
|
|
||||||
# check_query = sqlitedb.select_data('trueandpredict',where_condition = f"ds = '{row.ds}'")
|
|
||||||
# if len(check_query) > 0:
|
|
||||||
# set_clause = ", ".join([f"{key} = '{value}'" for key, value in row_dict.items()])
|
|
||||||
# sqlitedb.update_data('trueandpredict',set_clause,where_condition = f"ds = '{row.ds}'")
|
|
||||||
# continue
|
|
||||||
# sqlitedb.insert_data('trueandpredict',tuple(row_dict.values()),columns=columns)
|
|
||||||
|
|
||||||
# first_row_to_database(df_predict)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
df_combined3 = pd.concat([df_combined3, df_predict]).reset_index(drop=True)
|
|
||||||
|
|
||||||
# 计算每个模型与最佳模型的绝对误差比例,根据设置的阈值rote筛选预测值显示最大最小值
|
|
||||||
names = []
|
|
||||||
names_df = df_combined3.copy()
|
|
||||||
for col in allmodelnames:
|
|
||||||
names_df[f'{col}-{most_model_name}-误差比例'] = abs(names_df[col] - names_df[most_model_name]) / names_df[most_model_name]
|
|
||||||
names.append(f'{col}-{most_model_name}-误差比例')
|
|
||||||
|
|
||||||
names_df = names_df[names]
|
|
||||||
def add_rote_column(row):
|
|
||||||
columns = []
|
|
||||||
for r in names_df.columns:
|
|
||||||
if row[r] <= rote:
|
|
||||||
columns.append(r.split('-')[0])
|
|
||||||
return pd.Series([columns], index=['columns'])
|
|
||||||
names_df['columns'] = names_df.apply(add_rote_column, axis=1)
|
|
||||||
|
|
||||||
def add_upper_lower_bound(row):
|
|
||||||
|
|
||||||
# 计算上边界值
|
|
||||||
upper_bound = row.max()
|
|
||||||
# 计算下边界值
|
|
||||||
lower_bound = row.min()
|
|
||||||
return pd.Series([lower_bound, upper_bound], index=['min_within_quantile', 'max_within_quantile'])
|
|
||||||
|
|
||||||
# df_combined3[['min_within_quantile','max_within_quantile']] = names_df.apply(add_upper_lower_bound, axis=1)
|
|
||||||
|
|
||||||
# 取前五最佳模型的最大最小值作为上下边界值
|
|
||||||
df_combined3[['min_within_quantile','max_within_quantile']]= df_combined3[modelnames].apply(add_upper_lower_bound, axis=1)
|
|
||||||
|
|
||||||
def find_closest_values(row):
|
|
||||||
x = row.y
|
|
||||||
if x is None or np.isnan(x):
|
|
||||||
return pd.Series([None, None], index=['min_price','max_price'])
|
|
||||||
# row = row.drop('ds')
|
|
||||||
row = row.values.tolist()
|
|
||||||
row.sort()
|
|
||||||
print(row)
|
|
||||||
# x 在row中的索引
|
|
||||||
index = row.index(x)
|
|
||||||
if index == 0:
|
|
||||||
return pd.Series([row[index+1], row[index+2]], index=['min_price','max_price'])
|
|
||||||
elif index == len(row)-1:
|
|
||||||
return pd.Series([row[index-2], row[index-1]], index=['min_price','max_price'])
|
|
||||||
else:
|
|
||||||
return pd.Series([row[index-1], row[index+1]], index=['min_price','max_price'])
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def find_most_common_model():
|
|
||||||
# 最多频率的模型名称
|
|
||||||
min_model_max_frequency_model = df_combined3['min_model'].tail(60).value_counts().idxmax()
|
|
||||||
max_model_max_frequency_model = df_combined3['max_model'].tail(60).value_counts().idxmax()
|
|
||||||
if min_model_max_frequency_model == max_model_max_frequency_model:
|
|
||||||
# 取60天第二多的模型
|
|
||||||
max_model_max_frequency_model = df_combined3['max_model'].tail(60).value_counts().nlargest(2).index[1]
|
|
||||||
|
|
||||||
df_predict['min_model'] = min_model_max_frequency_model
|
|
||||||
df_predict['max_model'] = max_model_max_frequency_model
|
|
||||||
df_predict['min_within_quantile'] = df_predict[min_model_max_frequency_model]
|
|
||||||
df_predict['max_within_quantile'] = df_predict[max_model_max_frequency_model]
|
|
||||||
|
|
||||||
|
|
||||||
# find_most_common_model()
|
|
||||||
|
|
||||||
df_combined3['ds'] = pd.to_datetime(df_combined3['ds'])
|
|
||||||
df_combined3['ds'] = df_combined3['ds'].dt.strftime('%Y-%m-%d')
|
|
||||||
df_predict2 = df_combined3.tail(horizon)
|
|
||||||
|
|
||||||
# 保存到数据库
|
|
||||||
if not sqlitedb.check_table_exists('accuracy'):
|
|
||||||
columns = ','.join(df_combined3.columns.to_list()+['id','CREAT_DATE','min_price','max_price'])
|
|
||||||
sqlitedb.create_table('accuracy',columns=columns)
|
|
||||||
existing_data = sqlitedb.select_data(table_name = "accuracy")
|
|
||||||
|
|
||||||
if not existing_data.empty:
|
|
||||||
max_id = existing_data['id'].astype(int).max()
|
|
||||||
df_predict2['id'] = range(max_id + 1, max_id + 1 + len(df_predict2))
|
|
||||||
else:
|
|
||||||
df_predict2['id'] = range(1, 1 + len(df_predict2))
|
|
||||||
# df_predict2['CREAT_DATE'] = now if end_time == '' else end_time
|
|
||||||
df_predict2['CREAT_DATE'] = end_time
|
|
||||||
def get_common_columns(df1, df2):
|
|
||||||
# 获取两个DataFrame的公共列名
|
|
||||||
return list(set(df1.columns).intersection(df2.columns))
|
|
||||||
|
|
||||||
common_columns = get_common_columns(df_predict2, existing_data)
|
|
||||||
try:
|
|
||||||
df_predict2[common_columns].to_sql("accuracy", con=sqlitedb.connection, if_exists='append', index=False)
|
|
||||||
except:
|
|
||||||
df_predict2.to_sql("accuracy", con=sqlitedb.connection, if_exists='append', index=False)
|
|
||||||
|
|
||||||
# 更新accuracy表中的y值
|
|
||||||
update_y = sqlitedb.select_data(table_name = "accuracy",where_condition='y is null')
|
|
||||||
if len(update_y) > 0:
|
|
||||||
df_combined4 = df_combined3[(df_combined3['ds'].isin(update_y['ds'])) & (df_combined3['y'].notnull())]
|
|
||||||
if len(df_combined4) > 0:
|
|
||||||
for index, row in df_combined4.iterrows():
|
|
||||||
try:
|
|
||||||
sqlitedb.update_data('accuracy',f"y = {row['y']}",f"ds = '{row['ds']}'")
|
|
||||||
except:
|
|
||||||
logger.error(f'更新accuracy表中的y值失败,row={row}')
|
|
||||||
# 上周准确率计算
|
|
||||||
predict_y = sqlitedb.select_data(table_name = "accuracy")
|
|
||||||
# ids = predict_y[predict_y['min_price'].isnull()]['id'].tolist()
|
|
||||||
ids = predict_y['id'].tolist()
|
|
||||||
# 准确率基准与绘图上下界逻辑一致
|
|
||||||
# predict_y[['min_price','max_price']] = predict_y[['min_within_quantile','max_within_quantile']]
|
|
||||||
# 模型评估前五均值
|
|
||||||
predict_y['min_price'] = predict_y[modelnames].mean(axis=1) -1
|
|
||||||
predict_y['max_price'] = predict_y[modelnames].mean(axis=1) +1
|
|
||||||
# 模型评估前十均值
|
|
||||||
# predict_y['min_price'] = predict_y[allmodelnames[0:10]].mean(axis=1) -1
|
|
||||||
# predict_y['max_price'] = predict_y[allmodelnames[0:10]].mean(axis=1) +1
|
|
||||||
# 模型评估前十最大最小
|
|
||||||
# allmodelnames 和 predict_y 列 重复的
|
|
||||||
# allmodelnames = [col for col in allmodelnames if col in predict_y.columns]
|
|
||||||
# predict_y['min_price'] = predict_y[allmodelnames[0:10]].min(axis=1)
|
|
||||||
# predict_y['max_price'] = predict_y[allmodelnames[0:10]].max(axis=1)
|
|
||||||
for id in ids:
|
|
||||||
row = predict_y[predict_y['id'] == id]
|
|
||||||
try:
|
|
||||||
sqlitedb.update_data('accuracy',f"min_price = {row['min_price'].values[0]},max_price = {row['max_price'].values[0]}",f"id = {id}")
|
|
||||||
except:
|
|
||||||
logger.error(f'更新accuracy表中的min_price,max_price值失败,row={row}')
|
|
||||||
# 拼接市场最高最低价
|
|
||||||
xlsfilename = os.path.join(dataset,'数据项下载.xls')
|
|
||||||
df2 = pd.read_excel(xlsfilename)[5:]
|
|
||||||
df2 = df2.rename(columns = {'数据项名称':'ds','布伦特最低价':'LOW_PRICE','布伦特最高价':'HIGH_PRICE'})
|
|
||||||
print(df2.shape)
|
|
||||||
df = pd.merge(predict_y,df2,on=['ds'],how='left')
|
|
||||||
df['ds'] = pd.to_datetime(df['ds'])
|
|
||||||
df = df.reindex()
|
|
||||||
|
|
||||||
# 判断预测值在不在布伦特最高最低价范围内,准确率为1,否则为0
|
|
||||||
def is_within_range(row):
|
|
||||||
for model in allmodelnames:
|
|
||||||
if row['LOW_PRICE'] <= row[col] <= row['HIGH_PRICE']:
|
|
||||||
return 1
|
|
||||||
else:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
# 比较真实最高最低,和预测最高最低 计算准确率
|
|
||||||
def calculate_accuracy(row):
|
|
||||||
# 全子集情况:
|
|
||||||
if (row['max_price'] >= row['HIGH_PRICE'] and row['min_price'] <= row['LOW_PRICE']) or \
|
|
||||||
(row['max_price'] <= row['HIGH_PRICE'] and row['min_price'] >= row['LOW_PRICE']):
|
|
||||||
return 1
|
|
||||||
# 无交集情况:
|
|
||||||
if row['max_price'] < row['LOW_PRICE'] or \
|
|
||||||
row['min_price'] > row['HIGH_PRICE']:
|
|
||||||
return 0
|
|
||||||
# 有交集情况:
|
|
||||||
else:
|
|
||||||
sorted_prices = sorted([row['LOW_PRICE'], row['min_price'], row['max_price'], row['HIGH_PRICE']])
|
|
||||||
middle_diff = sorted_prices[2] - sorted_prices[1]
|
|
||||||
price_range = row['HIGH_PRICE'] - row['LOW_PRICE']
|
|
||||||
accuracy = middle_diff / price_range
|
|
||||||
return accuracy
|
|
||||||
|
|
||||||
columns = ['HIGH_PRICE','LOW_PRICE','min_price','max_price']
|
|
||||||
df[columns] = df[columns].astype(float)
|
|
||||||
df['ACCURACY'] = df.apply(calculate_accuracy, axis=1)
|
|
||||||
# df['ACCURACY'] = df.apply(is_within_range, axis=1)
|
|
||||||
# 取结束日期上一周的日期
|
|
||||||
def get_week_date(end_time):
|
|
||||||
endtime = end_time
|
|
||||||
endtimeweek = datetime.datetime.strptime(endtime, '%Y-%m-%d')
|
|
||||||
up_week = endtimeweek - datetime.timedelta(days=endtimeweek.weekday() + 14)
|
|
||||||
up_week_dates = [up_week + datetime.timedelta(days=i) for i in range(14)][4:-2]
|
|
||||||
up_week_dates = [date.strftime('%Y-%m-%d') for date in up_week_dates]
|
|
||||||
return up_week_dates
|
|
||||||
up_week_dates = get_week_date(end_time)
|
|
||||||
|
|
||||||
# 计算准确率并保存结果
|
|
||||||
def _get_accuracy_rate(df,up_week_dates,endtime):
|
|
||||||
df3 = df.copy()
|
|
||||||
df3 = df3[df3['CREAT_DATE'].isin(up_week_dates)]
|
|
||||||
df3 = df3[df3['ds'].isin(up_week_dates)]
|
|
||||||
accuracy_rote = 0
|
|
||||||
for i,group in df3.groupby('ds'):
|
|
||||||
print('权重:',weight_dict[len(group)-1])
|
|
||||||
print('准确率:',(group['ACCURACY'].sum()/len(group))*weight_dict[len(group)-1])
|
|
||||||
accuracy_rote += (group['ACCURACY'].sum()/len(group))*weight_dict[len(group)-1]
|
accuracy_rote += (group['ACCURACY'].sum()/len(group))*weight_dict[len(group)-1]
|
||||||
df3.to_csv(os.path.join(dataset,f'accuracy_{endtime}.csv'),index=False)
|
df3.to_csv(os.path.join(dataset,f'accuracy_{endtime}.csv'),index=False)
|
||||||
df4 = pd.DataFrame(columns=['开始日期','结束日期','准确率'])
|
df4 = pd.DataFrame(columns=['开始日期','结束日期','准确率'])
|
||||||
df4.loc[len(df4)] = {'开始日期':up_week_dates[0],'结束日期':up_week_dates[-1],'准确率':accuracy_rote}
|
df4.loc[len(df4)] = {'开始日期':ds_dates[0],'结束日期':ds_dates[-1],'准确率':accuracy_rote}
|
||||||
|
df4.to_csv(os.path.join(dataset,f'accuracy_rote_{endtime}.csv'),index=False)
|
||||||
df4.to_sql("accuracy_rote", con=sqlitedb.connection, if_exists='append', index=False)
|
df4.to_sql("accuracy_rote", con=sqlitedb.connection, if_exists='append', index=False)
|
||||||
_get_accuracy_rate(df,up_week_dates,end_time)
|
create_dates,ds_dates = get_week_date(end_time)
|
||||||
|
_get_accuracy_rate(df,create_dates,ds_dates,end_time)
|
||||||
|
|
||||||
def _add_abs_error_rate():
|
def _add_abs_error_rate():
|
||||||
# 计算每个预测值与真实值之间的偏差率
|
# 计算每个预测值与真实值之间的偏差率
|
||||||
@ -1215,7 +802,7 @@ def model_losss_juxiting(sqlitedb):
|
|||||||
|
|
||||||
df_predict2 = df_predict.copy()
|
df_predict2 = df_predict.copy()
|
||||||
df_predict2['ds'] = pd.to_datetime(df_predict2['ds'])
|
df_predict2['ds'] = pd.to_datetime(df_predict2['ds'])
|
||||||
df_predict2['ds'] = df_predict2['ds'].dt.strftime('%Y-%m-%d 00:00:00')
|
df_predict2['ds'] = df_predict2['ds'].dt.strftime('%Y-%m-%d')
|
||||||
|
|
||||||
|
|
||||||
def _add_abs_error_rate():
|
def _add_abs_error_rate():
|
||||||
|
710
原油预测绘图调试.ipynb
Normal file
710
原油预测绘图调试.ipynb
Normal file
@ -0,0 +1,710 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"id": "7fadc60c-d710-4b8c-89cd-1d889ece1eaf",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"数据库连接成功 192.168.101.27 jingbo_test root\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# 读取配置\n",
|
||||||
|
"# 父目录下的lib\n",
|
||||||
|
"from lib.dataread import *\n",
|
||||||
|
"from lib.tools import Graphs,mse,rmse,mae,exception_logger\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 7,
|
||||||
|
"id": "0e5b6f30-b7ca-4718-97a3-48b54156e07f",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"(51, 30)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>模型(Model)</th>\n",
|
||||||
|
" <th>平均平方误差(MSE)</th>\n",
|
||||||
|
" <th>均方根误差(RMSE)</th>\n",
|
||||||
|
" <th>平均绝对误差(MAE)</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>11</th>\n",
|
||||||
|
" <td>DilatedRNN</td>\n",
|
||||||
|
" <td>1.567000</td>\n",
|
||||||
|
" <td>1.252</td>\n",
|
||||||
|
" <td>0.978</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>14</th>\n",
|
||||||
|
" <td>NLinear</td>\n",
|
||||||
|
" <td>1.905000</td>\n",
|
||||||
|
" <td>1.380</td>\n",
|
||||||
|
" <td>1.104</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>10</th>\n",
|
||||||
|
" <td>BiTCN</td>\n",
|
||||||
|
" <td>1.906000</td>\n",
|
||||||
|
" <td>1.380</td>\n",
|
||||||
|
" <td>1.042</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>6</th>\n",
|
||||||
|
" <td>PatchTST</td>\n",
|
||||||
|
" <td>1.939000</td>\n",
|
||||||
|
" <td>1.393</td>\n",
|
||||||
|
" <td>1.129</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>19</th>\n",
|
||||||
|
" <td>TiDE</td>\n",
|
||||||
|
" <td>1.967000</td>\n",
|
||||||
|
" <td>1.402</td>\n",
|
||||||
|
" <td>1.090</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>4</th>\n",
|
||||||
|
" <td>TSMixer</td>\n",
|
||||||
|
" <td>2.056000</td>\n",
|
||||||
|
" <td>1.434</td>\n",
|
||||||
|
" <td>1.111</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>7</th>\n",
|
||||||
|
" <td>RNN</td>\n",
|
||||||
|
" <td>2.101000</td>\n",
|
||||||
|
" <td>1.449</td>\n",
|
||||||
|
" <td>1.144</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>13</th>\n",
|
||||||
|
" <td>DLinear</td>\n",
|
||||||
|
" <td>2.162000</td>\n",
|
||||||
|
" <td>1.470</td>\n",
|
||||||
|
" <td>1.178</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>15</th>\n",
|
||||||
|
" <td>TFT</td>\n",
|
||||||
|
" <td>2.196000</td>\n",
|
||||||
|
" <td>1.482</td>\n",
|
||||||
|
" <td>1.137</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>16</th>\n",
|
||||||
|
" <td>FEDformer</td>\n",
|
||||||
|
" <td>2.211000</td>\n",
|
||||||
|
" <td>1.487</td>\n",
|
||||||
|
" <td>1.239</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>9</th>\n",
|
||||||
|
" <td>TCN</td>\n",
|
||||||
|
" <td>2.397000</td>\n",
|
||||||
|
" <td>1.548</td>\n",
|
||||||
|
" <td>1.276</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>NHITS</td>\n",
|
||||||
|
" <td>2.454000</td>\n",
|
||||||
|
" <td>1.567</td>\n",
|
||||||
|
" <td>1.190</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>12</th>\n",
|
||||||
|
" <td>MLP</td>\n",
|
||||||
|
" <td>2.468000</td>\n",
|
||||||
|
" <td>1.571</td>\n",
|
||||||
|
" <td>1.224</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>5</th>\n",
|
||||||
|
" <td>TSMixerx</td>\n",
|
||||||
|
" <td>2.490000</td>\n",
|
||||||
|
" <td>1.578</td>\n",
|
||||||
|
" <td>1.231</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1</th>\n",
|
||||||
|
" <td>Informer</td>\n",
|
||||||
|
" <td>3.095000</td>\n",
|
||||||
|
" <td>1.759</td>\n",
|
||||||
|
" <td>1.352</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>20</th>\n",
|
||||||
|
" <td>DeepNPTS</td>\n",
|
||||||
|
" <td>3.267000</td>\n",
|
||||||
|
" <td>1.808</td>\n",
|
||||||
|
" <td>1.357</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>8</th>\n",
|
||||||
|
" <td>GRU</td>\n",
|
||||||
|
" <td>5.172000</td>\n",
|
||||||
|
" <td>2.274</td>\n",
|
||||||
|
" <td>1.909</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2</th>\n",
|
||||||
|
" <td>LSTM</td>\n",
|
||||||
|
" <td>6.844000</td>\n",
|
||||||
|
" <td>2.616</td>\n",
|
||||||
|
" <td>2.386</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>18</th>\n",
|
||||||
|
" <td>MLPMultivariate</td>\n",
|
||||||
|
" <td>8.163000</td>\n",
|
||||||
|
" <td>2.857</td>\n",
|
||||||
|
" <td>2.221</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>17</th>\n",
|
||||||
|
" <td>StemGNN</td>\n",
|
||||||
|
" <td>17.216000</td>\n",
|
||||||
|
" <td>4.149</td>\n",
|
||||||
|
" <td>3.359</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>3</th>\n",
|
||||||
|
" <td>iTransformer</td>\n",
|
||||||
|
" <td>21.568001</td>\n",
|
||||||
|
" <td>4.644</td>\n",
|
||||||
|
" <td>3.487</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" 模型(Model) 平均平方误差(MSE) 均方根误差(RMSE) 平均绝对误差(MAE)\n",
|
||||||
|
"11 DilatedRNN 1.567000 1.252 0.978\n",
|
||||||
|
"14 NLinear 1.905000 1.380 1.104\n",
|
||||||
|
"10 BiTCN 1.906000 1.380 1.042\n",
|
||||||
|
"6 PatchTST 1.939000 1.393 1.129\n",
|
||||||
|
"19 TiDE 1.967000 1.402 1.090\n",
|
||||||
|
"4 TSMixer 2.056000 1.434 1.111\n",
|
||||||
|
"7 RNN 2.101000 1.449 1.144\n",
|
||||||
|
"13 DLinear 2.162000 1.470 1.178\n",
|
||||||
|
"15 TFT 2.196000 1.482 1.137\n",
|
||||||
|
"16 FEDformer 2.211000 1.487 1.239\n",
|
||||||
|
"9 TCN 2.397000 1.548 1.276\n",
|
||||||
|
"0 NHITS 2.454000 1.567 1.190\n",
|
||||||
|
"12 MLP 2.468000 1.571 1.224\n",
|
||||||
|
"5 TSMixerx 2.490000 1.578 1.231\n",
|
||||||
|
"1 Informer 3.095000 1.759 1.352\n",
|
||||||
|
"20 DeepNPTS 3.267000 1.808 1.357\n",
|
||||||
|
"8 GRU 5.172000 2.274 1.909\n",
|
||||||
|
"2 LSTM 6.844000 2.616 2.386\n",
|
||||||
|
"18 MLPMultivariate 8.163000 2.857 2.221\n",
|
||||||
|
"17 StemGNN 17.216000 4.149 3.359\n",
|
||||||
|
"3 iTransformer 21.568001 4.644 3.487"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 7,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"\n",
|
||||||
|
"# 原油计算预测评估指数\n",
|
||||||
|
"@exception_logger\n",
|
||||||
|
"def model_losss(sqlitedb,end_time):\n",
|
||||||
|
" global dataset\n",
|
||||||
|
" global rote\n",
|
||||||
|
" most_model = [sqlitedb.select_data('most_model',columns=['most_common_model'],order_by='ds desc',limit=1).values[0][0]]\n",
|
||||||
|
" most_model_name = most_model[0]\n",
|
||||||
|
"\n",
|
||||||
|
" # 预测数据处理 predict\n",
|
||||||
|
" df_combined = loadcsv(os.path.join(dataset,\"cross_validation.csv\")) \n",
|
||||||
|
" df_combined = dateConvert(df_combined)\n",
|
||||||
|
" # 删除空列\n",
|
||||||
|
" df_combined.dropna(axis=1,inplace=True)\n",
|
||||||
|
" # 删除缺失值,预测过程不能有缺失值\n",
|
||||||
|
" df_combined.dropna(inplace=True) \n",
|
||||||
|
" # 其他列转为数值类型\n",
|
||||||
|
" df_combined = df_combined.astype({col: 'float32' for col in df_combined.columns if col not in ['cutoff','ds'] })\n",
|
||||||
|
" # 使用 groupby 和 transform('max') 获取每个分组中 cutoff 的最大值,并创建一个新的列 max_cutoff 来存储这个最大值\n",
|
||||||
|
" df_combined['max_cutoff'] = df_combined.groupby('ds')['cutoff'].transform('max')\n",
|
||||||
|
"\n",
|
||||||
|
" # 然后筛选出那些 cutoff 等于 max_cutoff 的行,这样就得到了每个分组中 cutoff 最大的行,并保留了其他列\n",
|
||||||
|
" df_combined = df_combined[df_combined['cutoff'] == df_combined['max_cutoff']]\n",
|
||||||
|
" # 删除模型生成的cutoff列\n",
|
||||||
|
" df_combined.drop(columns=['cutoff', 'max_cutoff'], inplace=True)\n",
|
||||||
|
" # 获取模型名称\n",
|
||||||
|
" modelnames = df_combined.columns.to_list()[1:] \n",
|
||||||
|
" if 'y' in modelnames:\n",
|
||||||
|
" modelnames.remove('y')\n",
|
||||||
|
" # df_combined3 = df_combined.copy() # 备份df_combined,后面画图需要\n",
|
||||||
|
" df_combined3 = sqlitedb.select_data('accuracy')\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
" # 空的列表存储每个模型的MSE、RMSE、MAE\n",
|
||||||
|
" cellText = []\n",
|
||||||
|
"\n",
|
||||||
|
" # 遍历模型名称,计算模型评估指标 \n",
|
||||||
|
" for model in modelnames:\n",
|
||||||
|
" modelmse = mse(df_combined['y'], df_combined[model])\n",
|
||||||
|
" modelrmse = rmse(df_combined['y'], df_combined[model])\n",
|
||||||
|
" modelmae = mae(df_combined['y'], df_combined[model])\n",
|
||||||
|
" # modelmape = mape(df_combined['y'], df_combined[model])\n",
|
||||||
|
" # modelsmape = smape(df_combined['y'], df_combined[model])\n",
|
||||||
|
" # modelr2 = r2_score(df_combined['y'], df_combined[model])\n",
|
||||||
|
" cellText.append([model,round(modelmse, 3), round(modelrmse, 3), round(modelmae, 3)])\n",
|
||||||
|
" \n",
|
||||||
|
" model_results3 = pd.DataFrame(cellText,columns=['模型(Model)','平均平方误差(MSE)', '均方根误差(RMSE)', '平均绝对误差(MAE)'])\n",
|
||||||
|
" # 按MSE升序排列\n",
|
||||||
|
" model_results3 = model_results3.sort_values(by='平均平方误差(MSE)', ascending=True)\n",
|
||||||
|
" model_results3.to_csv(os.path.join(dataset,\"model_evaluation.csv\"),index=False)\n",
|
||||||
|
" modelnames = model_results3['模型(Model)'].tolist()\n",
|
||||||
|
" allmodelnames = modelnames.copy()\n",
|
||||||
|
" # # 保存5个最佳模型的名称\n",
|
||||||
|
" # if len(modelnames) > 5:\n",
|
||||||
|
" # modelnames = modelnames[0:5]\n",
|
||||||
|
" # if is_fivemodels:\n",
|
||||||
|
" # pass\n",
|
||||||
|
" # else:\n",
|
||||||
|
" # with open(os.path.join(dataset,\"best_modelnames.txt\"), 'w') as f:\n",
|
||||||
|
" # f.write(','.join(modelnames) + '\\n')\n",
|
||||||
|
"\n",
|
||||||
|
" # # 预测值与真实值对比图\n",
|
||||||
|
" # plt.rcParams['font.sans-serif'] = ['SimHei']\n",
|
||||||
|
" # plt.figure(figsize=(15, 10))\n",
|
||||||
|
" # for n,model in enumerate(modelnames[:5]):\n",
|
||||||
|
" # plt.subplot(3, 2, n+1)\n",
|
||||||
|
" # plt.plot(df_combined3['ds'], df_combined3['y'], label='真实值')\n",
|
||||||
|
" # plt.plot(df_combined3['ds'], df_combined3[model], label=model)\n",
|
||||||
|
" # plt.legend()\n",
|
||||||
|
" # plt.xlabel('日期')\n",
|
||||||
|
" # plt.ylabel('价格')\n",
|
||||||
|
" # plt.title(model+'拟合')\n",
|
||||||
|
" # plt.subplots_adjust(hspace=0.5)\n",
|
||||||
|
" # plt.savefig(os.path.join(dataset,'预测值与真实值对比图.png'), bbox_inches='tight')\n",
|
||||||
|
" # plt.close()\n",
|
||||||
|
" \n",
|
||||||
|
" \n",
|
||||||
|
" # # 历史数据+预测数据\n",
|
||||||
|
" # # 拼接未来时间预测\n",
|
||||||
|
" df_predict = pd.read_csv(os.path.join(dataset,'predict.csv'))\n",
|
||||||
|
" df_predict.drop('unique_id',inplace=True,axis=1)\n",
|
||||||
|
" df_predict.dropna(axis=1,inplace=True)\n",
|
||||||
|
"\n",
|
||||||
|
" try:\n",
|
||||||
|
" df_predict['ds'] = pd.to_datetime(df_predict['ds'],format=r'%Y-%m-%d')\n",
|
||||||
|
" except ValueError :\n",
|
||||||
|
" df_predict['ds'] = pd.to_datetime(df_predict['ds'],format=r'%Y/%m/%d')\n",
|
||||||
|
"\n",
|
||||||
|
" \n",
|
||||||
|
" df_combined3 = pd.concat([df_combined3, df_predict]).reset_index(drop=True)\n",
|
||||||
|
"\n",
|
||||||
|
" # 计算每个模型与最佳模型的绝对误差比例,根据设置的阈值rote筛选预测值显示最大最小值\n",
|
||||||
|
" # names = []\n",
|
||||||
|
" # names_df = df_combined3.copy()\n",
|
||||||
|
" # for col in allmodelnames:\n",
|
||||||
|
" # names_df[f'{col}-{most_model_name}-误差比例'] = abs(names_df[col] - names_df[most_model_name]) / names_df[most_model_name]\n",
|
||||||
|
" # names.append(f'{col}-{most_model_name}-误差比例')\n",
|
||||||
|
"\n",
|
||||||
|
" # names_df = names_df[names]\n",
|
||||||
|
" # def add_rote_column(row):\n",
|
||||||
|
" # columns = []\n",
|
||||||
|
" # for r in names_df.columns:\n",
|
||||||
|
" # if row[r] <= rote:\n",
|
||||||
|
" # columns.append(r.split('-')[0])\n",
|
||||||
|
" # return pd.Series([columns], index=['columns'])\n",
|
||||||
|
" # names_df['columns'] = names_df.apply(add_rote_column, axis=1)\n",
|
||||||
|
" \n",
|
||||||
|
" def add_upper_lower_bound(row):\n",
|
||||||
|
"\n",
|
||||||
|
" # 计算上边界值\n",
|
||||||
|
" upper_bound = row.max()\n",
|
||||||
|
" # 计算下边界值\n",
|
||||||
|
" lower_bound = row.min()\n",
|
||||||
|
" return pd.Series([lower_bound, upper_bound], index=['min_within_quantile', 'max_within_quantile'])\n",
|
||||||
|
"\n",
|
||||||
|
" # df_combined3[['min_within_quantile','max_within_quantile']] = names_df.apply(add_upper_lower_bound, axis=1)\n",
|
||||||
|
"\n",
|
||||||
|
" # 取前五最佳模型的最大最小值作为上下边界值\n",
|
||||||
|
" # df_combined3[['min_within_quantile','max_within_quantile']]= df_combined3[modelnames].apply(add_upper_lower_bound, axis=1)\n",
|
||||||
|
" \n",
|
||||||
|
" def find_closest_values(row):\n",
|
||||||
|
" x = row.y\n",
|
||||||
|
" if x is None or np.isnan(x):\n",
|
||||||
|
" return pd.Series([None, None], index=['min_price','max_price'])\n",
|
||||||
|
" # row = row.drop('ds')\n",
|
||||||
|
" row = row.values.tolist()\n",
|
||||||
|
" row.sort()\n",
|
||||||
|
" print(row)\n",
|
||||||
|
" # x 在row中的索引\n",
|
||||||
|
" index = row.index(x)\n",
|
||||||
|
" if index == 0:\n",
|
||||||
|
" return pd.Series([row[index+1], row[index+2]], index=['min_price','max_price'])\n",
|
||||||
|
" elif index == len(row)-1:\n",
|
||||||
|
" return pd.Series([row[index-2], row[index-1]], index=['min_price','max_price'])\n",
|
||||||
|
" else:\n",
|
||||||
|
" return pd.Series([row[index-1], row[index+1]], index=['min_price','max_price'])\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
" \n",
|
||||||
|
" def find_most_common_model():\n",
|
||||||
|
" # 最多频率的模型名称\n",
|
||||||
|
" min_model_max_frequency_model = df_combined3['min_model'].tail(60).value_counts().idxmax()\n",
|
||||||
|
" max_model_max_frequency_model = df_combined3['max_model'].tail(60).value_counts().idxmax()\n",
|
||||||
|
" if min_model_max_frequency_model == max_model_max_frequency_model:\n",
|
||||||
|
" # 取60天第二多的模型\n",
|
||||||
|
" max_model_max_frequency_model = df_combined3['max_model'].tail(60).value_counts().nlargest(2).index[1]\n",
|
||||||
|
"\n",
|
||||||
|
" df_predict['min_model'] = min_model_max_frequency_model\n",
|
||||||
|
" df_predict['max_model'] = max_model_max_frequency_model\n",
|
||||||
|
" df_predict['min_within_quantile'] = df_predict[min_model_max_frequency_model]\n",
|
||||||
|
" df_predict['max_within_quantile'] = df_predict[max_model_max_frequency_model]\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
" # find_most_common_model()\n",
|
||||||
|
"\n",
|
||||||
|
" df_combined3['ds'] = pd.to_datetime(df_combined3['ds'])\n",
|
||||||
|
" df_combined3['ds'] = df_combined3['ds'].dt.strftime('%Y-%m-%d')\n",
|
||||||
|
" df_predict2 = df_combined3.tail(horizon)\n",
|
||||||
|
"\n",
|
||||||
|
" # 保存到数据库\n",
|
||||||
|
" # if not sqlitedb.check_table_exists('accuracy'):\n",
|
||||||
|
" # columns = ','.join(df_combined3.columns.to_list()+['id','CREAT_DATE','min_price','max_price'])\n",
|
||||||
|
" # sqlitedb.create_table('accuracy',columns=columns)\n",
|
||||||
|
" # existing_data = sqlitedb.select_data(table_name = \"accuracy\")\n",
|
||||||
|
"\n",
|
||||||
|
" # if not existing_data.empty:\n",
|
||||||
|
" # max_id = existing_data['id'].astype(int).max()\n",
|
||||||
|
" # df_predict2['id'] = range(max_id + 1, max_id + 1 + len(df_predict2))\n",
|
||||||
|
" # else:\n",
|
||||||
|
" # df_predict2['id'] = range(1, 1 + len(df_predict2))\n",
|
||||||
|
" # df_predict2['CREAT_DATE'] = now if end_time == '' else end_time\n",
|
||||||
|
" # df_predict2['CREAT_DATE'] = end_time\n",
|
||||||
|
" # def get_common_columns(df1, df2):\n",
|
||||||
|
" # # 获取两个DataFrame的公共列名\n",
|
||||||
|
" # return list(set(df1.columns).intersection(df2.columns))\n",
|
||||||
|
"\n",
|
||||||
|
" # common_columns = get_common_columns(df_predict2, existing_data)\n",
|
||||||
|
" # try:\n",
|
||||||
|
" # df_predict2[common_columns].to_sql(\"accuracy\", con=sqlitedb.connection, if_exists='append', index=False)\n",
|
||||||
|
" # except:\n",
|
||||||
|
" # df_predict2.to_sql(\"accuracy\", con=sqlitedb.connection, if_exists='append', index=False)\n",
|
||||||
|
" \n",
|
||||||
|
" # 更新accuracy表中的y值\n",
|
||||||
|
" # update_y = sqlitedb.select_data(table_name = \"accuracy\",where_condition='y is null')\n",
|
||||||
|
" # if len(update_y) > 0:\n",
|
||||||
|
" # df_combined4 = df_combined3[(df_combined3['ds'].isin(update_y['ds'])) & (df_combined3['y'].notnull())]\n",
|
||||||
|
" # if len(df_combined4) > 0: \n",
|
||||||
|
" # for index, row in df_combined4.iterrows():\n",
|
||||||
|
" # try:\n",
|
||||||
|
" # sqlitedb.update_data('accuracy',f\"y = {row['y']}\",f\"ds = '{row['ds']}'\")\n",
|
||||||
|
" # except:\n",
|
||||||
|
" # logger.error(f'更新accuracy表中的y值失败,row={row}')\n",
|
||||||
|
" # 上周准确率计算\n",
|
||||||
|
" # predict_y = sqlitedb.select_data(table_name = \"accuracy\") \n",
|
||||||
|
" # ids = predict_y[predict_y['min_price'].isnull()]['id'].tolist()\n",
|
||||||
|
" # ids = predict_y['id'].tolist()\n",
|
||||||
|
" # 准确率基准与绘图上下界逻辑一致\n",
|
||||||
|
" # predict_y[['min_price','max_price']] = predict_y[['min_within_quantile','max_within_quantile']]\n",
|
||||||
|
" # 模型评估前五均值 \n",
|
||||||
|
" # predict_y['min_price'] = predict_y[modelnames].mean(axis=1) -1\n",
|
||||||
|
" # predict_y['max_price'] = predict_y[modelnames].mean(axis=1) +1\n",
|
||||||
|
" # # 模型评估前十均值 \n",
|
||||||
|
" # predict_y['min_price'] = predict_y[allmodelnames[0:10]].mean(axis=1) -1.5\n",
|
||||||
|
" # predict_y['max_price'] = predict_y[allmodelnames[0:10]].mean(axis=1) +1.5\n",
|
||||||
|
" # 模型评估前十最大最小\n",
|
||||||
|
" # allmodelnames 和 predict_y 列 重复的\n",
|
||||||
|
" # allmodelnames = [col for col in allmodelnames if col in predict_y.columns]\n",
|
||||||
|
" # predict_y['min_price'] = predict_y[allmodelnames[0:10]].min(axis=1) \n",
|
||||||
|
" # predict_y['max_price'] = predict_y[allmodelnames[0:10]].max(axis=1)\n",
|
||||||
|
" # for id in ids:\n",
|
||||||
|
" # row = predict_y[predict_y['id'] == id]\n",
|
||||||
|
" # try:\n",
|
||||||
|
" # sqlitedb.update_data('accuracy',f\"min_price = {row['min_price'].values[0]},max_price = {row['max_price'].values[0]}\",f\"id = {id}\")\n",
|
||||||
|
" # except:\n",
|
||||||
|
" # logger.error(f'更新accuracy表中的min_price,max_price值失败,row={row}')\n",
|
||||||
|
" # 拼接市场最高最低价\n",
|
||||||
|
" # xlsfilename = os.path.join(dataset,'数据项下载.xls')\n",
|
||||||
|
" # df2 = pd.read_excel(xlsfilename)[5:]\n",
|
||||||
|
" # df2 = df2.rename(columns = {'数据项名称':'ds','布伦特最低价':'LOW_PRICE','布伦特最高价':'HIGH_PRICE'})\n",
|
||||||
|
" # print(df2.shape)\n",
|
||||||
|
" # df = pd.merge(predict_y,df2,on=['ds'],how='left')\n",
|
||||||
|
" # df['ds'] = pd.to_datetime(df['ds'])\n",
|
||||||
|
" # df = df.reindex()\n",
|
||||||
|
"\n",
|
||||||
|
" # 判断预测值在不在布伦特最高最低价范围内,准确率为1,否则为0\n",
|
||||||
|
" # def is_within_range(row):\n",
|
||||||
|
" # for model in allmodelnames:\n",
|
||||||
|
" # if row['LOW_PRICE'] <= row[col] <= row['HIGH_PRICE']:\n",
|
||||||
|
" # return 1\n",
|
||||||
|
" # else:\n",
|
||||||
|
" # return 0\n",
|
||||||
|
"\n",
|
||||||
|
" # 比较真实最高最低,和预测最高最低 计算准确率\n",
|
||||||
|
" # def calculate_accuracy(row):\n",
|
||||||
|
" # # 全子集情况:\n",
|
||||||
|
" # if (row['max_price'] >= row['HIGH_PRICE'] and row['min_price'] <= row['LOW_PRICE']) or \\\n",
|
||||||
|
" # (row['max_price'] <= row['HIGH_PRICE'] and row['min_price'] >= row['LOW_PRICE']):\n",
|
||||||
|
" # return 1 \n",
|
||||||
|
" # # 无交集情况:\n",
|
||||||
|
" # if row['max_price'] < row['LOW_PRICE'] or \\\n",
|
||||||
|
" # row['min_price'] > row['HIGH_PRICE']:\n",
|
||||||
|
" # return 0\n",
|
||||||
|
" # # 有交集情况:\n",
|
||||||
|
" # else:\n",
|
||||||
|
" # sorted_prices = sorted([row['LOW_PRICE'], row['min_price'], row['max_price'], row['HIGH_PRICE']])\n",
|
||||||
|
" # middle_diff = sorted_prices[2] - sorted_prices[1]\n",
|
||||||
|
" # price_range = row['HIGH_PRICE'] - row['LOW_PRICE']\n",
|
||||||
|
" # accuracy = middle_diff / price_range\n",
|
||||||
|
" # return accuracy\n",
|
||||||
|
"\n",
|
||||||
|
" # columns = ['HIGH_PRICE','LOW_PRICE','min_price','max_price']\n",
|
||||||
|
" # df[columns] = df[columns].astype(float)\n",
|
||||||
|
" # df['ACCURACY'] = df.apply(calculate_accuracy, axis=1)\n",
|
||||||
|
" # df['ACCURACY'] = df.apply(is_within_range, axis=1)\n",
|
||||||
|
" # 取结束日期上一周的日期\n",
|
||||||
|
" def get_week_date(end_time):\n",
|
||||||
|
" endtime = end_time\n",
|
||||||
|
" endtimeweek = datetime.datetime.strptime(endtime, '%Y-%m-%d')\n",
|
||||||
|
" up_week = endtimeweek - datetime.timedelta(days=endtimeweek.weekday() + 14)\n",
|
||||||
|
" up_week_dates = [up_week + datetime.timedelta(days=i) for i in range(14)]\n",
|
||||||
|
" create_dates = [date.strftime('%Y-%m-%d') for date in up_week_dates[4:-3]]\n",
|
||||||
|
" ds_dates = [date.strftime('%Y-%m-%d') for date in up_week_dates[-7:-2]]\n",
|
||||||
|
" return create_dates,ds_dates\n",
|
||||||
|
" \n",
|
||||||
|
" create_dates,ds_dates = get_week_date(end_time)\n",
|
||||||
|
" # 计算准确率并保存结果\n",
|
||||||
|
" def _get_accuracy_rate(df,create_dates,ds_dates):\n",
|
||||||
|
" df3 = df.copy()\n",
|
||||||
|
" df3 = df3[df3['CREAT_DATE'].isin(create_dates)]\n",
|
||||||
|
" df3 = df3[df3['ds'].isin(ds_dates)]\n",
|
||||||
|
" accuracy_rote = 0\n",
|
||||||
|
" for i,group in df3.groupby('CREAT_DATE'):\n",
|
||||||
|
" accuracy_rote += (group['ACCURACY'].sum()/len(group))*weight_dict[len(group)-1]\n",
|
||||||
|
" df4 = pd.DataFrame(columns=['开始日期','结束日期','准确率'])\n",
|
||||||
|
" df4.loc[len(df4)] = {'开始日期':ds_dates[0],'结束日期':ds_dates[-1],'准确率':accuracy_rote}\n",
|
||||||
|
" df4.to_sql(\"accuracy_rote\", con=sqlitedb.connection, if_exists='append', index=False)\n",
|
||||||
|
" # return df4\n",
|
||||||
|
" \n",
|
||||||
|
" # _get_accuracy_rate(df,create_dates,ds_dates)\n",
|
||||||
|
" \n",
|
||||||
|
" def _add_abs_error_rate():\n",
|
||||||
|
" # 计算每个预测值与真实值之间的偏差率\n",
|
||||||
|
" for model in allmodelnames:\n",
|
||||||
|
" df_combined3[f'{model}_abs_error_rate'] = abs(df_combined3['y'] - df_combined3[model]) / df_combined3['y']\n",
|
||||||
|
"\n",
|
||||||
|
" # 获取每行对应的最小偏差率值\n",
|
||||||
|
" min_abs_error_rate_values = df_combined3.apply(lambda row: row[[f'{model}_abs_error_rate' for model in allmodelnames]].min(), axis=1)\n",
|
||||||
|
" # 获取每行对应的最小偏差率值对应的列名\n",
|
||||||
|
" min_abs_error_rate_column_name = df_combined3.apply(lambda row: row[[f'{model}_abs_error_rate' for model in allmodelnames]].idxmin(), axis=1) \n",
|
||||||
|
" # 将列名索引转换为列名\n",
|
||||||
|
" min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(lambda x: x.split('_')[0])\n",
|
||||||
|
" # 获取最小偏差率对应的模型的预测值\n",
|
||||||
|
" min_abs_error_rate_predictions = df_combined3.apply(lambda row: row[min_abs_error_rate_column_name[row.name]], axis=1)\n",
|
||||||
|
" # 将最小偏差率对应的模型的预测值添加到DataFrame中\n",
|
||||||
|
" df_combined3['min_abs_error_rate_prediction'] = min_abs_error_rate_predictions\n",
|
||||||
|
" df_combined3['min_abs_error_rate_column_name'] = min_abs_error_rate_column_name\n",
|
||||||
|
" _add_abs_error_rate()\n",
|
||||||
|
"\n",
|
||||||
|
" # 判断 df 的数值列转为float\n",
|
||||||
|
" for col in df_combined3.columns:\n",
|
||||||
|
" try:\n",
|
||||||
|
" if col != 'ds':\n",
|
||||||
|
" df_combined3[col] = df_combined3[col].astype(float)\n",
|
||||||
|
" df_combined3[col] = df_combined3[col].round(2)\n",
|
||||||
|
" except ValueError:\n",
|
||||||
|
" pass\n",
|
||||||
|
" df_combined3.to_csv(os.path.join(dataset,\"testandpredict_groupby.csv\"),index=False) \n",
|
||||||
|
" \n",
|
||||||
|
" \n",
|
||||||
|
" # 历史价格+预测价格\n",
|
||||||
|
" sqlitedb.drop_table('testandpredict_groupby')\n",
|
||||||
|
" df_combined3.to_sql('testandpredict_groupby',sqlitedb.connection,index=False)\n",
|
||||||
|
" # 新增均值列\n",
|
||||||
|
" df_combined3['mean'] = df_combined3[modelnames].mean(axis=1)\n",
|
||||||
|
"\n",
|
||||||
|
" # def _plt_predict_ture(df):\n",
|
||||||
|
" # lens = df.shape[0] if df.shape[0] < 180 else 90\n",
|
||||||
|
" # df = df[-lens:] # 取180个数据点画图\n",
|
||||||
|
" # # 历史价格\n",
|
||||||
|
" # plt.figure(figsize=(20, 10))\n",
|
||||||
|
" # plt.plot(df['ds'], df['y'], label='真实值')\n",
|
||||||
|
" # # 均值线\n",
|
||||||
|
" # plt.plot(df['ds'], df['mean'], color='r', linestyle='--', label='前五模型预测均值')\n",
|
||||||
|
" # # 颜色填充\n",
|
||||||
|
" # plt.fill_between(df['ds'], df['max_within_quantile'], df['min_within_quantile'], alpha=0.2)\n",
|
||||||
|
" # markers = ['o', 's', '^', 'D', 'v', '*', 'p', 'h', 'H', '+', 'x', 'd']\n",
|
||||||
|
" # random_marker = random.choice(markers)\n",
|
||||||
|
" # for model in modelnames:\n",
|
||||||
|
" # # for model in ['BiTCN','RNN']:\n",
|
||||||
|
" # plt.plot(df['ds'][-horizon:], df[model][-horizon:], label=model,marker=random_marker)\n",
|
||||||
|
" # # plt.plot(df_combined3['ds'], df_combined3['min_abs_error_rate_prediction'], label='最小绝对误差', linestyle='--', color='orange')\n",
|
||||||
|
" # # 网格\n",
|
||||||
|
" # plt.grid(True)\n",
|
||||||
|
" # # 显示历史值\n",
|
||||||
|
" # for i, j in zip(df['ds'], df['y']):\n",
|
||||||
|
" # plt.text(i, j, str(j), ha='center', va='bottom')\n",
|
||||||
|
"\n",
|
||||||
|
" # # for model in most_model:\n",
|
||||||
|
" # # plt.plot(df['ds'], df[model], label=model,marker='o')\n",
|
||||||
|
" # # 当前日期画竖虚线\n",
|
||||||
|
" # plt.axvline(x=df['ds'].iloc[-horizon], color='r', linestyle='--')\n",
|
||||||
|
" # plt.legend()\n",
|
||||||
|
" # plt.xlabel('日期')\n",
|
||||||
|
" # plt.ylabel('价格')\n",
|
||||||
|
" \n",
|
||||||
|
" # plt.savefig(os.path.join(dataset,'历史价格-预测值.png'), bbox_inches='tight')\n",
|
||||||
|
" # plt.close()\n",
|
||||||
|
" \n",
|
||||||
|
" def _plt_top10_predict_ture():\n",
|
||||||
|
" # 模型评估前十均值画图\n",
|
||||||
|
" df = sqlitedb.select_data(table_name = \"accuracy\")\n",
|
||||||
|
" # CREAT_DATE 去重取id最大的数据(注意:按 id 降序排序后 keep='last' 实际保留的是每个 CREAT_DATE 中 id 最小的行,如需保留 id 最大的行应改为 keep='first')\n",
|
||||||
|
" df = df.sort_values(by=['CREAT_DATE','id'],ascending=[False,False]).drop_duplicates(subset=['CREAT_DATE'],keep='last')\n",
|
||||||
|
" print(df.shape)\n",
|
||||||
|
" lens = df.shape[0] if df.shape[0] < 180 else 180 \n",
|
||||||
|
" df = df[-lens:] # 取180个数据点画图\n",
|
||||||
|
" # 历史价格\n",
|
||||||
|
" plt.figure(figsize=(20, 10))\n",
|
||||||
|
" plt.plot(df['ds'], df['y'], label='真实值')\n",
|
||||||
|
" # 均值线\n",
|
||||||
|
" df['mean'] = df[allmodelnames[:10]].mean(axis=1)\n",
|
||||||
|
" plt.plot(df['ds'], df['mean'], color='g', linestyle='--', label='前十模型预测均值')\n",
|
||||||
|
" plt.plot(df['ds'], df['min_price'], color='r', linestyle='--', label='min_price')\n",
|
||||||
|
" plt.plot(df['ds'], df['max_price'], color='r', linestyle='--', label='max_price')\n",
|
||||||
|
" # 颜色填充\n",
|
||||||
|
" plt.fill_between(df['ds'], df['max_price'], df['min_price'], alpha=0.2)\n",
|
||||||
|
" markers = ['o', 's', '^', 'D', 'v', '*', 'p', 'h', 'H', '+', 'x', 'd']\n",
|
||||||
|
" random_marker = random.choice(markers)\n",
|
||||||
|
" # for model in modelnames[:5]:\n",
|
||||||
|
" # for model in ['BiTCN','RNN']:\n",
|
||||||
|
" # plt.plot(df['ds'][-horizon:], df[model][-horizon:], label=model,marker=random_marker)\n",
|
||||||
|
" # plt.plot(df_combined3['ds'], df_combined3['min_abs_error_rate_prediction'], label='最小绝对误差', linestyle='--', color='orange')\n",
|
||||||
|
" # 网格\n",
|
||||||
|
" plt.grid(True)\n",
|
||||||
|
" # 显示历史值\n",
|
||||||
|
" # for i, j in zip(df['ds'], df['y']):\n",
|
||||||
|
" # plt.text(i, j, str(j), ha='center', va='bottom')\n",
|
||||||
|
"\n",
|
||||||
|
" # for model in most_model:\n",
|
||||||
|
" # plt.plot(df['ds'], df[model], label=model,marker='o')\n",
|
||||||
|
" # 当前日期画竖虚线\n",
|
||||||
|
" plt.axvline(x=df['ds'].iloc[-horizon], color='r', linestyle='--')\n",
|
||||||
|
" plt.legend()\n",
|
||||||
|
" plt.xlabel('日期')\n",
|
||||||
|
" plt.ylabel('价格')\n",
|
||||||
|
" \n",
|
||||||
|
" plt.savefig(os.path.join(dataset,'历史价格-预测值1.png'), bbox_inches='tight')\n",
|
||||||
|
" plt.close()\n",
|
||||||
|
"\n",
|
||||||
|
" def _plt_predict_table(df): \n",
|
||||||
|
" # 预测值表格\n",
|
||||||
|
" fig, ax = plt.subplots(figsize=(20, 6))\n",
|
||||||
|
" ax.axis('off') # 关闭坐标轴\n",
|
||||||
|
" # 数值保留2位小数\n",
|
||||||
|
" df = df.round(2)\n",
|
||||||
|
" df = df[-horizon:]\n",
|
||||||
|
" df['Day'] = [f'Day_{i}' for i in range(1,horizon+1)]\n",
|
||||||
|
" # Day列放到最前面\n",
|
||||||
|
" df = df[['Day'] + list(df.columns[:-1])]\n",
|
||||||
|
" table = ax.table(cellText=df.values, colLabels=df.columns, loc='center')\n",
|
||||||
|
" #加宽表格\n",
|
||||||
|
" table.auto_set_font_size(False)\n",
|
||||||
|
" table.set_fontsize(10)\n",
|
||||||
|
"\n",
|
||||||
|
" # 设置表格样式,列数据最小的用绿色标识\n",
|
||||||
|
" plt.savefig(os.path.join(dataset,'预测值表格.png'), bbox_inches='tight')\n",
|
||||||
|
" plt.close()\n",
|
||||||
|
" \n",
|
||||||
|
" def _plt_model_results3():\n",
|
||||||
|
" # 可视化评估结果\n",
|
||||||
|
" plt.rcParams['font.sans-serif'] = ['SimHei']\n",
|
||||||
|
" fig, ax = plt.subplots(figsize=(20, 10))\n",
|
||||||
|
" ax.axis('off') # 关闭坐标轴\n",
|
||||||
|
" table = ax.table(cellText=model_results3.values, colLabels=model_results3.columns, loc='center')\n",
|
||||||
|
" # 加宽表格\n",
|
||||||
|
" table.auto_set_font_size(False)\n",
|
||||||
|
" table.set_fontsize(10)\n",
|
||||||
|
"\n",
|
||||||
|
" # 设置表格样式,列数据最小的用绿色标识\n",
|
||||||
|
" plt.savefig(os.path.join(dataset,'模型评估.png'), bbox_inches='tight')\n",
|
||||||
|
" plt.close()\n",
|
||||||
|
"\n",
|
||||||
|
" # _plt_predict_ture(df_combined3)\n",
|
||||||
|
" _plt_top10_predict_ture()\n",
|
||||||
|
" _plt_predict_table(df_combined3)\n",
|
||||||
|
" _plt_model_results3()\n",
|
||||||
|
"\n",
|
||||||
|
" return model_results3\n",
|
||||||
|
" \n",
|
||||||
|
"model_losss(sqlitedb=sqlitedb,end_time='2024-12-16')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "8ce1967f",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "base",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.11.7"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
203
测试环境获取市场信息平台数据项.ipynb
Normal file
203
测试环境获取市场信息平台数据项.ipynb
Normal file
@ -0,0 +1,203 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 25,
|
||||||
|
"id": "31c0e11d-c87a-4e95-92a0-d1d09625e255",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from config_jingbo import *\n",
|
||||||
|
"import requests\n",
|
||||||
|
"import json\n",
|
||||||
|
"import datetime"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 26,
|
||||||
|
"id": "83c81b9e",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"'http://192.168.100.53:8080/jingbo-dev/api/server/login'"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 26,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"login_pushreport_url\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "a058f507",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 27,
|
||||||
|
"id": "2b330ee3-c006-4ab1-8558-59c51ac8d86f",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"{'data': {'account': 'api_test',\n",
|
||||||
|
" 'password': 'ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=',\n",
|
||||||
|
" 'tenantHashCode': '8a4577dbd919675758d57999a1e891fe',\n",
|
||||||
|
" 'terminal': 'API'},\n",
|
||||||
|
" 'funcModule': 'API',\n",
|
||||||
|
" 'funcOperation': '获取token'}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 27,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"login_data"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 28,
|
||||||
|
"id": "dcb6100a-ed2b-4077-a1a9-361c6cb565f9",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"\n",
|
||||||
|
"def get_head_auth_report():\n",
|
||||||
|
" login_res = requests.post(url=login_pushreport_url, json=login_data, timeout=(3, 5))\n",
|
||||||
|
" text = json.loads(login_res.text)\n",
|
||||||
|
" print(text)\n",
|
||||||
|
" if text[\"status\"]:\n",
|
||||||
|
" token = text[\"data\"][\"accessToken\"]\n",
|
||||||
|
" return token\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 29,
|
||||||
|
"id": "22c0c7c4",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"{'confirmFlg': False, 'data': {'accessToken': 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfdGVzdCIsInRoIjoiOGE0NTc3ZGJkOTE5Njc1NzU4ZDU3OTk5YTFlODkxZmUiLCJsdCI6ImFwaSIsImlzcyI6IiIsInRtIjoiUEMiLCJleHAiOjE3MzUxNDkzMzYsImp0aSI6IjczYzJkOGJjYzQ2NzQwYjNiYWQxZmI3NjMzODM4YTcxIn0.zLVuyCEbg-x9lRXuJDYbdiwzo_nhEQGCCInnJKfQcd8', 'md5Token': '39413fe9e3e93f717d8d2713c4487172'}, 'status': True}\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"token = get_head_auth_report()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 30,
|
||||||
|
"id": "12077ead",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# token = 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfdGVzdCIsInRoIjoiOGE0NTc3ZGJkOTE5Njc1NzU4ZDU3OTk5YTFlODkxZmUiLCJsdCI6ImFwaSIsImlzcyI6IiIsInRtIjoiUEMiLCJleHAiOjE3MzE5NTkzNjUsImp0aSI6IjRiMjcwNTgzN2YyZDQxOWM4MzQ3NjI2NDQwZDlhZGQzIn0.PPgnoiJt412dJiceqVW8w7qkJFY4s-VqU9z6ZIkpqho'"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 31,
|
||||||
|
"id": "a7ae21d1",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# def upload_warning_data(warning_data):\n",
|
||||||
|
"# token = get_head_auth_report()\n",
|
||||||
|
"# warning_data = warning_data\n",
|
||||||
|
"# headers = {\"Authorization\": token}\n",
|
||||||
|
"# logger.info(\"预警上传中...\")\n",
|
||||||
|
"# logger.info(f\"token:{token}\")\n",
|
||||||
|
"# logger.info(f\"warning_data:{warning_data}\" )\n",
|
||||||
|
"# upload_res = requests.post(url=upload_warning_url, headers=headers, json=warning_data, timeout=(3, 15))\n",
|
||||||
|
"# if upload_res:\n",
|
||||||
|
"# return upload_res\n",
|
||||||
|
"# else:\n",
|
||||||
|
"# logger.info(\"预警上传失败\")\n",
|
||||||
|
"# return None\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"# logger.info(f'上传预警信息')\n",
|
||||||
|
"# try:\n",
|
||||||
|
"# warning_date = datetime.datetime.now().strftime('%Y-%m-%d')\n",
|
||||||
|
"# content = f'{warning_date}有2887个停更'\n",
|
||||||
|
"# warning_data['data']['WARNING_DATE'] = warning_date\n",
|
||||||
|
"# warning_data['data']['WARNING_CONTENT'] = content\n",
|
||||||
|
"# upload_warning_data(warning_data)\n",
|
||||||
|
"# logger.info(f'上传预警信息成功')\n",
|
||||||
|
"# except Exception as e:\n",
|
||||||
|
"# logger.error(f'上传预警信息失败:{e}')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "54942e1a",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"{\"confirmFlg\":false,\"data\":[],\"status\":true}\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"query_data_list_item_nos_data = {\n",
|
||||||
|
" \"funcModule\":'数据项编码集合',\n",
|
||||||
|
" \"funcOperation\":'数据项编码集合',\n",
|
||||||
|
" \"data\":{\n",
|
||||||
|
" \"dataItemNoList\":['EXCHANGE|RATE|MIDDLE_PRICE'],\n",
|
||||||
|
" \"dateEnd\":'20240101',\n",
|
||||||
|
" \"dateStart\":'20241024'\n",
|
||||||
|
" \n",
|
||||||
|
" }\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"headers = {\"Authorization\": token}\n",
|
||||||
|
"items_res = requests.post(url=query_data_list_item_nos_url, headers=headers, json=query_data_list_item_nos_data, timeout=(3, 35))\n",
|
||||||
|
"print(items_res.text)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "base",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.11.7"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user