upload_url = ""
# upload_url = "" # zhaoqiwei
upload_warning_url = ""
+query_data_list_item_nos_url = ""
login_data = {
"data": {
+query_data_list_item_nos_data = {
+ "funcModule":'数据项管理',
+ "funcOperation":'查询数据项编码',
+ "data":{
+ "dataItemNoList":['Brent活跃合约',''],
+ "dateEnd":'',
+ "dateStart":'2023-01-01'
+ }
# 北京环境数据库
host = ''
port = 3306
@@ -107,6 +107,20 @@ def predict_main():
sqlitedb.insert_data('trueandpredict', tuple(row_dict.values()), columns=row_dict.keys())
+ # 更新accuracy表的y值
+ if not sqlitedb.check_table_exists('accuracy'):
+ pass
+ else:
+ update_y = sqlitedb.select_data('accuracy',where_condition="y is null")
+ if len(update_y) > 0:
+ logger.info('更新accuracy表的y值')
+ # 只保留 update_y 中 ds 不晚于 end_time 的行
+ update_y = update_y[update_y['ds']<=end_time]
+ for row in update_y.itertuples(index=False):
+ row_dict = row._asdict()
+ yy = df[df['ds']==row_dict['ds']]['y'].values[0]
+ sqlitedb.update_data('accuracy', f"y = {yy}", where_condition=f"ds = '{row_dict['ds']}'")
import datetime
# 判断当前日期是不是周一
is_weekday = datetime.datetime.now().weekday() == 0
global end_time
is_on = True
# 遍历2024-11-25 到 2024-12-3 之间的工作日日期
- for i_time in pd.date_range('2024-10-07', '2024-12-16', freq='B'):
+ for i_time in pd.date_range('2024-10-29', '2024-12-16', freq='B'):
end_time = i_time.strftime('%Y-%m-%d')
if is_on:
@@ -401,8 +401,6 @@ def model_losss(sqlitedb,end_time):
return pd.Series([row[index-1], row[index+1]], index=['min_price','max_price'])
def find_most_common_model():
# 最多频率的模型名称
min_model_max_frequency_model = df_combined3['min_model'].tail(60).value_counts().idxmax()
@@ -445,17 +443,7 @@ def model_losss(sqlitedb,end_time):
df_predict2[common_columns].to_sql("accuracy", con=sqlitedb.connection, if_exists='append', index=False)
df_predict2.to_sql("accuracy", con=sqlitedb.connection, if_exists='append', index=False)
- # 更新accuracy表中的y值
- update_y = sqlitedb.select_data(table_name = "accuracy",where_condition='y is null')
- if len(update_y) > 0:
- df_combined4 = df_combined3[(df_combined3['ds'].isin(update_y['ds'])) & (df_combined3['y'].notnull())]
- if len(df_combined4) > 0:
- for index, row in df_combined4.iterrows():
- try:
- sqlitedb.update_data('accuracy',f"y = {row['y']}",f"ds = '{row['ds']}'")
- except:
- logger.error(f'更新accuracy表中的y值失败,row={row}')
# 上周准确率计算
predict_y = sqlitedb.select_data(table_name = "accuracy")
# ids = predict_y[predict_y['min_price'].isnull()]['id'].tolist()
@@ -479,6 +467,8 @@ def model_losss(sqlitedb,end_time):
sqlitedb.update_data('accuracy',f"min_price = {row['min_price'].values[0]},max_price = {row['max_price'].values[0]}",f"id = {id}")
# 拼接市场最高最低价
xlsfilename = os.path.join(dataset,'数据项下载.xls')
df2 = pd.read_excel(xlsfilename)[5:]
@@ -496,6 +486,7 @@ def model_losss(sqlitedb,end_time):
return 0
+ # 定义一个函数来计算准确率
# 比较真实最高最低,和预测最高最低 计算准确率
def calculate_accuracy(row):
# 全子集情况:
@@ -527,430 +518,26 @@ def model_losss(sqlitedb,end_time):
create_dates = [date.strftime('%Y-%m-%d') for date in up_week_dates[4:-3]]
ds_dates = [date.strftime('%Y-%m-%d') for date in up_week_dates[-7:-2]]
return create_dates,ds_dates
- create_dates,ds_dates = get_week_date(end_time)
# 计算准确率并保存结果
- def _get_accuracy_rate(df,create_dates,ds_dates):
+ def _get_accuracy_rate(df,create_dates,ds_dates,endtime):
df3 = df.copy()
df3 = df3[df3['CREAT_DATE'].isin(create_dates)]
df3 = df3[df3['ds'].isin(ds_dates)]
accuracy_rote = 0
for i,group in df3.groupby('CREAT_DATE'):
- accuracy_rote += (group['ACCURACY'].sum()/len(group))*weight_dict[len(group)-1]
- df4 = pd.DataFrame(columns=['开始日期','结束日期','准确率'])
- df4.loc[len(df4)] = {'开始日期':ds_dates[0],'结束日期':ds_dates[-1],'准确率':accuracy_rote}
- df4.to_sql("accuracy_rote", con=sqlitedb.connection, if_exists='append', index=False)
- # return df4
- _get_accuracy_rate(df,create_dates,ds_dates)
- def _add_abs_error_rate():
- # 计算每个预测值与真实值之间的偏差率
- for model in allmodelnames:
- df_combined3[f'{model}_abs_error_rate'] = abs(df_combined3['y'] - df_combined3[model]) / df_combined3['y']
- # 获取每行对应的最小偏差率值
- min_abs_error_rate_values = df_combined3.apply(lambda row: row[[f'{model}_abs_error_rate' for model in allmodelnames]].min(), axis=1)
- # 获取每行对应的最小偏差率值对应的列名
- min_abs_error_rate_column_name = df_combined3.apply(lambda row: row[[f'{model}_abs_error_rate' for model in allmodelnames]].idxmin(), axis=1)
- # 将列名索引转换为列名
- min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(lambda x: x.split('_')[0])
- # 获取最小偏差率对应的模型的预测值
- min_abs_error_rate_predictions = df_combined3.apply(lambda row: row[min_abs_error_rate_column_name[row.name]], axis=1)
- # 将最小偏差率对应的模型的预测值添加到DataFrame中
- df_combined3['min_abs_error_rate_prediction'] = min_abs_error_rate_predictions
- df_combined3['min_abs_error_rate_column_name'] = min_abs_error_rate_column_name
- # _add_abs_error_rate()
- # 判断 df 的数值列转为float
- for col in df_combined3.columns:
- try:
- if col != 'ds':
- df_combined3[col] = df_combined3[col].astype(float)
- df_combined3[col] = df_combined3[col].round(2)
- except ValueError:
- pass
- df_combined3.to_csv(os.path.join(dataset,"testandpredict_groupby.csv"),index=False)
- # 历史价格+预测价格
- sqlitedb.drop_table('testandpredict_groupby')
- df_combined3.to_sql('testandpredict_groupby',sqlitedb.connection,index=False)
- # 新增均值列
- df_combined3['mean'] = df_combined3[modelnames].mean(axis=1)
- def _plt_predict_ture(df):
- lens = df.shape[0] if df.shape[0] < 180 else 90
- df = df[-lens:] # 取180个数据点画图
- # 历史价格
- plt.figure(figsize=(20, 10))
- plt.plot(df['ds'], df['y'], label='真实值')
- # 均值线
- plt.plot(df['ds'], df['mean'], color='r', linestyle='--', label='前五模型预测均值')
- # 颜色填充
- plt.fill_between(df['ds'], df['max_within_quantile'], df['min_within_quantile'], alpha=0.2)
- markers = ['o', 's', '^', 'D', 'v', '*', 'p', 'h', 'H', '+', 'x', 'd']
- random_marker = random.choice(markers)
- for model in modelnames:
- # for model in ['BiTCN','RNN']:
- plt.plot(df['ds'][-horizon:], df[model][-horizon:], label=model,marker=random_marker)
- # plt.plot(df_combined3['ds'], df_combined3['min_abs_error_rate_prediction'], label='最小绝对误差', linestyle='--', color='orange')
- # 网格
- plt.grid(True)
- # 显示历史值
- for i, j in zip(df['ds'], df['y']):
- plt.text(i, j, str(j), ha='center', va='bottom')
- # for model in most_model:
- # plt.plot(df['ds'], df[model], label=model,marker='o')
- # 当前日期画竖虚线
- plt.axvline(x=df['ds'].iloc[-horizon], color='r', linestyle='--')
- plt.legend()
- plt.xlabel('日期')
- plt.ylabel('价格')
- plt.savefig(os.path.join(dataset,'历史价格-预测值.png'), bbox_inches='tight')
- plt.close()
- def _plt_predict_table(df):
- # 预测值表格
- fig, ax = plt.subplots(figsize=(20, 6))
- ax.axis('off') # 关闭坐标轴
- # 数值保留2位小数
- df = df.round(2)
- df = df[-horizon:]
- df['Day'] = [f'Day_{i}' for i in range(1,horizon+1)]
- # Day列放到最前面
- df = df[['Day'] + list(df.columns[:-1])]
- table = ax.table(cellText=df.values, colLabels=df.columns, loc='center')
- #加宽表格
- table.auto_set_font_size(False)
- table.set_fontsize(10)
- # 设置表格样式,列数据最小的用绿色标识
- plt.savefig(os.path.join(dataset,'预测值表格.png'), bbox_inches='tight')
- plt.close()
- def _plt_model_results3():
- # 可视化评估结果
- plt.rcParams['font.sans-serif'] = ['SimHei']
- fig, ax = plt.subplots(figsize=(20, 10))
- ax.axis('off') # 关闭坐标轴
- table = ax.table(cellText=model_results3.values, colLabels=model_results3.columns, loc='center')
- # 加宽表格
- table.auto_set_font_size(False)
- table.set_fontsize(10)
- # 设置表格样式,列数据最小的用绿色标识
- plt.savefig(os.path.join(dataset,'模型评估.png'), bbox_inches='tight')
- plt.close()
- _plt_predict_ture(df_combined3)
- _plt_predict_table(df_combined3)
- _plt_model_results3()
- return model_results3
-# 原油计算预测评估指数
-def model_losss_bak(sqlitedb,end_time):
- global dataset
- global rote
- most_model = [sqlitedb.select_data('most_model',columns=['most_common_model'],order_by='ds desc',limit=1).values[0][0]]
- most_model_name = most_model[0]
- # 预测数据处理 predict
- df_combined = loadcsv(os.path.join(dataset,"cross_validation.csv"))
- df_combined = dateConvert(df_combined)
- # 删除空列
- df_combined.dropna(axis=1,inplace=True)
- # 删除缺失值,预测过程不能有缺失值
- df_combined.dropna(inplace=True)
- # 其他列转为数值类型
- df_combined = df_combined.astype({col: 'float32' for col in df_combined.columns if col not in ['cutoff','ds'] })
- # 使用 groupby 和 transform 结合 lambda 函数来获取每个分组中 cutoff 的最小值,并创建一个新的列来存储这个最大值
- df_combined['max_cutoff'] = df_combined.groupby('ds')['cutoff'].transform('max')
- # 然后筛选出那些 cutoff 等于 max_cutoff 的行,这样就得到了每个分组中 cutoff 最大的行,并保留了其他列
- df_combined = df_combined[df_combined['cutoff'] == df_combined['max_cutoff']]
- # 删除模型生成的cutoff列
- df_combined.drop(columns=['cutoff', 'max_cutoff'], inplace=True)
- # 获取模型名称
- modelnames = df_combined.columns.to_list()[1:]
- if 'y' in modelnames:
- modelnames.remove('y')
- df_combined3 = df_combined.copy() # 备份df_combined,后面画图需要
- # 空的列表存储每个模型的MSE、RMSE、MAE、MAPE、SMAPE
- cellText = []
- # 遍历模型名称,计算模型评估指标
- for model in modelnames:
- modelmse = mse(df_combined['y'], df_combined[model])
- modelrmse = rmse(df_combined['y'], df_combined[model])
- modelmae = mae(df_combined['y'], df_combined[model])
- # modelmape = mape(df_combined['y'], df_combined[model])
- # modelsmape = smape(df_combined['y'], df_combined[model])
- # modelr2 = r2_score(df_combined['y'], df_combined[model])
- cellText.append([model,round(modelmse, 3), round(modelrmse, 3), round(modelmae, 3)])
- model_results3 = pd.DataFrame(cellText,columns=['模型(Model)','平均平方误差(MSE)', '均方根误差(RMSE)', '平均绝对误差(MAE)'])
- # 按MSE降序排列
- model_results3 = model_results3.sort_values(by='平均平方误差(MSE)', ascending=True)
- model_results3.to_csv(os.path.join(dataset,"model_evaluation.csv"),index=False)
- modelnames = model_results3['模型(Model)'].tolist()
- allmodelnames = modelnames.copy()
- # 保存5个最佳模型的名称
- if len(modelnames) > 5:
- modelnames = modelnames[0:5]
- if is_fivemodels:
- pass
- else:
- with open(os.path.join(dataset,"best_modelnames.txt"), 'w') as f:
- f.write(','.join(modelnames) + '\n')
- # 预测值与真实值对比图
- plt.rcParams['font.sans-serif'] = ['SimHei']
- plt.figure(figsize=(15, 10))
- for n,model in enumerate(modelnames[:5]):
- plt.subplot(3, 2, n+1)
- plt.plot(df_combined3['ds'], df_combined3['y'], label='真实值')
- plt.plot(df_combined3['ds'], df_combined3[model], label=model)
- plt.legend()
- plt.xlabel('日期')
- plt.ylabel('价格')
- plt.title(model+'拟合')
- plt.subplots_adjust(hspace=0.5)
- plt.savefig(os.path.join(dataset,'预测值与真实值对比图.png'), bbox_inches='tight')
- plt.close()
- # # 历史数据+预测数据
- # # 拼接未来时间预测
- df_predict = pd.read_csv(os.path.join(dataset,'predict.csv'))
- df_predict.drop('unique_id',inplace=True,axis=1)
- df_predict.dropna(axis=1,inplace=True)
- try:
- df_predict['ds'] = pd.to_datetime(df_predict['ds'],format=r'%Y-%m-%d')
- except ValueError :
- df_predict['ds'] = pd.to_datetime(df_predict['ds'],format=r'%Y/%m/%d')
- # def first_row_to_database(df):
- # # # 取第一行数据存储到数据库中
- # first_row = df.head(1)
- # first_row['ds'] = first_row['ds'].dt.strftime('%Y-%m-%d 00:00:00')
- # # 将预测结果保存到数据库
- # if not sqlitedb.check_table_exists('trueandpredict'):
- # first_row.to_sql('trueandpredict',sqlitedb.connection,index=False)
- # else:
- # for col in first_row.columns:
- # sqlitedb.add_column_if_not_exists('trueandpredict',col,'TEXT')
- # for row in first_row.itertuples(index=False):
- # row_dict = row._asdict()
- # columns=row_dict.keys()
- # check_query = sqlitedb.select_data('trueandpredict',where_condition = f"ds = '{row.ds}'")
- # if len(check_query) > 0:
- # set_clause = ", ".join([f"{key} = '{value}'" for key, value in row_dict.items()])
- # sqlitedb.update_data('trueandpredict',set_clause,where_condition = f"ds = '{row.ds}'")
- # continue
- # sqlitedb.insert_data('trueandpredict',tuple(row_dict.values()),columns=columns)
- # first_row_to_database(df_predict)
- df_combined3 = pd.concat([df_combined3, df_predict]).reset_index(drop=True)
- # 计算每个模型与最佳模型的绝对误差比例,根据设置的阈值rote筛选预测值显示最大最小值
- names = []
- names_df = df_combined3.copy()
- for col in allmodelnames:
- names_df[f'{col}-{most_model_name}-误差比例'] = abs(names_df[col] - names_df[most_model_name]) / names_df[most_model_name]
- names.append(f'{col}-{most_model_name}-误差比例')
- names_df = names_df[names]
- def add_rote_column(row):
- columns = []
- for r in names_df.columns:
- if row[r] <= rote:
- columns.append(r.split('-')[0])
- return pd.Series([columns], index=['columns'])
- names_df['columns'] = names_df.apply(add_rote_column, axis=1)
- def add_upper_lower_bound(row):
- # 计算上边界值
- upper_bound = row.max()
- # 计算下边界值
- lower_bound = row.min()
- return pd.Series([lower_bound, upper_bound], index=['min_within_quantile', 'max_within_quantile'])
- # df_combined3[['min_within_quantile','max_within_quantile']] = names_df.apply(add_upper_lower_bound, axis=1)
- # 取前五最佳模型的最大最小值作为上下边界值
- df_combined3[['min_within_quantile','max_within_quantile']]= df_combined3[modelnames].apply(add_upper_lower_bound, axis=1)
- def find_closest_values(row):
- x = row.y
- if x is None or np.isnan(x):
- return pd.Series([None, None], index=['min_price','max_price'])
- # row = row.drop('ds')
- row = row.values.tolist()
- row.sort()
- print(row)
- # x 在row中的索引
- index = row.index(x)
- if index == 0:
- return pd.Series([row[index+1], row[index+2]], index=['min_price','max_price'])
- elif index == len(row)-1:
- return pd.Series([row[index-2], row[index-1]], index=['min_price','max_price'])
- else:
- return pd.Series([row[index-1], row[index+1]], index=['min_price','max_price'])
- def find_most_common_model():
- # 最多频率的模型名称
- min_model_max_frequency_model = df_combined3['min_model'].tail(60).value_counts().idxmax()
- max_model_max_frequency_model = df_combined3['max_model'].tail(60).value_counts().idxmax()
- if min_model_max_frequency_model == max_model_max_frequency_model:
- # 取60天第二多的模型
- max_model_max_frequency_model = df_combined3['max_model'].tail(60).value_counts().nlargest(2).index[1]
- df_predict['min_model'] = min_model_max_frequency_model
- df_predict['max_model'] = max_model_max_frequency_model
- df_predict['min_within_quantile'] = df_predict[min_model_max_frequency_model]
- df_predict['max_within_quantile'] = df_predict[max_model_max_frequency_model]
- # find_most_common_model()
- df_combined3['ds'] = pd.to_datetime(df_combined3['ds'])
- df_combined3['ds'] = df_combined3['ds'].dt.strftime('%Y-%m-%d')
- df_predict2 = df_combined3.tail(horizon)
- # 保存到数据库
- if not sqlitedb.check_table_exists('accuracy'):
- columns = ','.join(df_combined3.columns.to_list()+['id','CREAT_DATE','min_price','max_price'])
- sqlitedb.create_table('accuracy',columns=columns)
- existing_data = sqlitedb.select_data(table_name = "accuracy")
- if not existing_data.empty:
- max_id = existing_data['id'].astype(int).max()
- df_predict2['id'] = range(max_id + 1, max_id + 1 + len(df_predict2))
- else:
- df_predict2['id'] = range(1, 1 + len(df_predict2))
- # df_predict2['CREAT_DATE'] = now if end_time == '' else end_time
- df_predict2['CREAT_DATE'] = end_time
- def get_common_columns(df1, df2):
- # 获取两个DataFrame的公共列名
- return list(set(df1.columns).intersection(df2.columns))
- common_columns = get_common_columns(df_predict2, existing_data)
- try:
- df_predict2[common_columns].to_sql("accuracy", con=sqlitedb.connection, if_exists='append', index=False)
- except:
- df_predict2.to_sql("accuracy", con=sqlitedb.connection, if_exists='append', index=False)
- # 更新accuracy表中的y值
- update_y = sqlitedb.select_data(table_name = "accuracy",where_condition='y is null')
- if len(update_y) > 0:
- df_combined4 = df_combined3[(df_combined3['ds'].isin(update_y['ds'])) & (df_combined3['y'].notnull())]
- if len(df_combined4) > 0:
- for index, row in df_combined4.iterrows():
- try:
- sqlitedb.update_data('accuracy',f"y = {row['y']}",f"ds = '{row['ds']}'")
- except:
- logger.error(f'更新accuracy表中的y值失败,row={row}')
- # 上周准确率计算
- predict_y = sqlitedb.select_data(table_name = "accuracy")
- # ids = predict_y[predict_y['min_price'].isnull()]['id'].tolist()
- ids = predict_y['id'].tolist()
- # 准确率基准与绘图上下界逻辑一致
- # predict_y[['min_price','max_price']] = predict_y[['min_within_quantile','max_within_quantile']]
- # 模型评估前五均值
- predict_y['min_price'] = predict_y[modelnames].mean(axis=1) -1
- predict_y['max_price'] = predict_y[modelnames].mean(axis=1) +1
- # 模型评估前十均值
- # predict_y['min_price'] = predict_y[allmodelnames[0:10]].mean(axis=1) -1
- # predict_y['max_price'] = predict_y[allmodelnames[0:10]].mean(axis=1) +1
- # 模型评估前十最大最小
- # allmodelnames 和 predict_y 列 重复的
- # allmodelnames = [col for col in allmodelnames if col in predict_y.columns]
- # predict_y['min_price'] = predict_y[allmodelnames[0:10]].min(axis=1)
- # predict_y['max_price'] = predict_y[allmodelnames[0:10]].max(axis=1)
- for id in ids:
- row = predict_y[predict_y['id'] == id]
- try:
- sqlitedb.update_data('accuracy',f"min_price = {row['min_price'].values[0]},max_price = {row['max_price'].values[0]}",f"id = {id}")
- except:
- logger.error(f'更新accuracy表中的min_price,max_price值失败,row={row}')
- # 拼接市场最高最低价
- xlsfilename = os.path.join(dataset,'数据项下载.xls')
- df2 = pd.read_excel(xlsfilename)[5:]
- df2 = df2.rename(columns = {'数据项名称':'ds','布伦特最低价':'LOW_PRICE','布伦特最高价':'HIGH_PRICE'})
- print(df2.shape)
- df = pd.merge(predict_y,df2,on=['ds'],how='left')
- df['ds'] = pd.to_datetime(df['ds'])
- df = df.reindex()
- # 判断预测值在不在布伦特最高最低价范围内,准确率为1,否则为0
- def is_within_range(row):
- for model in allmodelnames:
- if row['LOW_PRICE'] <= row[col] <= row['HIGH_PRICE']:
- return 1
- else:
- return 0
- # 比较真实最高最低,和预测最高最低 计算准确率
- def calculate_accuracy(row):
- # 全子集情况:
- if (row['max_price'] >= row['HIGH_PRICE'] and row['min_price'] <= row['LOW_PRICE']) or \
- (row['max_price'] <= row['HIGH_PRICE'] and row['min_price'] >= row['LOW_PRICE']):
- return 1
- # 无交集情况:
- if row['max_price'] < row['LOW_PRICE'] or \
- row['min_price'] > row['HIGH_PRICE']:
- return 0
- # 有交集情况:
- else:
- sorted_prices = sorted([row['LOW_PRICE'], row['min_price'], row['max_price'], row['HIGH_PRICE']])
- middle_diff = sorted_prices[2] - sorted_prices[1]
- price_range = row['HIGH_PRICE'] - row['LOW_PRICE']
- accuracy = middle_diff / price_range
- return accuracy
- columns = ['HIGH_PRICE','LOW_PRICE','min_price','max_price']
- df[columns] = df[columns].astype(float)
- df['ACCURACY'] = df.apply(calculate_accuracy, axis=1)
- # df['ACCURACY'] = df.apply(is_within_range, axis=1)
- # 取结束日期上一周的日期
- def get_week_date(end_time):
- endtime = end_time
- endtimeweek = datetime.datetime.strptime(endtime, '%Y-%m-%d')
- up_week = endtimeweek - datetime.timedelta(days=endtimeweek.weekday() + 14)
- up_week_dates = [up_week + datetime.timedelta(days=i) for i in range(14)][4:-2]
- up_week_dates = [date.strftime('%Y-%m-%d') for date in up_week_dates]
- return up_week_dates
- up_week_dates = get_week_date(end_time)
- # 计算准确率并保存结果
- def _get_accuracy_rate(df,up_week_dates,endtime):
- df3 = df.copy()
- df3 = df3[df3['CREAT_DATE'].isin(up_week_dates)]
- df3 = df3[df3['ds'].isin(up_week_dates)]
- accuracy_rote = 0
- for i,group in df3.groupby('ds'):
- print('权重:',weight_dict[len(group)-1])
- print('准确率:',(group['ACCURACY'].sum()/len(group))*weight_dict[len(group)-1])
+ # print('日期:',i)
+ # print(group)
+ # print('权重:',weight_dict[len(group)-1])
+ # print('准确率:',(group['ACCURACY'].sum()/len(group))*weight_dict[len(group)-1])
accuracy_rote += (group['ACCURACY'].sum()/len(group))*weight_dict[len(group)-1]
df4 = pd.DataFrame(columns=['开始日期','结束日期','准确率'])
- df4.loc[len(df4)] = {'开始日期':up_week_dates[0],'结束日期':up_week_dates[-1],'准确率':accuracy_rote}
+ df4.loc[len(df4)] = {'开始日期':ds_dates[0],'结束日期':ds_dates[-1],'准确率':accuracy_rote}
+ df4.to_csv(os.path.join(dataset,f'accuracy_rote_{endtime}.csv'),index=False)
df4.to_sql("accuracy_rote", con=sqlitedb.connection, if_exists='append', index=False)
- _get_accuracy_rate(df,up_week_dates,end_time)
+ create_dates,ds_dates = get_week_date(end_time)
+ _get_accuracy_rate(df,create_dates,ds_dates,end_time)
def _add_abs_error_rate():
# 计算每个预测值与真实值之间的偏差率
@@ -1215,7 +802,7 @@ def model_losss_juxiting(sqlitedb):
df_predict2 = df_predict.copy()
df_predict2['ds'] = pd.to_datetime(df_predict2['ds'])
- df_predict2['ds'] = df_predict2['ds'].dt.strftime('%Y-%m-%d 00:00:00')
+ df_predict2['ds'] = df_predict2['ds'].dt.strftime('%Y-%m-%d')
def _add_abs_error_rate():
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "7fadc60c-d710-4b8c-89cd-1d889ece1eaf",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "数据库连接成功 jingbo_test root\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 读取配置\n",
+ "# 父目录下的lib\n",
+ "from lib.dataread import *\n",
+ "from lib.tools import Graphs,mse,rmse,mae,exception_logger\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "0e5b6f30-b7ca-4718-97a3-48b54156e07f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(51, 30)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
+ "\n",
+ "
+ " \n",
+ " \n",
+ " | \n",
+ " 模型(Model) | \n",
+ " 平均平方误差(MSE) | \n",
+ " 均方根误差(RMSE) | \n",
+ " 平均绝对误差(MAE) | \n",
+ "
+ " \n",
+ " \n",
+ " \n",
+ " 11 | \n",
+ " DilatedRNN | \n",
+ " 1.567000 | \n",
+ " 1.252 | \n",
+ " 0.978 | \n",
+ "
+ " \n",
+ " 14 | \n",
+ " NLinear | \n",
+ " 1.905000 | \n",
+ " 1.380 | \n",
+ " 1.104 | \n",
+ "
+ " \n",
+ " 10 | \n",
+ " BiTCN | \n",
+ " 1.906000 | \n",
+ " 1.380 | \n",
+ " 1.042 | \n",
+ "
+ " \n",
+ " 6 | \n",
+ " PatchTST | \n",
+ " 1.939000 | \n",
+ " 1.393 | \n",
+ " 1.129 | \n",
+ "
+ " \n",
+ " 19 | \n",
+ " TiDE | \n",
+ " 1.967000 | \n",
+ " 1.402 | \n",
+ " 1.090 | \n",
+ "
+ " \n",
+ " 4 | \n",
+ " TSMixer | \n",
+ " 2.056000 | \n",
+ " 1.434 | \n",
+ " 1.111 | \n",
+ "
+ " \n",
+ " 7 | \n",
+ " RNN | \n",
+ " 2.101000 | \n",
+ " 1.449 | \n",
+ " 1.144 | \n",
+ "
+ " \n",
+ " 13 | \n",
+ " DLinear | \n",
+ " 2.162000 | \n",
+ " 1.470 | \n",
+ " 1.178 | \n",
+ "
+ " \n",
+ " 15 | \n",
+ " TFT | \n",
+ " 2.196000 | \n",
+ " 1.482 | \n",
+ " 1.137 | \n",
+ "
+ " \n",
+ " 16 | \n",
+ " FEDformer | \n",
+ " 2.211000 | \n",
+ " 1.487 | \n",
+ " 1.239 | \n",
+ "
+ " \n",
+ " 9 | \n",
+ " TCN | \n",
+ " 2.397000 | \n",
+ " 1.548 | \n",
+ " 1.276 | \n",
+ "
+ " \n",
+ " 0 | \n",
+ " NHITS | \n",
+ " 2.454000 | \n",
+ " 1.567 | \n",
+ " 1.190 | \n",
+ "
+ " \n",
+ " 12 | \n",
+ " MLP | \n",
+ " 2.468000 | \n",
+ " 1.571 | \n",
+ " 1.224 | \n",
+ "
+ " \n",
+ " 5 | \n",
+ " TSMixerx | \n",
+ " 2.490000 | \n",
+ " 1.578 | \n",
+ " 1.231 | \n",
+ "
+ " \n",
+ " 1 | \n",
+ " Informer | \n",
+ " 3.095000 | \n",
+ " 1.759 | \n",
+ " 1.352 | \n",
+ "
+ " \n",
+ " 20 | \n",
+ " DeepNPTS | \n",
+ " 3.267000 | \n",
+ " 1.808 | \n",
+ " 1.357 | \n",
+ "
+ " \n",
+ " 8 | \n",
+ " GRU | \n",
+ " 5.172000 | \n",
+ " 2.274 | \n",
+ " 1.909 | \n",
+ "
+ " \n",
+ " 2 | \n",
+ " LSTM | \n",
+ " 6.844000 | \n",
+ " 2.616 | \n",
+ " 2.386 | \n",
+ "
+ " \n",
+ " 18 | \n",
+ " MLPMultivariate | \n",
+ " 8.163000 | \n",
+ " 2.857 | \n",
+ " 2.221 | \n",
+ "
+ " \n",
+ " 17 | \n",
+ " StemGNN | \n",
+ " 17.216000 | \n",
+ " 4.149 | \n",
+ " 3.359 | \n",
+ "
+ " \n",
+ " 3 | \n",
+ " iTransformer | \n",
+ " 21.568001 | \n",
+ " 4.644 | \n",
+ " 3.487 | \n",
+ "
+ " \n",
+ "
+ "
+ ],
+ "text/plain": [
+ " 模型(Model) 平均平方误差(MSE) 均方根误差(RMSE) 平均绝对误差(MAE)\n",
+ "11 DilatedRNN 1.567000 1.252 0.978\n",
+ "14 NLinear 1.905000 1.380 1.104\n",
+ "10 BiTCN 1.906000 1.380 1.042\n",
+ "6 PatchTST 1.939000 1.393 1.129\n",
+ "19 TiDE 1.967000 1.402 1.090\n",
+ "4 TSMixer 2.056000 1.434 1.111\n",
+ "7 RNN 2.101000 1.449 1.144\n",
+ "13 DLinear 2.162000 1.470 1.178\n",
+ "15 TFT 2.196000 1.482 1.137\n",
+ "16 FEDformer 2.211000 1.487 1.239\n",
+ "9 TCN 2.397000 1.548 1.276\n",
+ "0 NHITS 2.454000 1.567 1.190\n",
+ "12 MLP 2.468000 1.571 1.224\n",
+ "5 TSMixerx 2.490000 1.578 1.231\n",
+ "1 Informer 3.095000 1.759 1.352\n",
+ "20 DeepNPTS 3.267000 1.808 1.357\n",
+ "8 GRU 5.172000 2.274 1.909\n",
+ "2 LSTM 6.844000 2.616 2.386\n",
+ "18 MLPMultivariate 8.163000 2.857 2.221\n",
+ "17 StemGNN 17.216000 4.149 3.359\n",
+ "3 iTransformer 21.568001 4.644 3.487"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "\n",
+ "# 原油计算预测评估指数\n",
+ "@exception_logger\n",
+ "def model_losss(sqlitedb,end_time):\n",
+ " global dataset\n",
+ " global rote\n",
+ " most_model = [sqlitedb.select_data('most_model',columns=['most_common_model'],order_by='ds desc',limit=1).values[0][0]]\n",
+ " most_model_name = most_model[0]\n",
+ "\n",
+ " # 预测数据处理 predict\n",
+ " df_combined = loadcsv(os.path.join(dataset,\"cross_validation.csv\")) \n",
+ " df_combined = dateConvert(df_combined)\n",
+ " # 删除空列\n",
+ " df_combined.dropna(axis=1,inplace=True)\n",
+ " # 删除缺失值,预测过程不能有缺失值\n",
+ " df_combined.dropna(inplace=True) \n",
+ " # 其他列转为数值类型\n",
+ " df_combined = df_combined.astype({col: 'float32' for col in df_combined.columns if col not in ['cutoff','ds'] })\n",
+ " # 使用 groupby 和 transform 获取每个分组中 cutoff 的最大值,并创建一个新的列来存储这个最大值\n",
+ " df_combined['max_cutoff'] = df_combined.groupby('ds')['cutoff'].transform('max')\n",
+ "\n",
+ " # 然后筛选出那些 cutoff 等于 max_cutoff 的行,这样就得到了每个分组中 cutoff 最大的行,并保留了其他列\n",
+ " df_combined = df_combined[df_combined['cutoff'] == df_combined['max_cutoff']]\n",
+ " # 删除模型生成的cutoff列\n",
+ " df_combined.drop(columns=['cutoff', 'max_cutoff'], inplace=True)\n",
+ " # 获取模型名称\n",
+ " modelnames = df_combined.columns.to_list()[1:] \n",
+ " if 'y' in modelnames:\n",
+ " modelnames.remove('y')\n",
+ " # df_combined3 = df_combined.copy() # 备份df_combined,后面画图需要\n",
+ " df_combined3 = sqlitedb.select_data('accuracy')\n",
+ "\n",
+ "\n",
+ " # 空的列表存储每个模型的MSE、RMSE、MAE、MAPE、SMAPE\n",
+ " cellText = []\n",
+ "\n",
+ " # 遍历模型名称,计算模型评估指标 \n",
+ " for model in modelnames:\n",
+ " modelmse = mse(df_combined['y'], df_combined[model])\n",
+ " modelrmse = rmse(df_combined['y'], df_combined[model])\n",
+ " modelmae = mae(df_combined['y'], df_combined[model])\n",
+ " # modelmape = mape(df_combined['y'], df_combined[model])\n",
+ " # modelsmape = smape(df_combined['y'], df_combined[model])\n",
+ " # modelr2 = r2_score(df_combined['y'], df_combined[model])\n",
+ " cellText.append([model,round(modelmse, 3), round(modelrmse, 3), round(modelmae, 3)])\n",
+ " \n",
+ " model_results3 = pd.DataFrame(cellText,columns=['模型(Model)','平均平方误差(MSE)', '均方根误差(RMSE)', '平均绝对误差(MAE)'])\n",
+ " # 按MSE升序排列\n",
+ " model_results3 = model_results3.sort_values(by='平均平方误差(MSE)', ascending=True)\n",
+ " model_results3.to_csv(os.path.join(dataset,\"model_evaluation.csv\"),index=False)\n",
+ " modelnames = model_results3['模型(Model)'].tolist()\n",
+ " allmodelnames = modelnames.copy()\n",
+ " # # 保存5个最佳模型的名称\n",
+ " # if len(modelnames) > 5:\n",
+ " # modelnames = modelnames[0:5]\n",
+ " # if is_fivemodels:\n",
+ " # pass\n",
+ " # else:\n",
+ " # with open(os.path.join(dataset,\"best_modelnames.txt\"), 'w') as f:\n",
+ " # f.write(','.join(modelnames) + '\\n')\n",
+ "\n",
+ " # # 预测值与真实值对比图\n",
+ " # plt.rcParams['font.sans-serif'] = ['SimHei']\n",
+ " # plt.figure(figsize=(15, 10))\n",
+ " # for n,model in enumerate(modelnames[:5]):\n",
+ " # plt.subplot(3, 2, n+1)\n",
+ " # plt.plot(df_combined3['ds'], df_combined3['y'], label='真实值')\n",
+ " # plt.plot(df_combined3['ds'], df_combined3[model], label=model)\n",
+ " # plt.legend()\n",
+ " # plt.xlabel('日期')\n",
+ " # plt.ylabel('价格')\n",
+ " # plt.title(model+'拟合')\n",
+ " # plt.subplots_adjust(hspace=0.5)\n",
+ " # plt.savefig(os.path.join(dataset,'预测值与真实值对比图.png'), bbox_inches='tight')\n",
+ " # plt.close()\n",
+ " \n",
+ " \n",
+ " # # 历史数据+预测数据\n",
+ " # # 拼接未来时间预测\n",
+ " df_predict = pd.read_csv(os.path.join(dataset,'predict.csv'))\n",
+ " df_predict.drop('unique_id',inplace=True,axis=1)\n",
+ " df_predict.dropna(axis=1,inplace=True)\n",
+ "\n",
+ " try:\n",
+ " df_predict['ds'] = pd.to_datetime(df_predict['ds'],format=r'%Y-%m-%d')\n",
+ " except ValueError :\n",
+ " df_predict['ds'] = pd.to_datetime(df_predict['ds'],format=r'%Y/%m/%d')\n",
+ "\n",
+ " \n",
+ " df_combined3 = pd.concat([df_combined3, df_predict]).reset_index(drop=True)\n",
+ "\n",
+ " # 计算每个模型与最佳模型的绝对误差比例,根据设置的阈值rote筛选预测值显示最大最小值\n",
+ " # names = []\n",
+ " # names_df = df_combined3.copy()\n",
+ " # for col in allmodelnames:\n",
+ " # names_df[f'{col}-{most_model_name}-误差比例'] = abs(names_df[col] - names_df[most_model_name]) / names_df[most_model_name]\n",
+ " # names.append(f'{col}-{most_model_name}-误差比例')\n",
+ "\n",
+ " # names_df = names_df[names]\n",
+ " # def add_rote_column(row):\n",
+ " # columns = []\n",
+ " # for r in names_df.columns:\n",
+ " # if row[r] <= rote:\n",
+ " # columns.append(r.split('-')[0])\n",
+ " # return pd.Series([columns], index=['columns'])\n",
+ " # names_df['columns'] = names_df.apply(add_rote_column, axis=1)\n",
+ " \n",
+ " def add_upper_lower_bound(row):\n",
+ "\n",
+ " # 计算上边界值\n",
+ " upper_bound = row.max()\n",
+ " # 计算下边界值\n",
+ " lower_bound = row.min()\n",
+ " return pd.Series([lower_bound, upper_bound], index=['min_within_quantile', 'max_within_quantile'])\n",
+ "\n",
+ " # df_combined3[['min_within_quantile','max_within_quantile']] = names_df.apply(add_upper_lower_bound, axis=1)\n",
+ "\n",
+ " # 取前五最佳模型的最大最小值作为上下边界值\n",
+ " # df_combined3[['min_within_quantile','max_within_quantile']]= df_combined3[modelnames].apply(add_upper_lower_bound, axis=1)\n",
+ " \n",
+ " def find_closest_values(row):\n",
+ " x = row.y\n",
+ " if x is None or np.isnan(x):\n",
+ " return pd.Series([None, None], index=['min_price','max_price'])\n",
+ " # row = row.drop('ds')\n",
+ " row = row.values.tolist()\n",
+ " row.sort()\n",
+ " print(row)\n",
+ " # x 在row中的索引\n",
+ " index = row.index(x)\n",
+ " if index == 0:\n",
+ " return pd.Series([row[index+1], row[index+2]], index=['min_price','max_price'])\n",
+ " elif index == len(row)-1:\n",
+ " return pd.Series([row[index-2], row[index-1]], index=['min_price','max_price'])\n",
+ " else:\n",
+ " return pd.Series([row[index-1], row[index+1]], index=['min_price','max_price'])\n",
+ "\n",
+ "\n",
+ " \n",
+ " def find_most_common_model():\n",
+ " # 最多频率的模型名称\n",
+ " min_model_max_frequency_model = df_combined3['min_model'].tail(60).value_counts().idxmax()\n",
+ " max_model_max_frequency_model = df_combined3['max_model'].tail(60).value_counts().idxmax()\n",
+ " if min_model_max_frequency_model == max_model_max_frequency_model:\n",
+ " # 取60天第二多的模型\n",
+ " max_model_max_frequency_model = df_combined3['max_model'].tail(60).value_counts().nlargest(2).index[1]\n",
+ "\n",
+ " df_predict['min_model'] = min_model_max_frequency_model\n",
+ " df_predict['max_model'] = max_model_max_frequency_model\n",
+ " df_predict['min_within_quantile'] = df_predict[min_model_max_frequency_model]\n",
+ " df_predict['max_within_quantile'] = df_predict[max_model_max_frequency_model]\n",
+ "\n",
+ "\n",
+ " # find_most_common_model()\n",
+ "\n",
+ " df_combined3['ds'] = pd.to_datetime(df_combined3['ds'])\n",
+ " df_combined3['ds'] = df_combined3['ds'].dt.strftime('%Y-%m-%d')\n",
+ " df_predict2 = df_combined3.tail(horizon)\n",
+ "\n",
+ " # 保存到数据库\n",
+ " # if not sqlitedb.check_table_exists('accuracy'):\n",
+ " # columns = ','.join(df_combined3.columns.to_list()+['id','CREAT_DATE','min_price','max_price'])\n",
+ " # sqlitedb.create_table('accuracy',columns=columns)\n",
+ " # existing_data = sqlitedb.select_data(table_name = \"accuracy\")\n",
+ "\n",
+ " # if not existing_data.empty:\n",
+ " # max_id = existing_data['id'].astype(int).max()\n",
+ " # df_predict2['id'] = range(max_id + 1, max_id + 1 + len(df_predict2))\n",
+ " # else:\n",
+ " # df_predict2['id'] = range(1, 1 + len(df_predict2))\n",
+ " # df_predict2['CREAT_DATE'] = now if end_time == '' else end_time\n",
+ " # df_predict2['CREAT_DATE'] = end_time\n",
+ " # def get_common_columns(df1, df2):\n",
+ " # # 获取两个DataFrame的公共列名\n",
+ " # return list(set(df1.columns).intersection(df2.columns))\n",
+ "\n",
+ " # common_columns = get_common_columns(df_predict2, existing_data)\n",
+ " # try:\n",
+ " # df_predict2[common_columns].to_sql(\"accuracy\", con=sqlitedb.connection, if_exists='append', index=False)\n",
+ " # except:\n",
+ " # df_predict2.to_sql(\"accuracy\", con=sqlitedb.connection, if_exists='append', index=False)\n",
+ " \n",
+ " # 更新accuracy表中的y值\n",
+ " # update_y = sqlitedb.select_data(table_name = \"accuracy\",where_condition='y is null')\n",
+ " # if len(update_y) > 0:\n",
+ " # df_combined4 = df_combined3[(df_combined3['ds'].isin(update_y['ds'])) & (df_combined3['y'].notnull())]\n",
+ " # if len(df_combined4) > 0: \n",
+ " # for index, row in df_combined4.iterrows():\n",
+ " # try:\n",
+ " # sqlitedb.update_data('accuracy',f\"y = {row['y']}\",f\"ds = '{row['ds']}'\")\n",
+ " # except:\n",
+ " # logger.error(f'更新accuracy表中的y值失败,row={row}')\n",
+ " # 上周准确率计算\n",
+ " # predict_y = sqlitedb.select_data(table_name = \"accuracy\") \n",
+ " # ids = predict_y[predict_y['min_price'].isnull()]['id'].tolist()\n",
+ " # ids = predict_y['id'].tolist()\n",
+ " # 准确率基准与绘图上下界逻辑一致\n",
+ " # predict_y[['min_price','max_price']] = predict_y[['min_within_quantile','max_within_quantile']]\n",
+ " # 模型评估前五均值 \n",
+ " # predict_y['min_price'] = predict_y[modelnames].mean(axis=1) -1\n",
+ " # predict_y['max_price'] = predict_y[modelnames].mean(axis=1) +1\n",
+ " # # 模型评估前十均值 \n",
+ " # predict_y['min_price'] = predict_y[allmodelnames[0:10]].mean(axis=1) -1.5\n",
+ " # predict_y['max_price'] = predict_y[allmodelnames[0:10]].mean(axis=1) +1.5\n",
+ " # 模型评估前十最大最小\n",
+ " # allmodelnames 和 predict_y 列 重复的\n",
+ " # allmodelnames = [col for col in allmodelnames if col in predict_y.columns]\n",
+ " # predict_y['min_price'] = predict_y[allmodelnames[0:10]].min(axis=1) \n",
+ " # predict_y['max_price'] = predict_y[allmodelnames[0:10]].max(axis=1)\n",
+ " # for id in ids:\n",
+ " # row = predict_y[predict_y['id'] == id]\n",
+ " # try:\n",
+ " # sqlitedb.update_data('accuracy',f\"min_price = {row['min_price'].values[0]},max_price = {row['max_price'].values[0]}\",f\"id = {id}\")\n",
+ " # except:\n",
+ " # logger.error(f'更新accuracy表中的min_price,max_price值失败,row={row}')\n",
+ " # 拼接市场最高最低价\n",
+ " # xlsfilename = os.path.join(dataset,'数据项下载.xls')\n",
+ " # df2 = pd.read_excel(xlsfilename)[5:]\n",
+ " # df2 = df2.rename(columns = {'数据项名称':'ds','布伦特最低价':'LOW_PRICE','布伦特最高价':'HIGH_PRICE'})\n",
+ " # print(df2.shape)\n",
+ " # df = pd.merge(predict_y,df2,on=['ds'],how='left')\n",
+ " # df['ds'] = pd.to_datetime(df['ds'])\n",
+ " # df = df.reindex()\n",
+ "\n",
+ " # 判断预测值在不在布伦特最高最低价范围内,准确率为1,否则为0\n",
+ " # def is_within_range(row):\n",
+ " # for model in allmodelnames:\n",
+ " # if row['LOW_PRICE'] <= row[col] <= row['HIGH_PRICE']:\n",
+ " # return 1\n",
+ " # else:\n",
+ " # return 0\n",
+ "\n",
+ " # 比较真实最高最低,和预测最高最低 计算准确率\n",
+ " # def calculate_accuracy(row):\n",
+ " # # 全子集情况:\n",
+ " # if (row['max_price'] >= row['HIGH_PRICE'] and row['min_price'] <= row['LOW_PRICE']) or \\\n",
+ " # (row['max_price'] <= row['HIGH_PRICE'] and row['min_price'] >= row['LOW_PRICE']):\n",
+ " # return 1 \n",
+ " # # 无交集情况:\n",
+ " # if row['max_price'] < row['LOW_PRICE'] or \\\n",
+ " # row['min_price'] > row['HIGH_PRICE']:\n",
+ " # return 0\n",
+ " # # 有交集情况:\n",
+ " # else:\n",
+ " # sorted_prices = sorted([row['LOW_PRICE'], row['min_price'], row['max_price'], row['HIGH_PRICE']])\n",
+ " # middle_diff = sorted_prices[2] - sorted_prices[1]\n",
+ " # price_range = row['HIGH_PRICE'] - row['LOW_PRICE']\n",
+ " # accuracy = middle_diff / price_range\n",
+ " # return accuracy\n",
+ "\n",
+ " # columns = ['HIGH_PRICE','LOW_PRICE','min_price','max_price']\n",
+ " # df[columns] = df[columns].astype(float)\n",
+ " # df['ACCURACY'] = df.apply(calculate_accuracy, axis=1)\n",
+ " # df['ACCURACY'] = df.apply(is_within_range, axis=1)\n",
+ " # 取结束日期上一周的日期\n",
+ " def get_week_date(end_time):\n",
+ " endtime = end_time\n",
+ " endtimeweek = datetime.datetime.strptime(endtime, '%Y-%m-%d')\n",
+ " up_week = endtimeweek - datetime.timedelta(days=endtimeweek.weekday() + 14)\n",
+ " up_week_dates = [up_week + datetime.timedelta(days=i) for i in range(14)]\n",
+ " create_dates = [date.strftime('%Y-%m-%d') for date in up_week_dates[4:-3]]\n",
+ " ds_dates = [date.strftime('%Y-%m-%d') for date in up_week_dates[-7:-2]]\n",
+ " return create_dates,ds_dates\n",
+ " \n",
+ " create_dates,ds_dates = get_week_date(end_time)\n",
+ " # 计算准确率并保存结果\n",
+ " def _get_accuracy_rate(df,create_dates,ds_dates):\n",
+ " df3 = df.copy()\n",
+ " df3 = df3[df3['CREAT_DATE'].isin(create_dates)]\n",
+ " df3 = df3[df3['ds'].isin(ds_dates)]\n",
+ " accuracy_rote = 0\n",
+ " for i,group in df3.groupby('CREAT_DATE'):\n",
+ " accuracy_rote += (group['ACCURACY'].sum()/len(group))*weight_dict[len(group)-1]\n",
+ " df4 = pd.DataFrame(columns=['开始日期','结束日期','准确率'])\n",
+ " df4.loc[len(df4)] = {'开始日期':ds_dates[0],'结束日期':ds_dates[-1],'准确率':accuracy_rote}\n",
+ " df4.to_sql(\"accuracy_rote\", con=sqlitedb.connection, if_exists='append', index=False)\n",
+ " # return df4\n",
+ " \n",
+ " # _get_accuracy_rate(df,create_dates,ds_dates)\n",
+ " \n",
+ "    def _add_abs_error_rate():\n",
+ "        # Adds one '<model>_abs_error_rate' column per model to df_combined3, then records for\n",
+ "        # every row the model with the smallest error rate and that model's prediction.\n",
+ "        suffix = '_abs_error_rate'\n",
+ "        error_columns = [f'{model}{suffix}' for model in allmodelnames]\n",
+ "        # Relative deviation of each model's prediction from the true value y\n",
+ "        for model, error_column in zip(allmodelnames, error_columns):\n",
+ "            df_combined3[error_column] = abs(df_combined3['y'] - df_combined3[model]) / df_combined3['y']\n",
+ "\n",
+ "        # Name of the error-rate column holding the smallest value in each row\n",
+ "        min_abs_error_rate_column_name = df_combined3.apply(lambda row: row[error_columns].idxmin(), axis=1)\n",
+ "        # Strip only the suffix: split('_')[0] would truncate model names that contain '_'\n",
+ "        min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(lambda x: x[:-len(suffix)])\n",
+ "        # Prediction made by the best model of each row\n",
+ "        min_abs_error_rate_predictions = df_combined3.apply(lambda row: row[min_abs_error_rate_column_name[row.name]], axis=1)\n",
+ "        # Store both results on the DataFrame\n",
+ "        df_combined3['min_abs_error_rate_prediction'] = min_abs_error_rate_predictions\n",
+ "        df_combined3['min_abs_error_rate_column_name'] = min_abs_error_rate_column_name\n",
+ " _add_abs_error_rate()\n",
+ "\n",
+ " # 判断 df 的数值列转为float\n",
+ " for col in df_combined3.columns:\n",
+ " try:\n",
+ " if col != 'ds':\n",
+ " df_combined3[col] = df_combined3[col].astype(float)\n",
+ " df_combined3[col] = df_combined3[col].round(2)\n",
+ " except ValueError:\n",
+ " pass\n",
+ " df_combined3.to_csv(os.path.join(dataset,\"testandpredict_groupby.csv\"),index=False) \n",
+ " \n",
+ " \n",
+ " # 历史价格+预测价格\n",
+ " sqlitedb.drop_table('testandpredict_groupby')\n",
+ " df_combined3.to_sql('testandpredict_groupby',sqlitedb.connection,index=False)\n",
+ " # 新增均值列\n",
+ " df_combined3['mean'] = df_combined3[modelnames].mean(axis=1)\n",
+ "\n",
+ " # def _plt_predict_ture(df):\n",
+ " # lens = df.shape[0] if df.shape[0] < 180 else 90\n",
+ " # df = df[-lens:] # 取180个数据点画图\n",
+ " # # 历史价格\n",
+ " # plt.figure(figsize=(20, 10))\n",
+ " # plt.plot(df['ds'], df['y'], label='真实值')\n",
+ " # # 均值线\n",
+ " # plt.plot(df['ds'], df['mean'], color='r', linestyle='--', label='前五模型预测均值')\n",
+ " # # 颜色填充\n",
+ " # plt.fill_between(df['ds'], df['max_within_quantile'], df['min_within_quantile'], alpha=0.2)\n",
+ " # markers = ['o', 's', '^', 'D', 'v', '*', 'p', 'h', 'H', '+', 'x', 'd']\n",
+ " # random_marker = random.choice(markers)\n",
+ " # for model in modelnames:\n",
+ " # # for model in ['BiTCN','RNN']:\n",
+ " # plt.plot(df['ds'][-horizon:], df[model][-horizon:], label=model,marker=random_marker)\n",
+ " # # plt.plot(df_combined3['ds'], df_combined3['min_abs_error_rate_prediction'], label='最小绝对误差', linestyle='--', color='orange')\n",
+ " # # 网格\n",
+ " # plt.grid(True)\n",
+ " # # 显示历史值\n",
+ " # for i, j in zip(df['ds'], df['y']):\n",
+ " # plt.text(i, j, str(j), ha='center', va='bottom')\n",
+ "\n",
+ " # # for model in most_model:\n",
+ " # # plt.plot(df['ds'], df[model], label=model,marker='o')\n",
+ " # # 当前日期画竖虚线\n",
+ " # plt.axvline(x=df['ds'].iloc[-horizon], color='r', linestyle='--')\n",
+ " # plt.legend()\n",
+ " # plt.xlabel('日期')\n",
+ " # plt.ylabel('价格')\n",
+ " \n",
+ " # plt.savefig(os.path.join(dataset,'历史价格-预测值.png'), bbox_inches='tight')\n",
+ " # plt.close()\n",
+ " \n",
+ " def _plt_top10_predict_ture():\n",
+ " # 模型评估前十均值画图\n",
+ " df = sqlitedb.select_data(table_name = \"accuracy\")\n",
+ " # CREAT_DATE 去重取id最大的数据\n",
+ " df = df.sort_values(by=['CREAT_DATE','id'],ascending=[False,False]).drop_duplicates(subset=['CREAT_DATE'],keep='last')\n",
+ " print(df.shape)\n",
+ " lens = df.shape[0] if df.shape[0] < 180 else 180 \n",
+ " df = df[-lens:] # 取180个数据点画图\n",
+ " # 历史价格\n",
+ " plt.figure(figsize=(20, 10))\n",
+ " plt.plot(df['ds'], df['y'], label='真实值')\n",
+ " # 均值线\n",
+ " df['mean'] = df[allmodelnames[:10]].mean(axis=1)\n",
+ " plt.plot(df['ds'], df['mean'], color='g', linestyle='--', label='前十模型预测均值')\n",
+ " plt.plot(df['ds'], df['min_price'], color='r', linestyle='--', label='min_price')\n",
+ " plt.plot(df['ds'], df['max_price'], color='r', linestyle='--', label='max_price')\n",
+ " # 颜色填充\n",
+ " plt.fill_between(df['ds'], df['max_price'], df['min_price'], alpha=0.2)\n",
+ " markers = ['o', 's', '^', 'D', 'v', '*', 'p', 'h', 'H', '+', 'x', 'd']\n",
+ " random_marker = random.choice(markers)\n",
+ " # for model in modelnames[:5]:\n",
+ " # for model in ['BiTCN','RNN']:\n",
+ " # plt.plot(df['ds'][-horizon:], df[model][-horizon:], label=model,marker=random_marker)\n",
+ " # plt.plot(df_combined3['ds'], df_combined3['min_abs_error_rate_prediction'], label='最小绝对误差', linestyle='--', color='orange')\n",
+ " # 网格\n",
+ " plt.grid(True)\n",
+ " # 显示历史值\n",
+ " # for i, j in zip(df['ds'], df['y']):\n",
+ " # plt.text(i, j, str(j), ha='center', va='bottom')\n",
+ "\n",
+ " # for model in most_model:\n",
+ " # plt.plot(df['ds'], df[model], label=model,marker='o')\n",
+ " # 当前日期画竖虚线\n",
+ " plt.axvline(x=df['ds'].iloc[-horizon], color='r', linestyle='--')\n",
+ " plt.legend()\n",
+ " plt.xlabel('日期')\n",
+ " plt.ylabel('价格')\n",
+ " \n",
+ " plt.savefig(os.path.join(dataset,'历史价格-预测值1.png'), bbox_inches='tight')\n",
+ " plt.close()\n",
+ "\n",
+ "    def _plt_predict_table(df):\n",
+ "        # Render the last `horizon` rows of df as a table image saved under the dataset directory\n",
+ "        fig, ax = plt.subplots(figsize=(20, 6))\n",
+ "        ax.axis('off')  # hide the axes\n",
+ "        # Keep two decimals; copy the tail so adding the 'Day' column does not write into a slice\n",
+ "        # of the rounded frame (avoids pandas' SettingWithCopyWarning)\n",
+ "        df = df.round(2)\n",
+ "        df = df[-horizon:].copy()\n",
+ "        df['Day'] = [f'Day_{i}' for i in range(1,horizon+1)]\n",
+ "        # Move the Day column to the front\n",
+ "        df = df[['Day'] + list(df.columns[:-1])]\n",
+ "        table = ax.table(cellText=df.values, colLabels=df.columns, loc='center')\n",
+ "        # Fixed font size for the table cells\n",
+ "        table.auto_set_font_size(False)\n",
+ "        table.set_fontsize(10)\n",
+ "\n",
+ "        plt.savefig(os.path.join(dataset,'预测值表格.png'), bbox_inches='tight')\n",
+ "        plt.close()\n",
+ " \n",
+ " def _plt_model_results3():\n",
+ " # 可视化评估结果\n",
+ " plt.rcParams['font.sans-serif'] = ['SimHei']\n",
+ " fig, ax = plt.subplots(figsize=(20, 10))\n",
+ " ax.axis('off') # 关闭坐标轴\n",
+ " table = ax.table(cellText=model_results3.values, colLabels=model_results3.columns, loc='center')\n",
+ " # 加宽表格\n",
+ " table.auto_set_font_size(False)\n",
+ " table.set_fontsize(10)\n",
+ "\n",
+ " # 设置表格样式,列数据最小的用绿色标识\n",
+ " plt.savefig(os.path.join(dataset,'模型评估.png'), bbox_inches='tight')\n",
+ " plt.close()\n",
+ "\n",
+ " # _plt_predict_ture(df_combined3)\n",
+ " _plt_top10_predict_ture()\n",
+ " _plt_predict_table(df_combined3)\n",
+ " _plt_model_results3()\n",
+ "\n",
+ " return model_results3\n",
+ " \n",
+ "model_losss(sqlitedb=sqlitedb,end_time='2024-12-16')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8ce1967f",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "base",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "31c0e11d-c87a-4e95-92a0-d1d09625e255",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from config_jingbo import *\n",
+ "import requests\n",
+ "import json\n",
+ "import datetime"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "83c81b9e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "''"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "login_pushreport_url\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a058f507",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "2b330ee3-c006-4ab1-8558-59c51ac8d86f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'data': {'account': 'api_test',\n",
+ " 'password': 'ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=',\n",
+ " 'tenantHashCode': '8a4577dbd919675758d57999a1e891fe',\n",
+ " 'terminal': 'API'},\n",
+ " 'funcModule': 'API',\n",
+ " 'funcOperation': '获取token'}"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "login_data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "dcb6100a-ed2b-4077-a1a9-361c6cb565f9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "def get_head_auth_report():\n",
+ "    # Log in to the report service and return its access token, or None when the login is rejected.\n",
+ "    login_res = requests.post(url=login_pushreport_url, json=login_data, timeout=(3, 5))\n",
+ "    text = json.loads(login_res.text)\n",
+ "    # Print everything except the 'data' payload: it carries the access token, which would\n",
+ "    # otherwise be persisted in the notebook output\n",
+ "    print({key: value for key, value in text.items() if key != 'data'})\n",
+ "    if text.get(\"status\"):\n",
+ "        return text[\"data\"][\"accessToken\"]\n",
+ "    return None\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "22c0c7c4",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'confirmFlg': False, 'data': {'accessToken': 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfdGVzdCIsInRoIjoiOGE0NTc3ZGJkOTE5Njc1NzU4ZDU3OTk5YTFlODkxZmUiLCJsdCI6ImFwaSIsImlzcyI6IiIsInRtIjoiUEMiLCJleHAiOjE3MzUxNDkzMzYsImp0aSI6IjczYzJkOGJjYzQ2NzQwYjNiYWQxZmI3NjMzODM4YTcxIn0.zLVuyCEbg-x9lRXuJDYbdiwzo_nhEQGCCInnJKfQcd8', 'md5Token': '39413fe9e3e93f717d8d2713c4487172'}, 'status': True}\n"
+ ]
+ }
+ ],
+ "source": [
+ "token = get_head_auth_report()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "12077ead",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# token = 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfdGVzdCIsInRoIjoiOGE0NTc3ZGJkOTE5Njc1NzU4ZDU3OTk5YTFlODkxZmUiLCJsdCI6ImFwaSIsImlzcyI6IiIsInRtIjoiUEMiLCJleHAiOjE3MzE5NTkzNjUsImp0aSI6IjRiMjcwNTgzN2YyZDQxOWM4MzQ3NjI2NDQwZDlhZGQzIn0.PPgnoiJt412dJiceqVW8w7qkJFY4s-VqU9z6ZIkpqho'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "a7ae21d1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# def upload_warning_data(warning_data):\n",
+ "# token = get_head_auth_report()\n",
+ "# warning_data = warning_data\n",
+ "# headers = {\"Authorization\": token}\n",
+ "# logger.info(\"预警上传中...\")\n",
+ "# logger.info(f\"token:{token}\")\n",
+ "# logger.info(f\"warning_data:{warning_data}\" )\n",
+ "# upload_res = requests.post(url=upload_warning_url, headers=headers, json=warning_data, timeout=(3, 15))\n",
+ "# if upload_res:\n",
+ "# return upload_res\n",
+ "# else:\n",
+ "# logger.info(\"预警上传失败\")\n",
+ "# return None\n",
+ "\n",
+ "\n",
+ "# logger.info(f'上传预警信息')\n",
+ "# try:\n",
+ "# warning_date = datetime.datetime.now().strftime('%Y-%m-%d')\n",
+ "# content = f'{warning_date}有2887个停更'\n",
+ "# warning_data['data']['WARNING_DATE'] = warning_date\n",
+ "# warning_data['data']['WARNING_CONTENT'] = content\n",
+ "# upload_warning_data(warning_data)\n",
+ "# logger.info(f'上传预警信息成功')\n",
+ "# except Exception as e:\n",
+ "# logger.error(f'上传预警信息失败:{e}')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "54942e1a",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{\"confirmFlg\":false,\"data\":[],\"status\":true}\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Request body for the data-item query. The date range runs from dateStart to dateEnd; with the\n",
+ "# two values the other way round (start 20241024, end 20240101) the service returned an empty list.\n",
+ "query_data_list_item_nos_data = {\n",
+ "    \"funcModule\":'数据项编码集合',\n",
+ "    \"funcOperation\":'数据项编码集合',\n",
+ "    \"data\":{\n",
+ "        \"dataItemNoList\":['EXCHANGE|RATE|MIDDLE_PRICE'],\n",
+ "        \"dateEnd\":'20241024',\n",
+ "        \"dateStart\":'20240101'\n",
+ "        \n",
+ "    }\n",
+ "}\n",
+ "\n",
+ "# Send the query with the login token and show the raw response\n",
+ "headers = {\"Authorization\": token}\n",
+ "items_res = requests.post(url=query_data_list_item_nos_url, headers=headers, json=query_data_list_item_nos_data, timeout=(3, 35))\n",
+ "print(items_res.text)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "base",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5