From bfb981d486d4e77e8ec35cb9d0c4f1d2274e8856 Mon Sep 17 00:00:00 2001 From: workpc Date: Wed, 25 Dec 2024 16:13:22 +0800 Subject: [PATCH] =?UTF-8?q?=E9=A2=84=E6=B5=8B=E8=A1=A8=E6=B7=BB=E5=8A=A0y?= =?UTF-8?q?=E5=80=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config_jingbo.py | 14 +- main_yuanyou.py | 16 +- models/nerulforcastmodels.py | 443 +---------------- 原油预测绘图调试.ipynb | 710 +++++++++++++++++++++++++++ 测试环境获取市场信息平台数据项.ipynb | 203 ++++++++ 5 files changed, 956 insertions(+), 430 deletions(-) create mode 100644 原油预测绘图调试.ipynb create mode 100644 测试环境获取市场信息平台数据项.ipynb diff --git a/config_jingbo.py b/config_jingbo.py index 1b1f2c7..bf6e9b6 100644 --- a/config_jingbo.py +++ b/config_jingbo.py @@ -173,7 +173,7 @@ login_pushreport_url = "http://192.168.100.53:8080/jingbo-dev/api/server/login" upload_url = "http://192.168.100.53:8080/jingbo-dev/api/analysis/reportInfo/researchUploadReportSave" # upload_url = "http://192.168.100.109:8080/jingbo/api/analysis/reportInfo/researchUploadReportSave" # zhaoqiwei upload_warning_url = "http://192.168.100.53:8080/jingbo-dev/api/basicBuiness/crudeOilWarning/save" - +query_data_list_item_nos_url = "http://192.168.100.53:8080/jingbo-dev/api/warehouse/dwDataItem/queryDataListItemNos" login_data = { "data": { @@ -213,6 +213,18 @@ warning_data = { } } +query_data_list_item_nos_data = { + "funcModule":'数据项管理', + "funcOperation":'查询数据项编码', + "data":{ + "dataItemNoList":['Brent活跃合约',''], + "dateEnd":'', + "dateStart":'2023-01-01' + + } +} + + # 北京环境数据库 host = '192.168.101.27' port = 3306 diff --git a/main_yuanyou.py b/main_yuanyou.py index 798d49c..5892b2c 100644 --- a/main_yuanyou.py +++ b/main_yuanyou.py @@ -107,6 +107,20 @@ def predict_main(): continue sqlitedb.insert_data('trueandpredict', tuple(row_dict.values()), columns=row_dict.keys()) + # 更新accuracy表的y值 + if not sqlitedb.check_table_exists('accuracy'): + pass + else: + update_y = sqlitedb.select_data('accuracy',where_condition="y is null") + if len(update_y) > 0: + logger.info('更新accuracy表的y值') + # 找到update_y 中ds且df中的y的行 + update_y = update_y[update_y['ds']<=end_time] + for row in update_y.itertuples(index=False): + row_dict = row._asdict() + yy = df[df['ds']==row_dict['ds']]['y'].values[0] + sqlitedb.update_data('accuracy', f"y = {yy}", where_condition=f"ds = '{row_dict['ds']}'") + import datetime # 判断当前日期是不是周一 is_weekday = datetime.datetime.now().weekday() == 0 @@ -243,7 +257,7 @@ if __name__ == '__main__': global end_time is_on = True # 遍历2024-11-25 到 2024-12-3 之间的工作日日期 - for i_time in pd.date_range('2024-10-07', '2024-12-16', freq='B'): + for i_time in pd.date_range('2024-10-29', '2024-12-16', freq='B'): end_time = i_time.strftime('%Y-%m-%d') predict_main() if is_on: diff --git a/models/nerulforcastmodels.py b/models/nerulforcastmodels.py index 6ffb249..d4160b9 100644 --- a/models/nerulforcastmodels.py +++ b/models/nerulforcastmodels.py @@ -401,8 +401,6 @@ def model_losss(sqlitedb,end_time): else: return pd.Series([row[index-1], row[index+1]], index=['min_price','max_price']) - - def find_most_common_model(): # 最多频率的模型名称 min_model_max_frequency_model = df_combined3['min_model'].tail(60).value_counts().idxmax() @@ -445,17 +443,7 @@ def model_losss(sqlitedb,end_time): df_predict2[common_columns].to_sql("accuracy", con=sqlitedb.connection, if_exists='append', index=False) except: df_predict2.to_sql("accuracy", con=sqlitedb.connection, if_exists='append', index=False) - - # 更新accuracy表中的y值 - update_y = sqlitedb.select_data(table_name = "accuracy",where_condition='y is null') - if len(update_y) > 0: - df_combined4 = df_combined3[(df_combined3['ds'].isin(update_y['ds'])) & (df_combined3['y'].notnull())] - if len(df_combined4) > 0: - for index, row in df_combined4.iterrows(): - try: - sqlitedb.update_data('accuracy',f"y = {row['y']}",f"ds = '{row['ds']}'") - except: - logger.error(f'更新accuracy表中的y值失败,row={row}') + # 上周准确率计算 predict_y = sqlitedb.select_data(table_name = "accuracy") # ids = predict_y[predict_y['min_price'].isnull()]['id'].tolist() @@ -479,6 +467,8 @@ def model_losss(sqlitedb,end_time): sqlitedb.update_data('accuracy',f"min_price = {row['min_price'].values[0]},max_price = {row['max_price'].values[0]}",f"id = {id}") except: logger.error(f'更新accuracy表中的min_price,max_price值失败,row={row}') + + # 拼接市场最高最低价 xlsfilename = os.path.join(dataset,'数据项下载.xls') df2 = pd.read_excel(xlsfilename)[5:] @@ -496,6 +486,7 @@ def model_losss(sqlitedb,end_time): else: return 0 + # 定义一个函数来计算准确率 # 比较真实最高最低,和预测最高最低 计算准确率 def calculate_accuracy(row): # 全子集情况: @@ -527,430 +518,26 @@ def model_losss(sqlitedb,end_time): create_dates = [date.strftime('%Y-%m-%d') for date in up_week_dates[4:-3]] ds_dates = [date.strftime('%Y-%m-%d') for date in up_week_dates[-7:-2]] return create_dates,ds_dates - - create_dates,ds_dates = get_week_date(end_time) + # 计算准确率并保存结果 - def _get_accuracy_rate(df,create_dates,ds_dates): + def _get_accuracy_rate(df,create_dates,ds_dates,endtime): df3 = df.copy() df3 = df3[df3['CREAT_DATE'].isin(create_dates)] df3 = df3[df3['ds'].isin(ds_dates)] accuracy_rote = 0 for i,group in df3.groupby('CREAT_DATE'): - accuracy_rote += (group['ACCURACY'].sum()/len(group))*weight_dict[len(group)-1] - df4 = pd.DataFrame(columns=['开始日期','结束日期','准确率']) - df4.loc[len(df4)] = {'开始日期':ds_dates[0],'结束日期':ds_dates[-1],'准确率':accuracy_rote} - df4.to_sql("accuracy_rote", con=sqlitedb.connection, if_exists='append', index=False) - # return df4 - - _get_accuracy_rate(df,create_dates,ds_dates) - - def _add_abs_error_rate(): - # 计算每个预测值与真实值之间的偏差率 - for model in allmodelnames: - df_combined3[f'{model}_abs_error_rate'] = abs(df_combined3['y'] - df_combined3[model]) / df_combined3['y'] - - # 获取每行对应的最小偏差率值 - min_abs_error_rate_values = df_combined3.apply(lambda row: row[[f'{model}_abs_error_rate' for model in allmodelnames]].min(), axis=1) - # 获取每行对应的最小偏差率值对应的列名 - min_abs_error_rate_column_name = df_combined3.apply(lambda row: row[[f'{model}_abs_error_rate' for model in allmodelnames]].idxmin(), axis=1) - # 将列名索引转换为列名 - min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(lambda x: x.split('_')[0]) - # 获取最小偏差率对应的模型的预测值 - min_abs_error_rate_predictions = df_combined3.apply(lambda row: row[min_abs_error_rate_column_name[row.name]], axis=1) - # 将最小偏差率对应的模型的预测值添加到DataFrame中 - df_combined3['min_abs_error_rate_prediction'] = min_abs_error_rate_predictions - df_combined3['min_abs_error_rate_column_name'] = min_abs_error_rate_column_name - # _add_abs_error_rate() - - # 判断 df 的数值列转为float - for col in df_combined3.columns: - try: - if col != 'ds': - df_combined3[col] = df_combined3[col].astype(float) - df_combined3[col] = df_combined3[col].round(2) - except ValueError: - pass - df_combined3.to_csv(os.path.join(dataset,"testandpredict_groupby.csv"),index=False) - - - # 历史价格+预测价格 - sqlitedb.drop_table('testandpredict_groupby') - df_combined3.to_sql('testandpredict_groupby',sqlitedb.connection,index=False) - # 新增均值列 - df_combined3['mean'] = df_combined3[modelnames].mean(axis=1) - - def _plt_predict_ture(df): - lens = df.shape[0] if df.shape[0] < 180 else 90 - df = df[-lens:] # 取180个数据点画图 - # 历史价格 - plt.figure(figsize=(20, 10)) - plt.plot(df['ds'], df['y'], label='真实值') - # 均值线 - plt.plot(df['ds'], df['mean'], color='r', linestyle='--', label='前五模型预测均值') - # 颜色填充 - plt.fill_between(df['ds'], df['max_within_quantile'], df['min_within_quantile'], alpha=0.2) - markers = ['o', 's', '^', 'D', 'v', '*', 'p', 'h', 'H', '+', 'x', 'd'] - random_marker = random.choice(markers) - for model in modelnames: - # for model in ['BiTCN','RNN']: - plt.plot(df['ds'][-horizon:], df[model][-horizon:], label=model,marker=random_marker) - # plt.plot(df_combined3['ds'], df_combined3['min_abs_error_rate_prediction'], label='最小绝对误差', linestyle='--', color='orange') - # 网格 - plt.grid(True) - # 显示历史值 - for i, j in zip(df['ds'], df['y']): - plt.text(i, j, str(j), ha='center', va='bottom') - - # for model in most_model: - # plt.plot(df['ds'], df[model], label=model,marker='o') - # 当前日期画竖虚线 - plt.axvline(x=df['ds'].iloc[-horizon], color='r', linestyle='--') - plt.legend() - plt.xlabel('日期') - plt.ylabel('价格') - - plt.savefig(os.path.join(dataset,'历史价格-预测值.png'), bbox_inches='tight') - plt.close() - - def _plt_predict_table(df): - # 预测值表格 - fig, ax = plt.subplots(figsize=(20, 6)) - ax.axis('off') # 关闭坐标轴 - # 数值保留2位小数 - df = df.round(2) - df = df[-horizon:] - df['Day'] = [f'Day_{i}' for i in range(1,horizon+1)] - # Day列放到最前面 - df = df[['Day'] + list(df.columns[:-1])] - table = ax.table(cellText=df.values, colLabels=df.columns, loc='center') - #加宽表格 - table.auto_set_font_size(False) - table.set_fontsize(10) - - # 设置表格样式,列数据最小的用绿色标识 - plt.savefig(os.path.join(dataset,'预测值表格.png'), bbox_inches='tight') - plt.close() - - def _plt_model_results3(): - # 可视化评估结果 - plt.rcParams['font.sans-serif'] = ['SimHei'] - fig, ax = plt.subplots(figsize=(20, 10)) - ax.axis('off') # 关闭坐标轴 - table = ax.table(cellText=model_results3.values, colLabels=model_results3.columns, loc='center') - # 加宽表格 - table.auto_set_font_size(False) - table.set_fontsize(10) - - # 设置表格样式,列数据最小的用绿色标识 - plt.savefig(os.path.join(dataset,'模型评估.png'), bbox_inches='tight') - plt.close() - - _plt_predict_ture(df_combined3) - _plt_predict_table(df_combined3) - _plt_model_results3() - - return model_results3 - -# 原油计算预测评估指数 -@exception_logger -def model_losss_bak(sqlitedb,end_time): - global dataset - global rote - most_model = [sqlitedb.select_data('most_model',columns=['most_common_model'],order_by='ds desc',limit=1).values[0][0]] - most_model_name = most_model[0] - - # 预测数据处理 predict - df_combined = loadcsv(os.path.join(dataset,"cross_validation.csv")) - df_combined = dateConvert(df_combined) - # 删除空列 - df_combined.dropna(axis=1,inplace=True) - # 删除缺失值,预测过程不能有缺失值 - df_combined.dropna(inplace=True) - # 其他列转为数值类型 - df_combined = df_combined.astype({col: 'float32' for col in df_combined.columns if col not in ['cutoff','ds'] }) - # 使用 groupby 和 transform 结合 lambda 函数来获取每个分组中 cutoff 的最小值,并创建一个新的列来存储这个最大值 - df_combined['max_cutoff'] = df_combined.groupby('ds')['cutoff'].transform('max') - - # 然后筛选出那些 cutoff 等于 max_cutoff 的行,这样就得到了每个分组中 cutoff 最大的行,并保留了其他列 - df_combined = df_combined[df_combined['cutoff'] == df_combined['max_cutoff']] - # 删除模型生成的cutoff列 - df_combined.drop(columns=['cutoff', 'max_cutoff'], inplace=True) - # 获取模型名称 - modelnames = df_combined.columns.to_list()[1:] - if 'y' in modelnames: - modelnames.remove('y') - df_combined3 = df_combined.copy() # 备份df_combined,后面画图需要 - - - # 空的列表存储每个模型的MSE、RMSE、MAE、MAPE、SMAPE - cellText = [] - - # 遍历模型名称,计算模型评估指标 - for model in modelnames: - modelmse = mse(df_combined['y'], df_combined[model]) - modelrmse = rmse(df_combined['y'], df_combined[model]) - modelmae = mae(df_combined['y'], df_combined[model]) - # modelmape = mape(df_combined['y'], df_combined[model]) - # modelsmape = smape(df_combined['y'], df_combined[model]) - # modelr2 = r2_score(df_combined['y'], df_combined[model]) - cellText.append([model,round(modelmse, 3), round(modelrmse, 3), round(modelmae, 3)]) - - model_results3 = pd.DataFrame(cellText,columns=['模型(Model)','平均平方误差(MSE)', '均方根误差(RMSE)', '平均绝对误差(MAE)']) - # 按MSE降序排列 - model_results3 = model_results3.sort_values(by='平均平方误差(MSE)', ascending=True) - model_results3.to_csv(os.path.join(dataset,"model_evaluation.csv"),index=False) - modelnames = model_results3['模型(Model)'].tolist() - allmodelnames = modelnames.copy() - # 保存5个最佳模型的名称 - if len(modelnames) > 5: - modelnames = modelnames[0:5] - if is_fivemodels: - pass - else: - with open(os.path.join(dataset,"best_modelnames.txt"), 'w') as f: - f.write(','.join(modelnames) + '\n') - - # 预测值与真实值对比图 - plt.rcParams['font.sans-serif'] = ['SimHei'] - plt.figure(figsize=(15, 10)) - for n,model in enumerate(modelnames[:5]): - plt.subplot(3, 2, n+1) - plt.plot(df_combined3['ds'], df_combined3['y'], label='真实值') - plt.plot(df_combined3['ds'], df_combined3[model], label=model) - plt.legend() - plt.xlabel('日期') - plt.ylabel('价格') - plt.title(model+'拟合') - plt.subplots_adjust(hspace=0.5) - plt.savefig(os.path.join(dataset,'预测值与真实值对比图.png'), bbox_inches='tight') - plt.close() - - - # # 历史数据+预测数据 - # # 拼接未来时间预测 - df_predict = pd.read_csv(os.path.join(dataset,'predict.csv')) - df_predict.drop('unique_id',inplace=True,axis=1) - df_predict.dropna(axis=1,inplace=True) - - try: - df_predict['ds'] = pd.to_datetime(df_predict['ds'],format=r'%Y-%m-%d') - except ValueError : - df_predict['ds'] = pd.to_datetime(df_predict['ds'],format=r'%Y/%m/%d') - - # def first_row_to_database(df): - # # # 取第一行数据存储到数据库中 - # first_row = df.head(1) - # first_row['ds'] = first_row['ds'].dt.strftime('%Y-%m-%d 00:00:00') - # # 将预测结果保存到数据库 - # if not sqlitedb.check_table_exists('trueandpredict'): - # first_row.to_sql('trueandpredict',sqlitedb.connection,index=False) - # else: - # for col in first_row.columns: - # sqlitedb.add_column_if_not_exists('trueandpredict',col,'TEXT') - # for row in first_row.itertuples(index=False): - # row_dict = row._asdict() - # columns=row_dict.keys() - # check_query = sqlitedb.select_data('trueandpredict',where_condition = f"ds = '{row.ds}'") - # if len(check_query) > 0: - # set_clause = ", ".join([f"{key} = '{value}'" for key, value in row_dict.items()]) - # sqlitedb.update_data('trueandpredict',set_clause,where_condition = f"ds = '{row.ds}'") - # continue - # sqlitedb.insert_data('trueandpredict',tuple(row_dict.values()),columns=columns) - - # first_row_to_database(df_predict) - - - - df_combined3 = pd.concat([df_combined3, df_predict]).reset_index(drop=True) - - # 计算每个模型与最佳模型的绝对误差比例,根据设置的阈值rote筛选预测值显示最大最小值 - names = [] - names_df = df_combined3.copy() - for col in allmodelnames: - names_df[f'{col}-{most_model_name}-误差比例'] = abs(names_df[col] - names_df[most_model_name]) / names_df[most_model_name] - names.append(f'{col}-{most_model_name}-误差比例') - - names_df = names_df[names] - def add_rote_column(row): - columns = [] - for r in names_df.columns: - if row[r] <= rote: - columns.append(r.split('-')[0]) - return pd.Series([columns], index=['columns']) - names_df['columns'] = names_df.apply(add_rote_column, axis=1) - - def add_upper_lower_bound(row): - - # 计算上边界值 - upper_bound = row.max() - # 计算下边界值 - lower_bound = row.min() - return pd.Series([lower_bound, upper_bound], index=['min_within_quantile', 'max_within_quantile']) - - # df_combined3[['min_within_quantile','max_within_quantile']] = names_df.apply(add_upper_lower_bound, axis=1) - - # 取前五最佳模型的最大最小值作为上下边界值 - df_combined3[['min_within_quantile','max_within_quantile']]= df_combined3[modelnames].apply(add_upper_lower_bound, axis=1) - - def find_closest_values(row): - x = row.y - if x is None or np.isnan(x): - return pd.Series([None, None], index=['min_price','max_price']) - # row = row.drop('ds') - row = row.values.tolist() - row.sort() - print(row) - # x 在row中的索引 - index = row.index(x) - if index == 0: - return pd.Series([row[index+1], row[index+2]], index=['min_price','max_price']) - elif index == len(row)-1: - return pd.Series([row[index-2], row[index-1]], index=['min_price','max_price']) - else: - return pd.Series([row[index-1], row[index+1]], index=['min_price','max_price']) - - - - def find_most_common_model(): - # 最多频率的模型名称 - min_model_max_frequency_model = df_combined3['min_model'].tail(60).value_counts().idxmax() - max_model_max_frequency_model = df_combined3['max_model'].tail(60).value_counts().idxmax() - if min_model_max_frequency_model == max_model_max_frequency_model: - # 取60天第二多的模型 - max_model_max_frequency_model = df_combined3['max_model'].tail(60).value_counts().nlargest(2).index[1] - - df_predict['min_model'] = min_model_max_frequency_model - df_predict['max_model'] = max_model_max_frequency_model - df_predict['min_within_quantile'] = df_predict[min_model_max_frequency_model] - df_predict['max_within_quantile'] = df_predict[max_model_max_frequency_model] - - - # find_most_common_model() - - df_combined3['ds'] = pd.to_datetime(df_combined3['ds']) - df_combined3['ds'] = df_combined3['ds'].dt.strftime('%Y-%m-%d') - df_predict2 = df_combined3.tail(horizon) - - # 保存到数据库 - if not sqlitedb.check_table_exists('accuracy'): - columns = ','.join(df_combined3.columns.to_list()+['id','CREAT_DATE','min_price','max_price']) - sqlitedb.create_table('accuracy',columns=columns) - existing_data = sqlitedb.select_data(table_name = "accuracy") - - if not existing_data.empty: - max_id = existing_data['id'].astype(int).max() - df_predict2['id'] = range(max_id + 1, max_id + 1 + len(df_predict2)) - else: - df_predict2['id'] = range(1, 1 + len(df_predict2)) - # df_predict2['CREAT_DATE'] = now if end_time == '' else end_time - df_predict2['CREAT_DATE'] = end_time - def get_common_columns(df1, df2): - # 获取两个DataFrame的公共列名 - return list(set(df1.columns).intersection(df2.columns)) - - common_columns = get_common_columns(df_predict2, existing_data) - try: - df_predict2[common_columns].to_sql("accuracy", con=sqlitedb.connection, if_exists='append', index=False) - except: - df_predict2.to_sql("accuracy", con=sqlitedb.connection, if_exists='append', index=False) - - # 更新accuracy表中的y值 - update_y = sqlitedb.select_data(table_name = "accuracy",where_condition='y is null') - if len(update_y) > 0: - df_combined4 = df_combined3[(df_combined3['ds'].isin(update_y['ds'])) & (df_combined3['y'].notnull())] - if len(df_combined4) > 0: - for index, row in df_combined4.iterrows(): - try: - sqlitedb.update_data('accuracy',f"y = {row['y']}",f"ds = '{row['ds']}'") - except: - logger.error(f'更新accuracy表中的y值失败,row={row}') - # 上周准确率计算 - predict_y = sqlitedb.select_data(table_name = "accuracy") - # ids = predict_y[predict_y['min_price'].isnull()]['id'].tolist() - ids = predict_y['id'].tolist() - # 准确率基准与绘图上下界逻辑一致 - # predict_y[['min_price','max_price']] = predict_y[['min_within_quantile','max_within_quantile']] - # 模型评估前五均值 - predict_y['min_price'] = predict_y[modelnames].mean(axis=1) -1 - predict_y['max_price'] = predict_y[modelnames].mean(axis=1) +1 - # 模型评估前十均值 - # predict_y['min_price'] = predict_y[allmodelnames[0:10]].mean(axis=1) -1 - # predict_y['max_price'] = predict_y[allmodelnames[0:10]].mean(axis=1) +1 - # 模型评估前十最大最小 - # allmodelnames 和 predict_y 列 重复的 - # allmodelnames = [col for col in allmodelnames if col in predict_y.columns] - # predict_y['min_price'] = predict_y[allmodelnames[0:10]].min(axis=1) - # predict_y['max_price'] = predict_y[allmodelnames[0:10]].max(axis=1) - for id in ids: - row = predict_y[predict_y['id'] == id] - try: - sqlitedb.update_data('accuracy',f"min_price = {row['min_price'].values[0]},max_price = {row['max_price'].values[0]}",f"id = {id}") - except: - logger.error(f'更新accuracy表中的min_price,max_price值失败,row={row}') - # 拼接市场最高最低价 - xlsfilename = os.path.join(dataset,'数据项下载.xls') - df2 = pd.read_excel(xlsfilename)[5:] - df2 = df2.rename(columns = {'数据项名称':'ds','布伦特最低价':'LOW_PRICE','布伦特最高价':'HIGH_PRICE'}) - print(df2.shape) - df = pd.merge(predict_y,df2,on=['ds'],how='left') - df['ds'] = pd.to_datetime(df['ds']) - df = df.reindex() - - # 判断预测值在不在布伦特最高最低价范围内,准确率为1,否则为0 - def is_within_range(row): - for model in allmodelnames: - if row['LOW_PRICE'] <= row[col] <= row['HIGH_PRICE']: - return 1 - else: - return 0 - - # 比较真实最高最低,和预测最高最低 计算准确率 - def calculate_accuracy(row): - # 全子集情况: - if (row['max_price'] >= row['HIGH_PRICE'] and row['min_price'] <= row['LOW_PRICE']) or \ - (row['max_price'] <= row['HIGH_PRICE'] and row['min_price'] >= row['LOW_PRICE']): - return 1 - # 无交集情况: - if row['max_price'] < row['LOW_PRICE'] or \ - row['min_price'] > row['HIGH_PRICE']: - return 0 - # 有交集情况: - else: - sorted_prices = sorted([row['LOW_PRICE'], row['min_price'], row['max_price'], row['HIGH_PRICE']]) - middle_diff = sorted_prices[2] - sorted_prices[1] - price_range = row['HIGH_PRICE'] - row['LOW_PRICE'] - accuracy = middle_diff / price_range - return accuracy - - columns = ['HIGH_PRICE','LOW_PRICE','min_price','max_price'] - df[columns] = df[columns].astype(float) - df['ACCURACY'] = df.apply(calculate_accuracy, axis=1) - # df['ACCURACY'] = df.apply(is_within_range, axis=1) - # 取结束日期上一周的日期 - def get_week_date(end_time): - endtime = end_time - endtimeweek = datetime.datetime.strptime(endtime, '%Y-%m-%d') - up_week = endtimeweek - datetime.timedelta(days=endtimeweek.weekday() + 14) - up_week_dates = [up_week + datetime.timedelta(days=i) for i in range(14)][4:-2] - up_week_dates = [date.strftime('%Y-%m-%d') for date in up_week_dates] - return up_week_dates - up_week_dates = get_week_date(end_time) - - # 计算准确率并保存结果 - def _get_accuracy_rate(df,up_week_dates,endtime): - df3 = df.copy() - df3 = df3[df3['CREAT_DATE'].isin(up_week_dates)] - df3 = df3[df3['ds'].isin(up_week_dates)] - accuracy_rote = 0 - for i,group in df3.groupby('ds'): - print('权重:',weight_dict[len(group)-1]) - print('准确率:',(group['ACCURACY'].sum()/len(group))*weight_dict[len(group)-1]) + # print('日期:',i) + # print(group) + # print('权重:',weight_dict[len(group)-1]) + # print('准确率:',(group['ACCURACY'].sum()/len(group))*weight_dict[len(group)-1]) accuracy_rote += (group['ACCURACY'].sum()/len(group))*weight_dict[len(group)-1] df3.to_csv(os.path.join(dataset,f'accuracy_{endtime}.csv'),index=False) df4 = pd.DataFrame(columns=['开始日期','结束日期','准确率']) - df4.loc[len(df4)] = {'开始日期':up_week_dates[0],'结束日期':up_week_dates[-1],'准确率':accuracy_rote} + df4.loc[len(df4)] = {'开始日期':ds_dates[0],'结束日期':ds_dates[-1],'准确率':accuracy_rote} + df4.to_csv(os.path.join(dataset,f'accuracy_rote_{endtime}.csv'),index=False) df4.to_sql("accuracy_rote", con=sqlitedb.connection, if_exists='append', index=False) - _get_accuracy_rate(df,up_week_dates,end_time) + create_dates,ds_dates = get_week_date(end_time) + _get_accuracy_rate(df,create_dates,ds_dates,end_time) def _add_abs_error_rate(): # 计算每个预测值与真实值之间的偏差率 @@ -1215,7 +802,7 @@ def model_losss_juxiting(sqlitedb): df_predict2 = df_predict.copy() df_predict2['ds'] = pd.to_datetime(df_predict2['ds']) - df_predict2['ds'] = df_predict2['ds'].dt.strftime('%Y-%m-%d 00:00:00') + df_predict2['ds'] = df_predict2['ds'].dt.strftime('%Y-%m-%d') def _add_abs_error_rate(): diff --git a/原油预测绘图调试.ipynb b/原油预测绘图调试.ipynb new file mode 100644 index 0000000..bcbdda1 --- /dev/null +++ b/原油预测绘图调试.ipynb @@ -0,0 +1,710 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "7fadc60c-d710-4b8c-89cd-1d889ece1eaf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "数据库连接成功 192.168.101.27 jingbo_test root\n" + ] + } + ], + "source": [ + "# 读取配置\n", + "# 父目录下的lib\n", + "from lib.dataread import *\n", + "from lib.tools import Graphs,mse,rmse,mae,exception_logger\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "0e5b6f30-b7ca-4718-97a3-48b54156e07f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(51, 30)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
模型(Model)平均平方误差(MSE)均方根误差(RMSE)平均绝对误差(MAE)
11DilatedRNN1.5670001.2520.978
14NLinear1.9050001.3801.104
10BiTCN1.9060001.3801.042
6PatchTST1.9390001.3931.129
19TiDE1.9670001.4021.090
4TSMixer2.0560001.4341.111
7RNN2.1010001.4491.144
13DLinear2.1620001.4701.178
15TFT2.1960001.4821.137
16FEDformer2.2110001.4871.239
9TCN2.3970001.5481.276
0NHITS2.4540001.5671.190
12MLP2.4680001.5711.224
5TSMixerx2.4900001.5781.231
1Informer3.0950001.7591.352
20DeepNPTS3.2670001.8081.357
8GRU5.1720002.2741.909
2LSTM6.8440002.6162.386
18MLPMultivariate8.1630002.8572.221
17StemGNN17.2160004.1493.359
3iTransformer21.5680014.6443.487
\n", + "
" + ], + "text/plain": [ + " 模型(Model) 平均平方误差(MSE) 均方根误差(RMSE) 平均绝对误差(MAE)\n", + "11 DilatedRNN 1.567000 1.252 0.978\n", + "14 NLinear 1.905000 1.380 1.104\n", + "10 BiTCN 1.906000 1.380 1.042\n", + "6 PatchTST 1.939000 1.393 1.129\n", + "19 TiDE 1.967000 1.402 1.090\n", + "4 TSMixer 2.056000 1.434 1.111\n", + "7 RNN 2.101000 1.449 1.144\n", + "13 DLinear 2.162000 1.470 1.178\n", + "15 TFT 2.196000 1.482 1.137\n", + "16 FEDformer 2.211000 1.487 1.239\n", + "9 TCN 2.397000 1.548 1.276\n", + "0 NHITS 2.454000 1.567 1.190\n", + "12 MLP 2.468000 1.571 1.224\n", + "5 TSMixerx 2.490000 1.578 1.231\n", + "1 Informer 3.095000 1.759 1.352\n", + "20 DeepNPTS 3.267000 1.808 1.357\n", + "8 GRU 5.172000 2.274 1.909\n", + "2 LSTM 6.844000 2.616 2.386\n", + "18 MLPMultivariate 8.163000 2.857 2.221\n", + "17 StemGNN 17.216000 4.149 3.359\n", + "3 iTransformer 21.568001 4.644 3.487" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "# 原油计算预测评估指数\n", + "@exception_logger\n", + "def model_losss(sqlitedb,end_time):\n", + " global dataset\n", + " global rote\n", + " most_model = [sqlitedb.select_data('most_model',columns=['most_common_model'],order_by='ds desc',limit=1).values[0][0]]\n", + " most_model_name = most_model[0]\n", + "\n", + " # 预测数据处理 predict\n", + " df_combined = loadcsv(os.path.join(dataset,\"cross_validation.csv\")) \n", + " df_combined = dateConvert(df_combined)\n", + " # 删除空列\n", + " df_combined.dropna(axis=1,inplace=True)\n", + " # 删除缺失值,预测过程不能有缺失值\n", + " df_combined.dropna(inplace=True) \n", + " # 其他列转为数值类型\n", + " df_combined = df_combined.astype({col: 'float32' for col in df_combined.columns if col not in ['cutoff','ds'] })\n", + " # 使用 groupby 和 transform 结合 lambda 函数来获取每个分组中 cutoff 的最小值,并创建一个新的列来存储这个最大值\n", + " df_combined['max_cutoff'] = df_combined.groupby('ds')['cutoff'].transform('max')\n", + "\n", + " # 然后筛选出那些 cutoff 等于 max_cutoff 的行,这样就得到了每个分组中 cutoff 最大的行,并保留了其他列\n", + " df_combined = df_combined[df_combined['cutoff'] == df_combined['max_cutoff']]\n", + " # 删除模型生成的cutoff列\n", + " df_combined.drop(columns=['cutoff', 'max_cutoff'], inplace=True)\n", + " # 获取模型名称\n", + " modelnames = df_combined.columns.to_list()[1:] \n", + " if 'y' in modelnames:\n", + " modelnames.remove('y')\n", + " # df_combined3 = df_combined.copy() # 备份df_combined,后面画图需要\n", + " df_combined3 = sqlitedb.select_data('accuracy')\n", + "\n", + "\n", + " # 空的列表存储每个模型的MSE、RMSE、MAE、MAPE、SMAPE\n", + " cellText = []\n", + "\n", + " # 遍历模型名称,计算模型评估指标 \n", + " for model in modelnames:\n", + " modelmse = mse(df_combined['y'], df_combined[model])\n", + " modelrmse = rmse(df_combined['y'], df_combined[model])\n", + " modelmae = mae(df_combined['y'], df_combined[model])\n", + " # modelmape = mape(df_combined['y'], df_combined[model])\n", + " # modelsmape = smape(df_combined['y'], df_combined[model])\n", + " # modelr2 = r2_score(df_combined['y'], df_combined[model])\n", + " cellText.append([model,round(modelmse, 3), round(modelrmse, 3), round(modelmae, 3)])\n", + " \n", + " model_results3 = pd.DataFrame(cellText,columns=['模型(Model)','平均平方误差(MSE)', '均方根误差(RMSE)', '平均绝对误差(MAE)'])\n", + " # 按MSE降序排列\n", + " model_results3 = model_results3.sort_values(by='平均平方误差(MSE)', ascending=True)\n", + " model_results3.to_csv(os.path.join(dataset,\"model_evaluation.csv\"),index=False)\n", + " modelnames = model_results3['模型(Model)'].tolist()\n", + " allmodelnames = modelnames.copy()\n", + " # # 保存5个最佳模型的名称\n", + " # if len(modelnames) > 5:\n", + " # modelnames = modelnames[0:5]\n", + " # if is_fivemodels:\n", + " # pass\n", + " # else:\n", + " # with open(os.path.join(dataset,\"best_modelnames.txt\"), 'w') as f:\n", + " # f.write(','.join(modelnames) + '\\n')\n", + "\n", + " # # 预测值与真实值对比图\n", + " # plt.rcParams['font.sans-serif'] = ['SimHei']\n", + " # plt.figure(figsize=(15, 10))\n", + " # for n,model in enumerate(modelnames[:5]):\n", + " # plt.subplot(3, 2, n+1)\n", + " # plt.plot(df_combined3['ds'], df_combined3['y'], label='真实值')\n", + " # plt.plot(df_combined3['ds'], df_combined3[model], label=model)\n", + " # plt.legend()\n", + " # plt.xlabel('日期')\n", + " # plt.ylabel('价格')\n", + " # plt.title(model+'拟合')\n", + " # plt.subplots_adjust(hspace=0.5)\n", + " # plt.savefig(os.path.join(dataset,'预测值与真实值对比图.png'), bbox_inches='tight')\n", + " # plt.close()\n", + " \n", + " \n", + " # # 历史数据+预测数据\n", + " # # 拼接未来时间预测\n", + " df_predict = pd.read_csv(os.path.join(dataset,'predict.csv'))\n", + " df_predict.drop('unique_id',inplace=True,axis=1)\n", + " df_predict.dropna(axis=1,inplace=True)\n", + "\n", + " try:\n", + " df_predict['ds'] = pd.to_datetime(df_predict['ds'],format=r'%Y-%m-%d')\n", + " except ValueError :\n", + " df_predict['ds'] = pd.to_datetime(df_predict['ds'],format=r'%Y/%m/%d')\n", + "\n", + " \n", + " df_combined3 = pd.concat([df_combined3, df_predict]).reset_index(drop=True)\n", + "\n", + " # 计算每个模型与最佳模型的绝对误差比例,根据设置的阈值rote筛选预测值显示最大最小值\n", + " # names = []\n", + " # names_df = df_combined3.copy()\n", + " # for col in allmodelnames:\n", + " # names_df[f'{col}-{most_model_name}-误差比例'] = abs(names_df[col] - names_df[most_model_name]) / names_df[most_model_name]\n", + " # names.append(f'{col}-{most_model_name}-误差比例')\n", + "\n", + " # names_df = names_df[names]\n", + " # def add_rote_column(row):\n", + " # columns = []\n", + " # for r in names_df.columns:\n", + " # if row[r] <= rote:\n", + " # columns.append(r.split('-')[0])\n", + " # return pd.Series([columns], index=['columns'])\n", + " # names_df['columns'] = names_df.apply(add_rote_column, axis=1)\n", + " \n", + " def add_upper_lower_bound(row):\n", + "\n", + " # 计算上边界值\n", + " upper_bound = row.max()\n", + " # 计算下边界值\n", + " lower_bound = row.min()\n", + " return pd.Series([lower_bound, upper_bound], index=['min_within_quantile', 'max_within_quantile'])\n", + "\n", + " # df_combined3[['min_within_quantile','max_within_quantile']] = names_df.apply(add_upper_lower_bound, axis=1)\n", + "\n", + " # 取前五最佳模型的最大最小值作为上下边界值\n", + " # df_combined3[['min_within_quantile','max_within_quantile']]= df_combined3[modelnames].apply(add_upper_lower_bound, axis=1)\n", + " \n", + " def find_closest_values(row):\n", + " x = row.y\n", + " if x is None or np.isnan(x):\n", + " return pd.Series([None, None], index=['min_price','max_price'])\n", + " # row = row.drop('ds')\n", + " row = row.values.tolist()\n", + " row.sort()\n", + " print(row)\n", + " # x 在row中的索引\n", + " index = row.index(x)\n", + " if index == 0:\n", + " return pd.Series([row[index+1], row[index+2]], index=['min_price','max_price'])\n", + " elif index == len(row)-1:\n", + " return pd.Series([row[index-2], row[index-1]], index=['min_price','max_price'])\n", + " else:\n", + " return pd.Series([row[index-1], row[index+1]], index=['min_price','max_price'])\n", + "\n", + "\n", + " \n", + " def find_most_common_model():\n", + " # 最多频率的模型名称\n", + " min_model_max_frequency_model = df_combined3['min_model'].tail(60).value_counts().idxmax()\n", + " max_model_max_frequency_model = df_combined3['max_model'].tail(60).value_counts().idxmax()\n", + " if min_model_max_frequency_model == max_model_max_frequency_model:\n", + " # 取60天第二多的模型\n", + " max_model_max_frequency_model = df_combined3['max_model'].tail(60).value_counts().nlargest(2).index[1]\n", + "\n", + " df_predict['min_model'] = min_model_max_frequency_model\n", + " df_predict['max_model'] = max_model_max_frequency_model\n", + " df_predict['min_within_quantile'] = df_predict[min_model_max_frequency_model]\n", + " df_predict['max_within_quantile'] = df_predict[max_model_max_frequency_model]\n", + "\n", + "\n", + " # find_most_common_model()\n", + "\n", + " df_combined3['ds'] = pd.to_datetime(df_combined3['ds'])\n", + " df_combined3['ds'] = df_combined3['ds'].dt.strftime('%Y-%m-%d')\n", + " df_predict2 = df_combined3.tail(horizon)\n", + "\n", + " # 保存到数据库\n", + " # if not sqlitedb.check_table_exists('accuracy'):\n", + " # columns = ','.join(df_combined3.columns.to_list()+['id','CREAT_DATE','min_price','max_price'])\n", + " # sqlitedb.create_table('accuracy',columns=columns)\n", + " # existing_data = sqlitedb.select_data(table_name = \"accuracy\")\n", + "\n", + " # if not existing_data.empty:\n", + " # max_id = existing_data['id'].astype(int).max()\n", + " # df_predict2['id'] = range(max_id + 1, max_id + 1 + len(df_predict2))\n", + " # else:\n", + " # df_predict2['id'] = range(1, 1 + len(df_predict2))\n", + " # df_predict2['CREAT_DATE'] = now if end_time == '' else end_time\n", + " # df_predict2['CREAT_DATE'] = end_time\n", + " # def get_common_columns(df1, df2):\n", + " # # 获取两个DataFrame的公共列名\n", + " # return list(set(df1.columns).intersection(df2.columns))\n", + "\n", + " # common_columns = get_common_columns(df_predict2, existing_data)\n", + " # try:\n", + " # df_predict2[common_columns].to_sql(\"accuracy\", con=sqlitedb.connection, if_exists='append', index=False)\n", + " # except:\n", + " # df_predict2.to_sql(\"accuracy\", con=sqlitedb.connection, if_exists='append', index=False)\n", + " \n", + " # 更新accuracy表中的y值\n", + " # update_y = sqlitedb.select_data(table_name = \"accuracy\",where_condition='y is null')\n", + " # if len(update_y) > 0:\n", + " # df_combined4 = df_combined3[(df_combined3['ds'].isin(update_y['ds'])) & (df_combined3['y'].notnull())]\n", + " # if len(df_combined4) > 0: \n", + " # for index, row in df_combined4.iterrows():\n", + " # try:\n", + " # sqlitedb.update_data('accuracy',f\"y = {row['y']}\",f\"ds = '{row['ds']}'\")\n", + " # except:\n", + " # logger.error(f'更新accuracy表中的y值失败,row={row}')\n", + " # 上周准确率计算\n", + " # predict_y = sqlitedb.select_data(table_name = \"accuracy\") \n", + " # ids = predict_y[predict_y['min_price'].isnull()]['id'].tolist()\n", + " # ids = predict_y['id'].tolist()\n", + " # 准确率基准与绘图上下界逻辑一致\n", + " # predict_y[['min_price','max_price']] = predict_y[['min_within_quantile','max_within_quantile']]\n", + " # 模型评估前五均值 \n", + " # predict_y['min_price'] = predict_y[modelnames].mean(axis=1) -1\n", + " # predict_y['max_price'] = predict_y[modelnames].mean(axis=1) +1\n", + " # # 模型评估前十均值 \n", + " # predict_y['min_price'] = predict_y[allmodelnames[0:10]].mean(axis=1) -1.5\n", + " # predict_y['max_price'] = predict_y[allmodelnames[0:10]].mean(axis=1) +1.5\n", + " # 模型评估前十最大最小\n", + " # allmodelnames 和 predict_y 列 重复的\n", + " # allmodelnames = [col for col in allmodelnames if col in predict_y.columns]\n", + " # predict_y['min_price'] = predict_y[allmodelnames[0:10]].min(axis=1) \n", + " # predict_y['max_price'] = predict_y[allmodelnames[0:10]].max(axis=1)\n", + " # for id in ids:\n", + " # row = predict_y[predict_y['id'] == id]\n", + " # try:\n", + " # sqlitedb.update_data('accuracy',f\"min_price = {row['min_price'].values[0]},max_price = {row['max_price'].values[0]}\",f\"id = {id}\")\n", + " # except:\n", + " # logger.error(f'更新accuracy表中的min_price,max_price值失败,row={row}')\n", + " # 拼接市场最高最低价\n", + " # xlsfilename = os.path.join(dataset,'数据项下载.xls')\n", + " # df2 = pd.read_excel(xlsfilename)[5:]\n", + " # df2 = df2.rename(columns = {'数据项名称':'ds','布伦特最低价':'LOW_PRICE','布伦特最高价':'HIGH_PRICE'})\n", + " # print(df2.shape)\n", + " # df = pd.merge(predict_y,df2,on=['ds'],how='left')\n", + " # df['ds'] = pd.to_datetime(df['ds'])\n", + " # df = df.reindex()\n", + "\n", + " # 判断预测值在不在布伦特最高最低价范围内,准确率为1,否则为0\n", + " # def is_within_range(row):\n", + " # for model in allmodelnames:\n", + " # if row['LOW_PRICE'] <= row[col] <= row['HIGH_PRICE']:\n", + " # return 1\n", + " # else:\n", + " # return 0\n", + "\n", + " # 比较真实最高最低,和预测最高最低 计算准确率\n", + " # def calculate_accuracy(row):\n", + " # # 全子集情况:\n", + " # if (row['max_price'] >= row['HIGH_PRICE'] and row['min_price'] <= row['LOW_PRICE']) or \\\n", + " # (row['max_price'] <= row['HIGH_PRICE'] and row['min_price'] >= row['LOW_PRICE']):\n", + " # return 1 \n", + " # # 无交集情况:\n", + " # if row['max_price'] < row['LOW_PRICE'] or \\\n", + " # row['min_price'] > row['HIGH_PRICE']:\n", + " # return 0\n", + " # # 有交集情况:\n", + " # else:\n", + " # sorted_prices = sorted([row['LOW_PRICE'], row['min_price'], row['max_price'], row['HIGH_PRICE']])\n", + " # middle_diff = sorted_prices[2] - sorted_prices[1]\n", + " # price_range = row['HIGH_PRICE'] - row['LOW_PRICE']\n", + " # accuracy = middle_diff / price_range\n", + " # return accuracy\n", + "\n", + " # columns = ['HIGH_PRICE','LOW_PRICE','min_price','max_price']\n", + " # df[columns] = df[columns].astype(float)\n", + " # df['ACCURACY'] = df.apply(calculate_accuracy, axis=1)\n", + " # df['ACCURACY'] = df.apply(is_within_range, axis=1)\n", + " # 取结束日期上一周的日期\n", + " def get_week_date(end_time):\n", + " endtime = end_time\n", + " endtimeweek = datetime.datetime.strptime(endtime, '%Y-%m-%d')\n", + " up_week = endtimeweek - datetime.timedelta(days=endtimeweek.weekday() + 14)\n", + " up_week_dates = [up_week + datetime.timedelta(days=i) for i in range(14)]\n", + " create_dates = [date.strftime('%Y-%m-%d') for date in up_week_dates[4:-3]]\n", + " ds_dates = [date.strftime('%Y-%m-%d') for date in up_week_dates[-7:-2]]\n", + " return create_dates,ds_dates\n", + " \n", + " create_dates,ds_dates = get_week_date(end_time)\n", + " # 计算准确率并保存结果\n", + " def _get_accuracy_rate(df,create_dates,ds_dates):\n", + " df3 = df.copy()\n", + " df3 = df3[df3['CREAT_DATE'].isin(create_dates)]\n", + " df3 = df3[df3['ds'].isin(ds_dates)]\n", + " accuracy_rote = 0\n", + " for i,group in df3.groupby('CREAT_DATE'):\n", + " accuracy_rote += (group['ACCURACY'].sum()/len(group))*weight_dict[len(group)-1]\n", + " df4 = pd.DataFrame(columns=['开始日期','结束日期','准确率'])\n", + " df4.loc[len(df4)] = {'开始日期':ds_dates[0],'结束日期':ds_dates[-1],'准确率':accuracy_rote}\n", + " df4.to_sql(\"accuracy_rote\", con=sqlitedb.connection, if_exists='append', index=False)\n", + " # return df4\n", + " \n", + " # _get_accuracy_rate(df,create_dates,ds_dates)\n", + " \n", + " def _add_abs_error_rate():\n", + " # 计算每个预测值与真实值之间的偏差率\n", + " for model in allmodelnames:\n", + " df_combined3[f'{model}_abs_error_rate'] = abs(df_combined3['y'] - df_combined3[model]) / df_combined3['y']\n", + "\n", + " # 获取每行对应的最小偏差率值\n", + " min_abs_error_rate_values = df_combined3.apply(lambda row: row[[f'{model}_abs_error_rate' for model in allmodelnames]].min(), axis=1)\n", + " # 获取每行对应的最小偏差率值对应的列名\n", + " min_abs_error_rate_column_name = df_combined3.apply(lambda row: row[[f'{model}_abs_error_rate' for model in allmodelnames]].idxmin(), axis=1) \n", + " # 将列名索引转换为列名\n", + " min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(lambda x: x.split('_')[0])\n", + " # 获取最小偏差率对应的模型的预测值\n", + " min_abs_error_rate_predictions = df_combined3.apply(lambda row: row[min_abs_error_rate_column_name[row.name]], axis=1)\n", + " # 将最小偏差率对应的模型的预测值添加到DataFrame中\n", + " df_combined3['min_abs_error_rate_prediction'] = min_abs_error_rate_predictions\n", + " df_combined3['min_abs_error_rate_column_name'] = min_abs_error_rate_column_name\n", + " _add_abs_error_rate()\n", + "\n", + " # 判断 df 的数值列转为float\n", + " for col in df_combined3.columns:\n", + " try:\n", + " if col != 'ds':\n", + " df_combined3[col] = df_combined3[col].astype(float)\n", + " df_combined3[col] = df_combined3[col].round(2)\n", + " except ValueError:\n", + " pass\n", + " df_combined3.to_csv(os.path.join(dataset,\"testandpredict_groupby.csv\"),index=False) \n", + " \n", + " \n", + " # 历史价格+预测价格\n", + " sqlitedb.drop_table('testandpredict_groupby')\n", + " df_combined3.to_sql('testandpredict_groupby',sqlitedb.connection,index=False)\n", + " # 新增均值列\n", + " df_combined3['mean'] = df_combined3[modelnames].mean(axis=1)\n", + "\n", + " # def _plt_predict_ture(df):\n", + " # lens = df.shape[0] if df.shape[0] < 180 else 90\n", + " # df = df[-lens:] # 取180个数据点画图\n", + " # # 历史价格\n", + " # plt.figure(figsize=(20, 10))\n", + " # plt.plot(df['ds'], df['y'], label='真实值')\n", + " # # 均值线\n", + " # plt.plot(df['ds'], df['mean'], color='r', linestyle='--', label='前五模型预测均值')\n", + " # # 颜色填充\n", + " # plt.fill_between(df['ds'], df['max_within_quantile'], df['min_within_quantile'], alpha=0.2)\n", + " # markers = ['o', 's', '^', 'D', 'v', '*', 'p', 'h', 'H', '+', 'x', 'd']\n", + " # random_marker = random.choice(markers)\n", + " # for model in modelnames:\n", + " # # for model in ['BiTCN','RNN']:\n", + " # plt.plot(df['ds'][-horizon:], df[model][-horizon:], label=model,marker=random_marker)\n", + " # # plt.plot(df_combined3['ds'], df_combined3['min_abs_error_rate_prediction'], label='最小绝对误差', linestyle='--', color='orange')\n", + " # # 网格\n", + " # plt.grid(True)\n", + " # # 显示历史值\n", + " # for i, j in zip(df['ds'], df['y']):\n", + " # plt.text(i, j, str(j), ha='center', va='bottom')\n", + "\n", + " # # for model in most_model:\n", + " # # plt.plot(df['ds'], df[model], label=model,marker='o')\n", + " # # 当前日期画竖虚线\n", + " # plt.axvline(x=df['ds'].iloc[-horizon], color='r', linestyle='--')\n", + " # plt.legend()\n", + " # plt.xlabel('日期')\n", + " # plt.ylabel('价格')\n", + " \n", + " # plt.savefig(os.path.join(dataset,'历史价格-预测值.png'), bbox_inches='tight')\n", + " # plt.close()\n", + " \n", + " def _plt_top10_predict_ture():\n", + " # 模型评估前十均值画图\n", + " df = sqlitedb.select_data(table_name = \"accuracy\")\n", + " # CREAT_DATE 去重取id最大的数据\n", + " df = df.sort_values(by=['CREAT_DATE','id'],ascending=[False,False]).drop_duplicates(subset=['CREAT_DATE'],keep='last')\n", + " print(df.shape)\n", + " lens = df.shape[0] if df.shape[0] < 180 else 180 \n", + " df = df[-lens:] # 取180个数据点画图\n", + " # 历史价格\n", + " plt.figure(figsize=(20, 10))\n", + " plt.plot(df['ds'], df['y'], label='真实值')\n", + " # 均值线\n", + " df['mean'] = df[allmodelnames[:10]].mean(axis=1)\n", + " plt.plot(df['ds'], df['mean'], color='g', linestyle='--', label='前十模型预测均值')\n", + " plt.plot(df['ds'], df['min_price'], color='r', linestyle='--', label='min_price')\n", + " plt.plot(df['ds'], df['max_price'], color='r', linestyle='--', label='max_price')\n", + " # 颜色填充\n", + " plt.fill_between(df['ds'], df['max_price'], df['min_price'], alpha=0.2)\n", + " markers = ['o', 's', '^', 'D', 'v', '*', 'p', 'h', 'H', '+', 'x', 'd']\n", + " random_marker = random.choice(markers)\n", + " # for model in modelnames[:5]:\n", + " # for model in ['BiTCN','RNN']:\n", + " # plt.plot(df['ds'][-horizon:], df[model][-horizon:], label=model,marker=random_marker)\n", + " # plt.plot(df_combined3['ds'], df_combined3['min_abs_error_rate_prediction'], label='最小绝对误差', linestyle='--', color='orange')\n", + " # 网格\n", + " plt.grid(True)\n", + " # 显示历史值\n", + " # for i, j in zip(df['ds'], df['y']):\n", + " # plt.text(i, j, str(j), ha='center', va='bottom')\n", + "\n", + " # for model in most_model:\n", + " # plt.plot(df['ds'], df[model], label=model,marker='o')\n", + " # 当前日期画竖虚线\n", + " plt.axvline(x=df['ds'].iloc[-horizon], color='r', linestyle='--')\n", + " plt.legend()\n", + " plt.xlabel('日期')\n", + " plt.ylabel('价格')\n", + " \n", + " plt.savefig(os.path.join(dataset,'历史价格-预测值1.png'), bbox_inches='tight')\n", + " plt.close()\n", + "\n", + " def _plt_predict_table(df): \n", + " # 预测值表格\n", + " fig, ax = plt.subplots(figsize=(20, 6))\n", + " ax.axis('off') # 关闭坐标轴\n", + " # 数值保留2位小数\n", + " df = df.round(2)\n", + " df = df[-horizon:]\n", + " df['Day'] = [f'Day_{i}' for i in range(1,horizon+1)]\n", + " # Day列放到最前面\n", + " df = df[['Day'] + list(df.columns[:-1])]\n", + " table = ax.table(cellText=df.values, colLabels=df.columns, loc='center')\n", + " #加宽表格\n", + " table.auto_set_font_size(False)\n", + " table.set_fontsize(10)\n", + "\n", + " # 设置表格样式,列数据最小的用绿色标识\n", + " plt.savefig(os.path.join(dataset,'预测值表格.png'), bbox_inches='tight')\n", + " plt.close()\n", + " \n", + " def _plt_model_results3():\n", + " # 可视化评估结果\n", + " plt.rcParams['font.sans-serif'] = ['SimHei']\n", + " fig, ax = plt.subplots(figsize=(20, 10))\n", + " ax.axis('off') # 关闭坐标轴\n", + " table = ax.table(cellText=model_results3.values, colLabels=model_results3.columns, loc='center')\n", + " # 加宽表格\n", + " table.auto_set_font_size(False)\n", + " table.set_fontsize(10)\n", + "\n", + " # 设置表格样式,列数据最小的用绿色标识\n", + " plt.savefig(os.path.join(dataset,'模型评估.png'), bbox_inches='tight')\n", + " plt.close()\n", + "\n", + " # _plt_predict_ture(df_combined3)\n", + " _plt_top10_predict_ture()\n", + " _plt_predict_table(df_combined3)\n", + " _plt_model_results3()\n", + "\n", + " return model_results3\n", + " \n", + "model_losss(sqlitedb=sqlitedb,end_time='2024-12-16')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8ce1967f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/测试环境获取市场信息平台数据项.ipynb b/测试环境获取市场信息平台数据项.ipynb new file mode 100644 index 0000000..ce30594 --- /dev/null +++ b/测试环境获取市场信息平台数据项.ipynb @@ -0,0 +1,203 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 25, + "id": "31c0e11d-c87a-4e95-92a0-d1d09625e255", + "metadata": {}, + "outputs": [], + "source": [ + "from config_jingbo import *\n", + "import requests\n", + "import json\n", + "import datetime" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "83c81b9e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'http://192.168.100.53:8080/jingbo-dev/api/server/login'" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "login_pushreport_url\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a058f507", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "2b330ee3-c006-4ab1-8558-59c51ac8d86f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'data': {'account': 'api_test',\n", + " 'password': 'ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=',\n", + " 'tenantHashCode': '8a4577dbd919675758d57999a1e891fe',\n", + " 'terminal': 'API'},\n", + " 'funcModule': 'API',\n", + " 'funcOperation': '获取token'}" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "login_data" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "dcb6100a-ed2b-4077-a1a9-361c6cb565f9", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "def get_head_auth_report():\n", + " login_res = requests.post(url=login_pushreport_url, json=login_data, timeout=(3, 5))\n", + " text = json.loads(login_res.text)\n", + " print(text)\n", + " if text[\"status\"]:\n", + " token = text[\"data\"][\"accessToken\"]\n", + " return token\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "22c0c7c4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'confirmFlg': False, 'data': {'accessToken': 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfdGVzdCIsInRoIjoiOGE0NTc3ZGJkOTE5Njc1NzU4ZDU3OTk5YTFlODkxZmUiLCJsdCI6ImFwaSIsImlzcyI6IiIsInRtIjoiUEMiLCJleHAiOjE3MzUxNDkzMzYsImp0aSI6IjczYzJkOGJjYzQ2NzQwYjNiYWQxZmI3NjMzODM4YTcxIn0.zLVuyCEbg-x9lRXuJDYbdiwzo_nhEQGCCInnJKfQcd8', 'md5Token': '39413fe9e3e93f717d8d2713c4487172'}, 'status': True}\n" + ] + } + ], + "source": [ + "token = get_head_auth_report()" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "12077ead", + "metadata": {}, + "outputs": [], + "source": [ + "# token = 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfdGVzdCIsInRoIjoiOGE0NTc3ZGJkOTE5Njc1NzU4ZDU3OTk5YTFlODkxZmUiLCJsdCI6ImFwaSIsImlzcyI6IiIsInRtIjoiUEMiLCJleHAiOjE3MzE5NTkzNjUsImp0aSI6IjRiMjcwNTgzN2YyZDQxOWM4MzQ3NjI2NDQwZDlhZGQzIn0.PPgnoiJt412dJiceqVW8w7qkJFY4s-VqU9z6ZIkpqho'" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "a7ae21d1", + "metadata": {}, + "outputs": [], + "source": [ + "# def upload_warning_data(warning_data):\n", + "# token = get_head_auth_report()\n", + "# warning_data = warning_data\n", + "# headers = {\"Authorization\": token}\n", + "# logger.info(\"预警上传中...\")\n", + "# logger.info(f\"token:{token}\")\n", + "# logger.info(f\"warning_data:{warning_data}\" )\n", + "# upload_res = requests.post(url=upload_warning_url, headers=headers, json=warning_data, timeout=(3, 15))\n", + "# if upload_res:\n", + "# return upload_res\n", + "# else:\n", + "# logger.info(\"预警上传失败\")\n", + "# return None\n", + "\n", + "\n", + "# logger.info(f'上传预警信息')\n", + "# try:\n", + "# warning_date = datetime.datetime.now().strftime('%Y-%m-%d')\n", + "# content = f'{warning_date}有2887个停更'\n", + "# warning_data['data']['WARNING_DATE'] = warning_date\n", + "# warning_data['data']['WARNING_CONTENT'] = content\n", + "# upload_warning_data(warning_data)\n", + "# logger.info(f'上传预警信息成功')\n", + "# except Exception as e:\n", + "# logger.error(f'上传预警信息失败:{e}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "54942e1a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\"confirmFlg\":false,\"data\":[],\"status\":true}\n" + ] + } + ], + "source": [ + "query_data_list_item_nos_data = {\n", + " \"funcModule\":'数据项编码集合',\n", + " \"funcOperation\":'数据项编码集合',\n", + " \"data\":{\n", + " \"dataItemNoList\":['EXCHANGE|RATE|MIDDLE_PRICE'],\n", + " \"dateEnd\":'20240101',\n", + " \"dateStart\":'20241024'\n", + " \n", + " }\n", + "}\n", + "\n", + "headers = {\"Authorization\": token}\n", + "items_res = requests.post(url=query_data_list_item_nos_url, headers=headers, json=query_data_list_item_nos_data, timeout=(3, 35))\n", + "print(items_res.text)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}