diff --git a/codeback.py b/codeback.py
index c6605ef..74e790f 100644
--- a/codeback.py
+++ b/codeback.py
@@ -500,4 +500,302 @@
 # plt.ylabel('价格')
 # plt.savefig(os.path.join(dataset,'历史价格-预测值.png'), bbox_inches='tight')
-# plt.close()
\ No newline at end of file
+# plt.close()
+
+
+
+#################################################### Feature processing
+
+
+
+
+
+
+
+#################################################### Upload service
+
+# def upload_warning_info(last_update_times_df,y_last_update_time):
+#     logger.info(f'上传预警信息')
+#     try:
+#         warning_data_df = last_update_times_df[last_update_times_df['warning_date'] < y_last_update_time]
+#         if len(warning_data_df) > 0:
+#             content = '原油特征指标预警信息:\n\n'
+#             warning_data_df = warning_data_df.sort_values(by='停更周期',ascending=False)
+#             fixed_length = 20
+#             warning_data_df['特征名称'] = warning_data_df['特征名称'].str.replace(" ", "")
+#             content = warning_data_df.to_string(index=False, col_space=fixed_length)
+
+#         else:
+#             logger.info(f'没有需要上传的预警信息')
+#             content = '没有需要维护的特征指标'
+#         warning_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+#         warning_data['data']['WARNING_DATE'] = warning_date
+#         warning_data['data']['WARNING_CONTENT'] = content
+
+#         upload_warning_data(warning_data)
+#         logger.info(f'上传预警信息成功')
+#     except Exception as e:
+#         logger.error(f'上传预警信息失败:{e}')
+
+
+
+
+
+####################################### Plotting logic
+
+# def model_losss(sqlitedb):
+#     global dataset
+#     # Prediction data processing (predict)
+#     df_combined = loadcsv(os.path.join(dataset,"cross_validation.csv"))
+#     df_combined = dateConvert(df_combined)
+#     # Drop empty columns
+#     df_combined.dropna(axis=1,inplace=True)
+#     # Drop missing values -- the prediction step cannot handle NaNs
+#     df_combined.dropna(inplace=True)
+#     # Convert the remaining columns to numeric types
+#     df_combined = df_combined.astype({col: 'float32' for col in df_combined.columns if col not in ['cutoff','ds'] })
+#     # Use groupby + transform to take each ds group's minimum cutoff (stored, somewhat confusingly, in a column named max_cutoff)
+#     df_combined['max_cutoff'] = df_combined.groupby('ds')['cutoff'].transform('min')
+
+#     # Keep only the rows whose cutoff equals max_cutoff: one row per ds group, other columns preserved
+#     df_combined = df_combined[df_combined['cutoff'] == df_combined['max_cutoff']]
+#     # Drop the model-generated cutoff columns
+#     df_combined.drop(columns=['cutoff', 'max_cutoff'], inplace=True)
+#     # Get the model names
+#     modelnames = df_combined.columns.to_list()[2:]
+#     if 'y' in modelnames:
+#         modelnames.remove('y')
+#     df_combined3 = df_combined.copy() # back up df_combined; needed for plotting later
+
+#     # Compute volatility
+#     df_combined3['volatility'] = df_combined3['y'].pct_change().round(4)
+#     # Compute the 10% / 90% quantiles of volatility over the last 60 days
+#     df_combined3['quantile_10'] = df_combined3['volatility'].rolling(60).quantile(0.1)
+#     df_combined3['quantile_90'] = df_combined3['volatility'].rolling(60).quantile(0.9)
+#     df_combined3 = df_combined3.round(4)
+#     # Convert the quantiles into prices
+#     df_combined3['quantile_10_price'] = df_combined3['y'] * (1 + df_combined3['quantile_10'])
+#     df_combined3['quantile_90_price'] = df_combined3['y'] * (1 + df_combined3['quantile_90'])
+
+#     # Applied row by row
+#     def find_min_max_within_quantile(row):
+#         # Get the 10% and 90% quantile prices
+#         q10 = row['quantile_10_price']
+#         q90 = row['quantile_90_price']
+
+#         # Check whether the float values are NaN
+#         if pd.isna(q10) or pd.isna(q90):
+#             return pd.Series([None, None, None, None], index=['min_within_quantile','max_within_quantile','min_model','max_model'])
+
+#         # Initialize the min and max to None
+#         min_value = None
+#         max_value = None
+#         min_value_model = ''
+#         max_value_model = ''
+
+
+#         # Scan the model columns for the min and max values inside the quantile range
+#         for model in modelnames:
+#             value = row[model]
+#             if value >= q10 and value <= q90:
+#                 if min_value is None or value < min_value:
+#                     min_value = value
+#                     min_value_model = model
+
+#                 if max_value is None or value > max_value:
+#                     max_value = value
+#                     max_value_model = model
+
+#         # Return the min and max values
+#         return pd.Series([min_value, max_value,min_value_model,max_value_model], index=['min_within_quantile', 'max_within_quantile','min_model','max_model'])
+
+#     # Apply the function to every row
+#     df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)
+
+#     # Drop rows containing NaNs
+#     # df_combined3.dropna(inplace=True)
+#     # Save to the database
+#     df_combined3.to_sql('testandpredict_groupby', sqlitedb.connection, if_exists='replace', index=False)
+#     df_combined3.to_csv(os.path.join(dataset,"testandpredict_groupby.csv"),index=False)
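# A minimal, self-contained sketch of the rolling-quantile band built above,
# assuming only a 'y' price column (synthetic data here, not the project's):
import numpy as np
import pandas as pd

df = pd.DataFrame({'y': 100 + np.random.randn(200).cumsum()})
vol = df['y'].pct_change()                                        # daily returns
df['quantile_10_price'] = df['y'] * (1 + vol.rolling(60).quantile(0.1))
df['quantile_90_price'] = df['y'] * (1 + vol.rolling(60).quantile(0.9))
print(df.tail())                                                  # band is NaN for the first 60 rows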
+
+
+#     # Empty list to collect each model's MSE, RMSE, MAE (MAPE/SMAPE are disabled below)
+#     cellText = []
+
+#     # Iterate over the model names and compute the evaluation metrics
+#     for model in modelnames:
+#         modelmse = mse(df_combined['y'], df_combined[model])
+#         modelrmse = rmse(df_combined['y'], df_combined[model])
+#         modelmae = mae(df_combined['y'], df_combined[model])
+#         # modelmape = mape(df_combined['y'], df_combined[model])
+#         # modelsmape = smape(df_combined['y'], df_combined[model])
+#         # modelr2 = r2_score(df_combined['y'], df_combined[model])
+#         cellText.append([model,round(modelmse, 3), round(modelrmse, 3), round(modelmae, 3)])
+
+#     model_results3 = pd.DataFrame(cellText,columns=['模型(Model)','平均平方误差(MSE)', '均方根误差(RMSE)', '平均绝对误差(MAE)'])
+#     # Sort by MSE in ascending order
+#     model_results3 = model_results3.sort_values(by='平均平方误差(MSE)', ascending=True)
+#     model_results3.to_csv(os.path.join(dataset,"model_evaluation.csv"),index=False)
+#     modelnames = model_results3['模型(Model)'].tolist()
+#     allmodelnames = modelnames.copy()
+#     # Save the names of the 5 best models
+#     if len(modelnames) > 5:
+#         modelnames = modelnames[0:5]
+#     with open(os.path.join(dataset,"best_modelnames.txt"), 'w') as f:
+#         f.write(','.join(modelnames) + '\n')
+
+#     # Plot predictions against the true values
+#     plt.rcParams['font.sans-serif'] = ['SimHei']
+#     plt.figure(figsize=(15, 10))
+#     # A 3x2 canvas holding the 5 subplots
+#     for n,model in enumerate(modelnames):
+#         plt.subplot(3, 2, n+1)
+#         plt.plot(df_combined3['ds'], df_combined3['y'], label='真实值')
+#         plt.plot(df_combined3['ds'], df_combined3[model], label=model)
+#         plt.legend()
+#         plt.xlabel('日期')
+#         plt.ylabel('价格')
+#         plt.title(model+'拟合')
+#     plt.subplots_adjust(hspace=0.5)
+#     plt.savefig(os.path.join(dataset,'预测值与真实值对比图.png'), bbox_inches='tight')
+#     plt.close()
+
+#     # Historical data + prediction data
+#     # Append the future-horizon predictions
+#     df_predict = loadcsv(os.path.join(dataset,'predict.csv'))
+#     df_predict.drop('unique_id',inplace=True,axis=1)
+#     df_predict.dropna(axis=1,inplace=True)
+
+#     try:
+#         df_predict['ds'] = pd.to_datetime(df_predict['ds'],format=r'%Y-%m-%d')
+#     except ValueError :
+#         df_predict['ds'] = pd.to_datetime(df_predict['ds'],format=r'%Y/%m/%d')
+
+#     # Take the first row and store it in the database
+#     first_row = df_predict.head(1)
+#     first_row['ds'] = first_row['ds'].dt.strftime('%Y-%m-%d 00:00:00')
+#     # Save the prediction results to the database
+#     if not sqlitedb.check_table_exists('trueandpredict'):
+#         first_row.to_sql('trueandpredict',sqlitedb.connection,index=False)
+#     else:
+#         for row in first_row.itertuples(index=False):
+#             row_dict = row._asdict()
+#             columns=row_dict.keys()
+#             for col in columns:
+#                 sqlitedb.add_column_if_not_exists('trueandpredict',col,'TEXT')
+#             check_query = sqlitedb.select_data('trueandpredict',where_condition = f"ds = '{row.ds}'")
+#             if len(check_query) > 0:
+#                 set_clause = ", ".join([f"{key} = '{value}'" for key, value in row_dict.items()])
+#                 sqlitedb.update_data('trueandpredict',set_clause,where_condition = f"ds = '{row.ds}'")
+#                 continue
+#             sqlitedb.insert_data('trueandpredict',tuple(row_dict.values()),columns=columns)
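# The insert-or-update loop above goes through project helpers (check_table_exists,
# select_data, update_data, insert_data). The same check-then-write pattern in plain
# sqlite3, as a rough sketch -- table and column names here are illustrative, and
# parameterized queries replace the string interpolation used above:
import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute("CREATE TABLE trueandpredict (ds TEXT PRIMARY KEY, NHITS REAL)")

def upsert(conn, ds, value):
    # Update if the date already exists, otherwise insert.
    cur = conn.execute("SELECT 1 FROM trueandpredict WHERE ds = ?", (ds,))
    if cur.fetchone():
        conn.execute("UPDATE trueandpredict SET NHITS = ? WHERE ds = ?", (value, ds))
    else:
        conn.execute("INSERT INTO trueandpredict (ds, NHITS) VALUES (?, ?)", (ds, value))
    conn.commit()

upsert(conn, '2024-12-05 00:00:00', 71.5)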
+#     # Most frequent model names over the recent window
+#     num = df_combined3.shape[0] if df_combined3.shape[0] < 60 else 60
+#     min_model_max_frequency_model = df_combined3['min_model'][-num:].value_counts().idxmax()
+#     max_model_max_frequency_model = df_combined3['max_model'][-num:].value_counts().idxmax()
+#     df_predict['min_model'] = min_model_max_frequency_model
+#     df_predict['max_model'] = max_model_max_frequency_model
+#     df_predict['min_within_quantile'] = df_predict[min_model_max_frequency_model]
+#     df_predict['max_within_quantile'] = df_predict[max_model_max_frequency_model]
+#     df_predict2 = df_predict.copy()
+#     df_predict2['ds'] = df_predict2['ds'].dt.strftime('%Y-%m-%d 00:00:00')
+#     # Save the prediction results to the database
+#     # Check whether the table exists
+#     if not sqlitedb.check_table_exists('testandpredict_groupby'):
+#         df_predict2.to_sql('testandpredict_groupby',sqlitedb.connection,index=False)
+#     else:
+#         for row in df_predict2.itertuples(index=False):
+#             row_dict = row._asdict()
+#             check_query = sqlitedb.select_data('testandpredict_groupby',where_condition = f"ds = '{row.ds}'")
+#             if len(check_query) > 0:
+#                 set_clause = ", ".join([f"{key} = '{value}'" for key, value in row_dict.items()])
+#                 sqlitedb.update_data('testandpredict_groupby',set_clause,where_condition = f"ds = '{row.ds}'")
+#                 continue
+#             sqlitedb.insert_data('testandpredict_groupby',tuple(row_dict.values()),columns=row_dict.keys())
+#     # Compute the deviation rate between each prediction and the true value
+#     for model in allmodelnames:
+#         df_combined3[f'{model}_abs_error_rate'] = abs(df_combined3['y'] - df_combined3[model]) / df_combined3['y']
+
+#     # Per-row minimum deviation rate
+#     min_abs_error_rate_values = df_combined3.apply(lambda row: row[[f'{model}_abs_error_rate' for model in allmodelnames]].min(), axis=1)
+#     # Column name holding the per-row minimum deviation rate
+#     min_abs_error_rate_column_name = df_combined3.apply(lambda row: row[[f'{model}_abs_error_rate' for model in allmodelnames]].idxmin(), axis=1)
+#     # Strip the suffix to recover the model name
+#     min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(lambda x: x.split('_')[0])
+#     # Prediction of the model with the minimum deviation rate
+#     min_abs_error_rate_predictions = df_combined3.apply(lambda row: row[min_abs_error_rate_column_name[row.name]], axis=1)
+#     # Add that prediction to the DataFrame
+#     df_combined3['min_abs_error_rate_prediction'] = min_abs_error_rate_predictions
+#     df_combined3['min_abs_error_rate_column_name'] = min_abs_error_rate_column_name
+#     df_combined3 = pd.concat([df_combined3, df_predict]).reset_index(drop=True)
+#     # Convert the numeric columns of the df to float
+#     for col in df_combined3.columns:
+#         try:
+#             if col != 'ds':
+#                 df_combined3[col] = df_combined3[col].astype(float)
+#                 df_combined3[col] = df_combined3[col].round(2)
+#         except ValueError:
+#             pass
+#     df_combined3.to_csv(os.path.join(dataset,"df_combined3.csv"),index=False)
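# A compact sketch of the per-row "best model" selection above: build one
# absolute-error-rate column per model, then take idxmin across them row-wise.
# The column names ('ModelA', 'ModelB') are illustrative, not the project's:
import pandas as pd

df = pd.DataFrame({'y': [10.0, 20.0], 'ModelA': [11.0, 19.0], 'ModelB': [10.5, 25.0]})
models = ['ModelA', 'ModelB']
rates = pd.DataFrame({m: (df['y'] - df[m]).abs() / df['y'] for m in models})
df['best_model'] = rates.idxmin(axis=1)                  # column name of the smallest rate
df['best_prediction'] = df.apply(lambda r: r[r['best_model']], axis=1)
print(df)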
+
+#     # Historical price + predicted price
+#     # df_combined3 = df_combined3[-50:] # plot only the last 50 points
+#     # Historical price
+#     plt.figure(figsize=(20, 10))
+#     plt.plot(df_combined3['ds'], df_combined3['y'], label='真实值')
+#     # Shaded band
+#     plt.fill_between(df_combined3['ds'], df_combined3['min_within_quantile'], df_combined3['max_within_quantile'], alpha=0.2)
+#     # plt.plot(df_combined3['ds'], df_combined3['min_abs_error_rate_prediction'], label='最小绝对误差', linestyle='--', color='orange')
+#     # Grid
+#     plt.grid(True)
+#     # Annotate the historical values
+#     for i, j in zip(df_combined3['ds'], df_combined3['y']):
+#         plt.text(i, j, str(j), ha='center', va='bottom')
+
+#     # Query the best model name from the database
+#     # most_model = [sqlitedb.select_data('most_model',columns=['most_common_model'],order_by='ds desc',limit=1).values[0][0]]
+#     most_model = modelnames[0:5]
+#     for model in most_model:
+#         plt.plot(df_combined3['ds'], df_combined3[model], label=model,marker='o')
+#     # Vertical dashed line at the current date
+#     plt.axvline(x=df_combined3['ds'].iloc[-horizon], color='r', linestyle='--')
+#     plt.legend()
+#     plt.xlabel('日期')
+#     plt.ylabel('价格')
+
+#     plt.savefig(os.path.join(dataset,'历史价格-预测值.png'), bbox_inches='tight')
+#     plt.close()
+
+#     # Prediction table
+#     fig, ax = plt.subplots(figsize=(20, 6))
+#     ax.axis('off')  # hide the axes
+#     # Keep 2 decimal places
+#     df_combined3 = df_combined3.round(2)
+#     df_combined3 = df_combined3[-horizon:]
+#     df_combined3['Day'] = [f'Day_{i}' for i in range(1,horizon+1)]
+#     # Move the Day column to the front
+#     df_combined3 = df_combined3[['Day'] + list(df_combined3.columns[:-1])]
+#     table = ax.table(cellText=df_combined3.values, colLabels=df_combined3.columns, loc='center')
+#     # Widen the table
+#     table.auto_set_font_size(False)
+#     table.set_fontsize(10)
+
+#     # Style the table; the smallest value in each column should be marked green
+#     plt.savefig(os.path.join(dataset,'预测值表格.png'), bbox_inches='tight')
+#     plt.close()
+#     # plt.show()
+
+#     # Visualize the evaluation results
+#     plt.rcParams['font.sans-serif'] = ['SimHei']
+#     fig, ax = plt.subplots(figsize=(20, 10))
+#     ax.axis('off')  # hide the axes
+#     table = ax.table(cellText=model_results3.values, colLabels=model_results3.columns, loc='center')
+#     # Widen the table
+#     table.auto_set_font_size(False)
+#     table.set_fontsize(10)
+
+#     # Style the table; the smallest value in each column should be marked green
+#     plt.savefig(os.path.join(dataset,'模型评估.png'), bbox_inches='tight')
+#     plt.close()
+#     return model_results3
diff --git a/lib/dataread.py b/lib/dataread.py
index b1ad11e..39c913a 100644
--- a/lib/dataread.py
+++ b/lib/dataread.py
@@ -124,6 +124,7 @@ def upload_warning_data(warning_data):
     warning_data = warning_data
     headers = {"Authorization": token}
     logger.info("预警上传中...")
+    logger.info(f"upload_warning_url:{upload_warning_url}")
     logger.info(f"token:{token}")
     logger.info(f"warning_data:{warning_data}" )
     upload_res = requests.post(url=upload_warning_url, headers=headers, json=warning_data, timeout=(3, 15))
@@ -133,31 +134,6 @@ def upload_warning_data(warning_data):
         logger.info("预警上传失败")
         return None
 
-
-# def upload_warning_info(last_update_times_df,y_last_update_time):
-#     logger.info(f'上传预警信息')
-#     try:
-#         warning_data_df = last_update_times_df[last_update_times_df['warning_date'] < y_last_update_time]
-#         if len(warning_data_df) > 0:
-#             content = '原油特征指标预警信息:\n\n'
-#             warning_data_df = warning_data_df.sort_values(by='停更周期',ascending=False)
-#             fixed_length = 20
-#             warning_data_df['特征名称'] = warning_data_df['特征名称'].str.replace(" ", "")
-#             content = warning_data_df.to_string(index=False, col_space=fixed_length)
-
-#         else:
-#             logger.info(f'没有需要上传的预警信息')
-#             content = '没有需要维护的特征指标'
-#         warning_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
-#         warning_data['data']['WARNING_DATE'] = warning_date
-#         warning_data['data']['WARNING_CONTENT'] = content
-
-#         upload_warning_data(warning_data)
-#         logger.info(f'上传预警信息成功')
-#     except Exception as e:
-#         logger.error(f'上传预警信息失败:{e}')
-
 def upload_warning_info(df_count):
     logger.info(f'上传预警信息')
     try:
@@ -165,7 +141,7 @@ def upload_warning_info(df_count):
         warning_date = datetime.datetime.now().strftime('%Y-%m-%d')
         content = f'{warning_date}有{df_count}个停更'
         warning_data['data']['WARNING_DATE'] = warning_date
-        warning_data['data']['WARNING_CONTENT'] = content
+        warning_data['data']['WARNING_CONTENT'] = content + '2'
         upload_warning_data(warning_data)
         logger.info(f'上传预警信息成功')
     except Exception as e:
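# upload_warning_data above posts JSON with an Authorization header and a
# (connect, read) timeout tuple. A self-contained sketch of that call shape;
# the URL and token below are placeholders, not the real endpoint or credential:
import requests

upload_warning_url = 'http://example.invalid/api/upload'   # placeholder URL
token = '<access-token>'                                   # placeholder token
warning_data = {'funcModule': '原油特征停更预警',
                'funcOperation': '原油特征停更预警',
                'data': {'WARNING_TYPE_NAME': '特征数据停更预警',
                         'WARNING_CONTENT': '2024-12-05有34个停更',
                         'WARNING_DATE': '2024-12-05'}}
try:
    res = requests.post(url=upload_warning_url, headers={'Authorization': token},
                        json=warning_data, timeout=(3, 15))
    print(res.json())
except requests.RequestException as e:
    print(f'upload failed: {e}')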
diff --git a/main_yuanyou.py b/main_yuanyou.py
index ab32603..2ca77c8 100644
--- a/main_yuanyou.py
+++ b/main_yuanyou.py
@@ -139,8 +139,7 @@ def predict_main():
             sqlitedb.insert_data('most_model', (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), most_common_model,), columns=('ds', 'most_common_model',))
 
     try:
-        # if is_weekday:
-        if True:
+        if is_weekday:
             logger.info('今天是周一,发送特征预警')
             # Upload the warning info to the database
             warning_data_df = df_zhibiaoliebiao.copy()
@@ -197,7 +196,7 @@ def predict_main():
     logger.info('模型训练完成')
 
     logger.info('训练数据绘图ing')
-    model_results3 = model_losss_juxiting(sqlitedb)
+    model_results3 = model_losss(sqlitedb)
     logger.info('训练数据绘图end')
 
     # Model report
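# The restored is_weekday guard above gates the Monday warning push. A sketch of
# how such a flag is typically derived; the exact definition used by this project
# is an assumption here (it may cover any weekday rather than Monday only):
import datetime

is_weekday = datetime.datetime.now().weekday() == 0  # 0 = Monday
if is_weekday:
    print('今天是周一,发送特征预警')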
diff --git a/models/nerulforcastmodels.py b/models/nerulforcastmodels.py
index 9d4c342..5991c30 100644
--- a/models/nerulforcastmodels.py
+++ b/models/nerulforcastmodels.py
@@ -221,6 +221,10 @@ def ex_Model(df,horizon,input_size,train_steps,val_check_steps,early_stop_patien
 # Crude oil: compute the prediction evaluation metrics
 def model_losss(sqlitedb):
     global dataset
+    global rote
+    most_model = [sqlitedb.select_data('most_model',columns=['most_common_model'],order_by='ds desc',limit=1).values[0][0]]
+    most_model_name = most_model[0]
+
     # Prediction data processing (predict)
     df_combined = loadcsv(os.path.join(dataset,"cross_validation.csv"))
     df_combined = dateConvert(df_combined)
@@ -231,69 +235,18 @@ def model_losss(sqlitedb):
     # Convert the remaining columns to numeric types
     df_combined = df_combined.astype({col: 'float32' for col in df_combined.columns if col not in ['cutoff','ds'] })
     # Use groupby + transform to pick each ds group's boundary cutoff and store it in max_cutoff
-    df_combined['max_cutoff'] = df_combined.groupby('ds')['cutoff'].transform('min')
+    df_combined['max_cutoff'] = df_combined.groupby('ds')['cutoff'].transform('max')
 
     # Keep only the rows whose cutoff equals max_cutoff: the latest fold per ds group, other columns preserved
     df_combined = df_combined[df_combined['cutoff'] == df_combined['max_cutoff']]
     # Drop the model-generated cutoff columns
     df_combined.drop(columns=['cutoff', 'max_cutoff'], inplace=True)
     # Get the model names
-    modelnames = df_combined.columns.to_list()[2:]
+    modelnames = df_combined.columns.to_list()[1:]
     if 'y' in modelnames:
         modelnames.remove('y')
     df_combined3 = df_combined.copy() # back up df_combined; needed for plotting later
 
-    # Compute volatility
-    df_combined3['volatility'] = df_combined3['y'].pct_change().round(4)
-    # Compute the 10% / 90% quantiles of volatility over the last 60 days
-    df_combined3['quantile_10'] = df_combined3['volatility'].rolling(60).quantile(0.1)
-    df_combined3['quantile_90'] = df_combined3['volatility'].rolling(60).quantile(0.9)
-    df_combined3 = df_combined3.round(4)
-    # Convert the quantiles into prices
-    df_combined3['quantile_10_price'] = df_combined3['y'] * (1 + df_combined3['quantile_10'])
-    df_combined3['quantile_90_price'] = df_combined3['y'] * (1 + df_combined3['quantile_90'])
-
-    # Applied row by row
-    def find_min_max_within_quantile(row):
-        # Get the 10% and 90% quantile prices
-        q10 = row['quantile_10_price']
-        q90 = row['quantile_90_price']
-
-        # Check whether the float values are NaN
-        if pd.isna(q10) or pd.isna(q90):
-            return pd.Series([None, None, None, None], index=['min_within_quantile','max_within_quantile','min_model','max_model'])
-
-        # Initialize the min and max to None
-        min_value = None
-        max_value = None
-        min_value_model = ''
-        max_value_model = ''
-
-
-        # Scan the model columns for the min and max values inside the quantile range
-        for model in modelnames:
-            value = row[model]
-            if value >= q10 and value <= q90:
-                if min_value is None or value < min_value:
-                    min_value = value
-                    min_value_model = model
-
-                if max_value is None or value > max_value:
-                    max_value = value
-                    max_value_model = model
-
-        # Return the min and max values
-        return pd.Series([min_value, max_value,min_value_model,max_value_model], index=['min_within_quantile', 'max_within_quantile','min_model','max_model'])
-
-    # Apply the function to every row
-    df_combined3[['min_within_quantile', 'max_within_quantile','min_model','max_model']] = df_combined3.apply(find_min_max_within_quantile, axis=1)
-
-    # Drop rows containing NaNs
-    # df_combined3.dropna(inplace=True)
-    # Save to the database
-    df_combined3.to_sql('testandpredict_groupby', sqlitedb.connection, if_exists='replace', index=False)
-    df_combined3.to_csv(os.path.join(dataset,"testandpredict_groupby.csv"),index=False)
-
     # Empty list to collect each model's MSE, RMSE, MAE (MAPE/SMAPE are disabled below)
     cellText = []
 
@@ -319,12 +272,11 @@ def model_losss(sqlitedb):
         modelnames = modelnames[0:5]
     with open(os.path.join(dataset,"best_modelnames.txt"), 'w') as f:
         f.write(','.join(modelnames) + '\n')
-    
+
     # Plot predictions against the true values
     plt.rcParams['font.sans-serif'] = ['SimHei']
     plt.figure(figsize=(15, 10))
-    # A 3x2 canvas holding the 5 subplots
-    for n,model in enumerate(modelnames):
+    for n,model in enumerate(modelnames[:5]):
         plt.subplot(3, 2, n+1)
         plt.plot(df_combined3['ds'], df_combined3['y'], label='真实值')
         plt.plot(df_combined3['ds'], df_combined3[model], label=model)
@@ -336,9 +288,10 @@ def model_losss(sqlitedb):
     plt.savefig(os.path.join(dataset,'预测值与真实值对比图.png'), bbox_inches='tight')
     plt.close()
 
-    # Historical data + prediction data
-    # Append the future-horizon predictions
-    df_predict = loadcsv(os.path.join(dataset,'predict.csv'))
+
+    # # Historical data + prediction data
+    # # Append the future-horizon predictions
+    df_predict = pd.read_csv(os.path.join(dataset,'predict.csv'))
     df_predict.drop('unique_id',inplace=True,axis=1)
     df_predict.dropna(axis=1,inplace=True)
 
@@ -347,63 +300,97 @@ def model_losss(sqlitedb):
     except ValueError :
         df_predict['ds'] = pd.to_datetime(df_predict['ds'],format=r'%Y/%m/%d')
 
-    # Take the first row and store it in the database
-    first_row = df_predict.head(1)
-    first_row['ds'] = first_row['ds'].dt.strftime('%Y-%m-%d 00:00:00')
-    # Save the prediction results to the database
-    if not sqlitedb.check_table_exists('trueandpredict'):
-        first_row.to_sql('trueandpredict',sqlitedb.connection,index=False)
-    else:
-        for row in first_row.itertuples(index=False):
-            row_dict = row._asdict()
-            columns=row_dict.keys()
-            for col in columns:
-                sqlitedb.add_column_if_not_exists('trueandpredict',col,'TEXT')
-            check_query = sqlitedb.select_data('trueandpredict',where_condition = f"ds = '{row.ds}'")
-            if len(check_query) > 0:
-                set_clause = ", ".join([f"{key} = '{value}'" for key, value in row_dict.items()])
-                sqlitedb.update_data('trueandpredict',set_clause,where_condition = f"ds = '{row.ds}'")
-                continue
-            sqlitedb.insert_data('trueandpredict',tuple(row_dict.values()),columns=columns)
-    # Most frequent model names
-    num = df_combined3.shape[0] if df_combined3.shape[0] < 60 else 60
-    min_model_max_frequency_model = df_combined3['min_model'][-num:].value_counts().idxmax()
-    max_model_max_frequency_model = df_combined3['max_model'][-num:].value_counts().idxmax()
-    df_predict['min_model'] = min_model_max_frequency_model
-    df_predict['max_model'] = max_model_max_frequency_model
-    df_predict['min_within_quantile'] = df_predict[min_model_max_frequency_model]
-    df_predict['max_within_quantile'] = df_predict[max_model_max_frequency_model]
-    df_predict2 = df_predict.copy()
-    df_predict2['ds'] = df_predict2['ds'].dt.strftime('%Y-%m-%d 00:00:00')
-    # Save the prediction results to the database
-    # Check whether the table exists
-    if not sqlitedb.check_table_exists('testandpredict_groupby'):
-        df_predict2.to_sql('testandpredict_groupby',sqlitedb.connection,index=False)
-    else:
-        for row in df_predict2.itertuples(index=False):
-            row_dict = row._asdict()
-            check_query = sqlitedb.select_data('testandpredict_groupby',where_condition = f"ds = '{row.ds}'")
-            if len(check_query) > 0:
-                set_clause = ", ".join([f"{key} = '{value}'" for key, value in row_dict.items()])
-                sqlitedb.update_data('testandpredict_groupby',set_clause,where_condition = f"ds = '{row.ds}'")
-                continue
-            sqlitedb.insert_data('testandpredict_groupby',tuple(row_dict.values()),columns=row_dict.keys())
-    # Compute the deviation rate between each prediction and the true value
-    for model in allmodelnames:
-        df_combined3[f'{model}_abs_error_rate'] = abs(df_combined3['y'] - df_combined3[model]) / df_combined3['y']
+    def first_row_to_database(df):
+        # # Take the first row and store it in the database
+        first_row = df.head(1)
+        first_row['ds'] = first_row['ds'].dt.strftime('%Y-%m-%d 00:00:00')
+        # Save the prediction results to the database
+        if not sqlitedb.check_table_exists('trueandpredict'):
+            first_row.to_sql('trueandpredict',sqlitedb.connection,index=False)
+        else:
+            for col in first_row.columns:
+                sqlitedb.add_column_if_not_exists('trueandpredict',col,'TEXT')
+            for row in first_row.itertuples(index=False):
+                row_dict = row._asdict()
+                columns=row_dict.keys()
+                check_query = sqlitedb.select_data('trueandpredict',where_condition = f"ds = '{row.ds}'")
+                if len(check_query) > 0:
+                    set_clause = ", ".join([f"{key} = '{value}'" for key, value in row_dict.items()])
+                    sqlitedb.update_data('trueandpredict',set_clause,where_condition = f"ds = '{row.ds}'")
+                    continue
+                sqlitedb.insert_data('trueandpredict',tuple(row_dict.values()),columns=columns)
+
+    first_row_to_database(df_predict)
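# The min -> max change above makes the code keep, for each date, the
# cross-validation row from the most recent cutoff (fold) instead of the oldest.
# The filter pattern itself, on toy data:
import pandas as pd

cv = pd.DataFrame({'ds': ['d1', 'd1', 'd2'], 'cutoff': [1, 2, 2], 'NHITS': [9.0, 9.5, 9.8]})
cv['max_cutoff'] = cv.groupby('ds')['cutoff'].transform('max')
latest = cv[cv['cutoff'] == cv['max_cutoff']].drop(columns=['max_cutoff'])
print(latest)  # keeps only the cutoff == 2 rows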
 
-    # Per-row minimum deviation rate
-    min_abs_error_rate_values = df_combined3.apply(lambda row: row[[f'{model}_abs_error_rate' for model in allmodelnames]].min(), axis=1)
-    # Column name holding the per-row minimum deviation rate
-    min_abs_error_rate_column_name = df_combined3.apply(lambda row: row[[f'{model}_abs_error_rate' for model in allmodelnames]].idxmin(), axis=1)
-    # Strip the suffix to recover the model name
-    min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(lambda x: x.split('_')[0])
-    # Prediction of the model with the minimum deviation rate
-    min_abs_error_rate_predictions = df_combined3.apply(lambda row: row[min_abs_error_rate_column_name[row.name]], axis=1)
-    # Add that prediction to the DataFrame
-    df_combined3['min_abs_error_rate_prediction'] = min_abs_error_rate_predictions
-    df_combined3['min_abs_error_rate_column_name'] = min_abs_error_rate_column_name
     df_combined3 = pd.concat([df_combined3, df_predict]).reset_index(drop=True)
+
+    # Compute each model's absolute error ratio against the best model, then use the configured threshold rote to select the min/max predictions to display
+    names = []
+    names_df = df_combined3.copy()
+    for col in allmodelnames:
+        names_df[f'{col}-{most_model_name}-误差比例'] = abs(names_df[col] - names_df[most_model_name]) / names_df[most_model_name]
+        names.append(f'{col}-{most_model_name}-误差比例')
+
+    names_df = names_df[names]
+    def add_rote_column(row):
+        columns = []
+        for r in names_df.columns:
+            if row[r] <= rote:
+                columns.append(r.split('-')[0])
+        return pd.Series([columns], index=['columns'])
+    names_df['columns'] = names_df.apply(add_rote_column, axis=1)
+
+    def add_upper_lower_bound(row):
+        print(row['columns'])
+        print(type(row['columns']))
+        # Upper bound
+        upper_bound = df_combined3.loc[row.name,row['columns']].max()
+        # Lower bound
+        lower_bound = df_combined3.loc[row.name,row['columns']].min()
+        return pd.Series([lower_bound, upper_bound], index=['min_within_quantile', 'max_within_quantile'])
+    df_combined3[['min_within_quantile','max_within_quantile']] = names_df.apply(add_upper_lower_bound, axis=1)
+
+
+    def find_most_common_model():
+        # Most frequent model names
+        min_model_max_frequency_model = df_combined3['min_model'].tail(20).value_counts().idxmax()
+        max_model_max_frequency_model = df_combined3['max_model'].tail(20).value_counts().idxmax()
+        if min_model_max_frequency_model == max_model_max_frequency_model:
+            # Take the second most frequent model over the last 20 days
+            max_model_max_frequency_model = df_combined3['max_model'].tail(20).value_counts().nlargest(2).index[1]
+
+        df_predict['min_model'] = min_model_max_frequency_model
+        df_predict['max_model'] = max_model_max_frequency_model
+        df_predict['min_within_quantile'] = df_predict[min_model_max_frequency_model]
+        df_predict['max_within_quantile'] = df_predict[max_model_max_frequency_model]
+
+
+    # find_most_common_model()
+
+    df_predict2 = df_predict.copy()
+    df_predict2['ds'] = pd.to_datetime(df_predict2['ds'])
+    df_predict2['ds'] = df_predict2['ds'].dt.strftime('%Y-%m-%d 00:00:00')
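# The rote block above widens the display band to every model whose prediction
# lies within a relative-error threshold of the current best model. A sketch on
# a toy frame; rote's value and the model columns are stand-ins:
import pandas as pd

rote = 0.05
most_model_name = 'NHITS'
df = pd.DataFrame({'NHITS': [70.0, 71.0], 'BiTCN': [72.0, 80.0], 'RNN': [69.5, 71.5]})
models = ['NHITS', 'BiTCN', 'RNN']

def band(row):
    # Models within the threshold of the best model; the best model itself always qualifies.
    close = [m for m in models if abs(row[m] - row[most_model_name]) / row[most_model_name] <= rote]
    vals = row[close]
    return pd.Series({'min_within_quantile': vals.min(), 'max_within_quantile': vals.max()})

print(df.join(df.apply(band, axis=1)))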
+
+
+    def _add_abs_error_rate():
+        # Compute the deviation rate between each prediction and the true value
+        for model in allmodelnames:
+            df_combined3[f'{model}_abs_error_rate'] = abs(df_combined3['y'] - df_combined3[model]) / df_combined3['y']
+
+        # Per-row minimum deviation rate
+        min_abs_error_rate_values = df_combined3.apply(lambda row: row[[f'{model}_abs_error_rate' for model in allmodelnames]].min(), axis=1)
+        # Column name holding the per-row minimum deviation rate
+        min_abs_error_rate_column_name = df_combined3.apply(lambda row: row[[f'{model}_abs_error_rate' for model in allmodelnames]].idxmin(), axis=1)
+        # Strip the suffix to recover the model name
+        min_abs_error_rate_column_name = min_abs_error_rate_column_name.map(lambda x: x.split('_')[0])
+        # Prediction of the model with the minimum deviation rate
+        min_abs_error_rate_predictions = df_combined3.apply(lambda row: row[min_abs_error_rate_column_name[row.name]], axis=1)
+        # Add that prediction to the DataFrame
+        df_combined3['min_abs_error_rate_prediction'] = min_abs_error_rate_predictions
+        df_combined3['min_abs_error_rate_column_name'] = min_abs_error_rate_column_name
+
+    # _add_abs_error_rate()
+
     # Convert the numeric columns of the df to float
     for col in df_combined3.columns:
         try:
@@ -412,69 +399,84 @@ def model_losss(sqlitedb):
             df_combined3[col] = df_combined3[col].astype(float)
             df_combined3[col] = df_combined3[col].round(2)
         except ValueError:
             pass
-    df_combined3.to_csv(os.path.join(dataset,"df_combined3.csv"),index=False)
+    df_combined3.to_csv(os.path.join(dataset,"testandpredict_groupby.csv"),index=False)
+
     # Historical price + predicted price
-    # df_combined3 = df_combined3[-50:] # plot only the last 50 points
-    # Historical price
-    plt.figure(figsize=(20, 10))
-    plt.plot(df_combined3['ds'], df_combined3['y'], label='真实值')
-    # Shaded band
-    plt.fill_between(df_combined3['ds'], df_combined3['min_within_quantile'], df_combined3['max_within_quantile'], alpha=0.2)
-    # plt.plot(df_combined3['ds'], df_combined3['min_abs_error_rate_prediction'], label='最小绝对误差', linestyle='--', color='orange')
-    # Grid
-    plt.grid(True)
-    # Annotate the historical values
-    for i, j in zip(df_combined3['ds'], df_combined3['y']):
-        plt.text(i, j, str(j), ha='center', va='bottom')
-
-    # Query the best model name from the database
-    # most_model = [sqlitedb.select_data('most_model',columns=['most_common_model'],order_by='ds desc',limit=1).values[0][0]]
-    most_model = modelnames[0:5]
-    for model in most_model:
-        plt.plot(df_combined3['ds'], df_combined3[model], label=model,marker='o')
-    # Vertical dashed line at the current date
-    plt.axvline(x=df_combined3['ds'].iloc[-horizon], color='r', linestyle='--')
-    plt.legend()
-    plt.xlabel('日期')
-    plt.ylabel('价格')
-
-    plt.savefig(os.path.join(dataset,'历史价格-预测值.png'), bbox_inches='tight')
-    plt.close()
+    sqlitedb.drop_table('testandpredict_groupby')
+    df_combined3.to_sql('testandpredict_groupby',sqlitedb.connection,index=False)
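# The drop-then-write pair above rebuilds the table from scratch on every run.
# pandas can express the same intent in one call; a sketch against an in-memory
# SQLite connection (the data is illustrative):
import sqlite3
import pandas as pd

conn = sqlite3.connect(':memory:')
df = pd.DataFrame({'ds': ['2024-12-05'], 'y': [71.5]})
df.to_sql('testandpredict_groupby', conn, if_exists='replace', index=False)
print(pd.read_sql('SELECT * FROM testandpredict_groupby', conn))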
-    # Prediction table
-    fig, ax = plt.subplots(figsize=(20, 6))
-    ax.axis('off')  # hide the axes
-    # Keep 2 decimal places
-    df_combined3 = df_combined3.round(2)
-    df_combined3 = df_combined3[-horizon:]
-    df_combined3['Day'] = [f'Day_{i}' for i in range(1,horizon+1)]
-    # Move the Day column to the front
-    df_combined3 = df_combined3[['Day'] + list(df_combined3.columns[:-1])]
-    table = ax.table(cellText=df_combined3.values, colLabels=df_combined3.columns, loc='center')
-    # Widen the table
-    table.auto_set_font_size(False)
-    table.set_fontsize(10)
-
-    # Style the table; the smallest value in each column should be marked green
-    plt.savefig(os.path.join(dataset,'预测值表格.png'), bbox_inches='tight')
-    plt.close()
-    # plt.show()
-
-    # Visualize the evaluation results
-    plt.rcParams['font.sans-serif'] = ['SimHei']
-    fig, ax = plt.subplots(figsize=(20, 10))
-    ax.axis('off')  # hide the axes
-    table = ax.table(cellText=model_results3.values, colLabels=model_results3.columns, loc='center')
-    # Widen the table
-    table.auto_set_font_size(False)
-    table.set_fontsize(10)
-
-    # Style the table; the smallest value in each column should be marked green
-    plt.savefig(os.path.join(dataset,'模型评估.png'), bbox_inches='tight')
-    plt.close()
+    def _plt_predict_ture(df):
+        lens = df.shape[0] if df.shape[0] < 180 else 90
+        df = df[-lens:]  # plot only the recent window
+        # Historical price
+        plt.figure(figsize=(20, 10))
+        plt.plot(df['ds'], df['y'], label='真实值')
+        # Shaded band
+        plt.fill_between(df['ds'], df['max_within_quantile'], df['min_within_quantile'], alpha=0.2)
+        # markers = ['o', 's', '^', 'D', 'v', '*', 'p', 'h', 'H', '+', 'x', 'd']
+        # random_marker = random.choice(markers)
+        # for model in allmodelnames:
+        # for model in ['BiTCN','RNN']:
+        #     plt.plot(df['ds'], df[model], label=model,marker=random_marker)
+        # plt.plot(df_combined3['ds'], df_combined3['min_abs_error_rate_prediction'], label='最小绝对误差', linestyle='--', color='orange')
+        # Grid
+        plt.grid(True)
+        # Annotate the historical values
+        for i, j in zip(df['ds'], df['y']):
+            plt.text(i, j, str(j), ha='center', va='bottom')
+
+        for model in most_model:
+            plt.plot(df['ds'], df[model], label=model,marker='o')
+        # Vertical dashed line at the current date
+        plt.axvline(x=df['ds'].iloc[-horizon], color='r', linestyle='--')
+        plt.legend()
+        plt.xlabel('日期')
+        plt.ylabel('价格')
+
+        plt.savefig(os.path.join(dataset,'历史价格-预测值.png'), bbox_inches='tight')
+        plt.close()
+
+    def _plt_predict_table(df):
+        # Prediction table
+        fig, ax = plt.subplots(figsize=(20, 6))
+        ax.axis('off')  # hide the axes
+        # Keep 2 decimal places
+        df = df.round(2)
+        df = df[-horizon:]
+        df['Day'] = [f'Day_{i}' for i in range(1,horizon+1)]
+        # Move the Day column to the front
+        df = df[['Day'] + list(df.columns[:-1])]
+        table = ax.table(cellText=df.values, colLabels=df.columns, loc='center')
+        # Widen the table
+        table.auto_set_font_size(False)
+        table.set_fontsize(10)
+
+        # Style the table; the smallest value in each column should be marked green
+        plt.savefig(os.path.join(dataset,'预测值表格.png'), bbox_inches='tight')
+        plt.close()
+
+    def _plt_model_results3():
+        # Visualize the evaluation results
+        plt.rcParams['font.sans-serif'] = ['SimHei']
+        fig, ax = plt.subplots(figsize=(20, 10))
+        ax.axis('off')  # hide the axes
+        table = ax.table(cellText=model_results3.values, colLabels=model_results3.columns, loc='center')
+        # Widen the table
+        table.auto_set_font_size(False)
+        table.set_fontsize(10)
+
+        # Style the table; the smallest value in each column should be marked green
+        plt.savefig(os.path.join(dataset,'模型评估.png'), bbox_inches='tight')
+        plt.close()
+
+    _plt_predict_ture(df_combined3)
+    _plt_predict_table(df_combined3)
+    _plt_model_results3()
 
     return model_results3
 
+
 # Polyolefin: compute the prediction evaluation metrics
 def model_losss_juxiting(sqlitedb):
     global dataset
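# The band chart drawn by _plt_predict_ture above, reduced to a self-contained
# matplotlib sketch (synthetic series, not project data):
import matplotlib.pyplot as plt
import numpy as np

x = np.arange(30)
y = 70 + np.random.randn(30).cumsum()
lo, hi = y - 1.5, y + 1.5
plt.figure(figsize=(10, 4))
plt.plot(x, y, label='y')
plt.fill_between(x, lo, hi, alpha=0.2)            # shaded min/max band
plt.axvline(x=x[-5], color='r', linestyle='--')   # history/forecast boundary
plt.legend()
plt.savefig('band_sketch.png', bbox_inches='tight')
plt.close()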
"outputs": [ @@ -95,7 +95,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'confirmFlg': False, 'data': {'accessToken': 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfdGVzdCIsInRoIjoiOGE0NTc3ZGJkOTE5Njc1NzU4ZDU3OTk5YTFlODkxZmUiLCJsdCI6ImFwaSIsImlzcyI6IiIsInRtIjoiUEMiLCJleHAiOjE3MzM0MjMwMjQsImp0aSI6IjhjZWE4YWQ4YWU3YTQyMmY4ODkxYWY4N2RhNmFmNGI5In0.Doq76Zh4PWFr6U0ICJsWpcpFX7tALvIadgXKkt_IHTc', 'md5Token': '091cf636ce5a735ef287a312b1c5d410'}, 'status': True}\n" + "{'confirmFlg': False, 'data': {'accessToken': 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfdGVzdCIsInRoIjoiOGE0NTc3ZGJkOTE5Njc1NzU4ZDU3OTk5YTFlODkxZmUiLCJsdCI6ImFwaSIsImlzcyI6IiIsInRtIjoiUEMiLCJleHAiOjE3MzM0MjU1NTYsImp0aSI6IjczNjVlNTZmNTZiYjQ5YjhhNjE3MzhiNDJhMWVmOTJjIn0.pUUzeBqbcHv2B3Z2ZQ6pDdBscWeVGlai3LvVU-Hm03E', 'md5Token': 'f288634c14d5e93fc9c0b7a423a8ba33'}, 'status': True}\n" ] } ], @@ -105,7 +105,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 13, "id": "12077ead", "metadata": {}, "outputs": [], @@ -115,7 +115,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 14, "id": "a7ae21d1", "metadata": {}, "outputs": [ @@ -127,8 +127,8 @@ "INFO:my_logger:上传预警信息\n", "预警上传中...\n", "INFO:my_logger:预警上传中...\n", - "token:eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfdGVzdCIsInRoIjoiOGE0NTc3ZGJkOTE5Njc1NzU4ZDU3OTk5YTFlODkxZmUiLCJsdCI6ImFwaSIsImlzcyI6IiIsInRtIjoiUEMiLCJleHAiOjE3MzM0MjMwMjQsImp0aSI6IjdiNTdhNDUxNWUwOTQzYWZhNWEwYTUxNzllM2Y0MDQ1In0.7KTHvBMEpsRPM9esVdp3MPLz_5WCjuK1vZvwkhbhfy0\n", - "INFO:my_logger:token:eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfdGVzdCIsInRoIjoiOGE0NTc3ZGJkOTE5Njc1NzU4ZDU3OTk5YTFlODkxZmUiLCJsdCI6ImFwaSIsImlzcyI6IiIsInRtIjoiUEMiLCJleHAiOjE3MzM0MjMwMjQsImp0aSI6IjdiNTdhNDUxNWUwOTQzYWZhNWEwYTUxNzllM2Y0MDQ1In0.7KTHvBMEpsRPM9esVdp3MPLz_5WCjuK1vZvwkhbhfy0\n", + "token:eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfdGVzdCIsInRoIjoiOGE0NTc3ZGJkOTE5Njc1NzU4ZDU3OTk5YTFlODkxZmUiLCJsdCI6ImFwaSIsImlzcyI6IiIsInRtIjoiUEMiLCJleHAiOjE3MzM0MjU1NTYsImp0aSI6Ijk2ZjJlNDg4NDgzMzQyYThhYmMyYzVhYjg2NGNhNDhhIn0.Vl6wmKDRxPdZANwEEWAQ4wBPbJKC2YWVi0Gm51ZzjE0\n", + "INFO:my_logger:token:eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfdGVzdCIsInRoIjoiOGE0NTc3ZGJkOTE5Njc1NzU4ZDU3OTk5YTFlODkxZmUiLCJsdCI6ImFwaSIsImlzcyI6IiIsInRtIjoiUEMiLCJleHAiOjE3MzM0MjU1NTYsImp0aSI6Ijk2ZjJlNDg4NDgzMzQyYThhYmMyYzVhYjg2NGNhNDhhIn0.Vl6wmKDRxPdZANwEEWAQ4wBPbJKC2YWVi0Gm51ZzjE0\n", "warning_data:{'funcModule': '原油特征停更预警', 'funcOperation': '原油特征停更预警', 'data': {'WARNING_TYPE_NAME': '特征数据停更预警', 'WARNING_CONTENT': '2024-12-05有34个停更', 'WARNING_DATE': '2024-12-05'}}\n", "INFO:my_logger:warning_data:{'funcModule': '原油特征停更预警', 'funcOperation': '原油特征停更预警', 'data': {'WARNING_TYPE_NAME': '特征数据停更预警', 'WARNING_CONTENT': '2024-12-05有34个停更', 'WARNING_DATE': '2024-12-05'}}\n" ] @@ -137,7 +137,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'confirmFlg': False, 'data': {'accessToken': 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfdGVzdCIsInRoIjoiOGE0NTc3ZGJkOTE5Njc1NzU4ZDU3OTk5YTFlODkxZmUiLCJsdCI6ImFwaSIsImlzcyI6IiIsInRtIjoiUEMiLCJleHAiOjE3MzM0MjMwMjQsImp0aSI6IjdiNTdhNDUxNWUwOTQzYWZhNWEwYTUxNzllM2Y0MDQ1In0.7KTHvBMEpsRPM9esVdp3MPLz_5WCjuK1vZvwkhbhfy0', 'md5Token': '33e47710d77c32c7f3db2c83cd2bd621'}, 'status': True}\n" + "{'confirmFlg': False, 'data': {'accessToken': 
+     "{'confirmFlg': False, 'data': {'accessToken': 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfdGVzdCIsInRoIjoiOGE0NTc3ZGJkOTE5Njc1NzU4ZDU3OTk5YTFlODkxZmUiLCJsdCI6ImFwaSIsImlzcyI6IiIsInRtIjoiUEMiLCJleHAiOjE3MzM0MjU1NTYsImp0aSI6Ijk2ZjJlNDg4NDgzMzQyYThhYmMyYzVhYjg2NGNhNDhhIn0.Vl6wmKDRxPdZANwEEWAQ4wBPbJKC2YWVi0Gm51ZzjE0', 'md5Token': '99b49d2d29f44041f46ecd03a3987961'}, 'status': True}\n"
     ]
    },
    {