Crude oil plot scaling (原油绘图比例)

liurui 2024-11-22 13:26:10 +08:00
parent ec18d536ac
commit 9532f96507
6 changed files with 9571 additions and 16480 deletions

View File

@@ -132,49 +132,49 @@ warning_data = {
# # 变量定义--测试环境
-login_pushreport_url = "http://192.168.100.53:8080/jingbo-dev/api/server/login"
+# login_pushreport_url = "http://192.168.100.53:8080/jingbo-dev/api/server/login"
-upload_url = "http://192.168.100.53:8080/jingbo-dev/api/analysis/reportInfo/researchUploadReportSave"
+# upload_url = "http://192.168.100.53:8080/jingbo-dev/api/analysis/reportInfo/researchUploadReportSave"
-# upload_url = "http://192.168.100.109:8080/jingbo/api/analysis/reportInfo/researchUploadReportSave" # zhaoqiwei
+# # upload_url = "http://192.168.100.109:8080/jingbo/api/analysis/reportInfo/researchUploadReportSave" # zhaoqiwei
-upload_warning_url = "http://192.168.100.53:8080/jingbo-dev/api/basicBuiness/crudeOilWarning/save"
+# upload_warning_url = "http://192.168.100.53:8080/jingbo-dev/api/basicBuiness/crudeOilWarning/save"
-login_data = {
+# login_data = {
-"data": {
+# "data": {
-"account": "api_test",
+# "account": "api_test",
-# "password": "MmVmNzNlOWI0MmY0ZDdjZGUwNzE3ZjFiMDJiZDZjZWU=", # Shihua@123456
+# # "password": "MmVmNzNlOWI0MmY0ZDdjZGUwNzE3ZjFiMDJiZDZjZWU=", # Shihua@123456
-"password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=", # 123456
+# "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=", # 123456
-"tenantHashCode": "8a4577dbd919675758d57999a1e891fe",
+# "tenantHashCode": "8a4577dbd919675758d57999a1e891fe",
-"terminal": "API"
+# "terminal": "API"
-},
+# },
-"funcModule": "API",
+# "funcModule": "API",
-"funcOperation": "获取token"
+# "funcOperation": "获取token"
-}
+# }
-upload_data = {
+# upload_data = {
-"funcModule":'研究报告信息',
+# "funcModule":'研究报告信息',
-"funcOperation":'上传原油价格预测报告',
+# "funcOperation":'上传原油价格预测报告',
-"data":{
+# "data":{
-"ownerAccount":'arui', #报告所属用户账号
+# "ownerAccount":'arui', #报告所属用户账号
-"reportType":'OIL_PRICE_FORECAST', # 报告类型固定为OIL_PRICE_FORECAST
+# "reportType":'OIL_PRICE_FORECAST', # 报告类型固定为OIL_PRICE_FORECAST
-"fileName": '2000-40-5-50--100-原油指标数据.xlsx-Brent活跃合约--2024-09-06-15-01-29-预测报告.pdf', #文件名称
+# "fileName": '2000-40-5-50--100-原油指标数据.xlsx-Brent活跃合约--2024-09-06-15-01-29-预测报告.pdf', #文件名称
-"fileBase64": '' ,#文件内容base64
+# "fileBase64": '' ,#文件内容base64
-"categoryNo":'yyjgycbg', # 研究报告分类编码
+# "categoryNo":'yyjgycbg', # 研究报告分类编码
-"smartBusinessClassCode":'YCJGYCBG', #分析报告分类编码
+# "smartBusinessClassCode":'YCJGYCBG', #分析报告分类编码
-"reportEmployeeCode":"E40116", # 报告人
+# "reportEmployeeCode":"E40116", # 报告人
-"reportDeptCode" :"D0044" ,# 报告部门
+# "reportDeptCode" :"D0044" ,# 报告部门
-"productGroupCode":"RAW_MATERIAL" # 商品分类
+# "productGroupCode":"RAW_MATERIAL" # 商品分类
-}
+# }
-}
+# }
-warning_data = {
+# warning_data = {
-"funcModule":'原油特征停更预警',
+# "funcModule":'原油特征停更预警',
-"funcOperation":'原油特征停更预警',
+# "funcOperation":'原油特征停更预警',
-"data":{
+# "data":{
-'WARNING_TYPE_NAME':'特征数据停更预警',
+# 'WARNING_TYPE_NAME':'特征数据停更预警',
-'WARNING_CONTENT':'',
+# 'WARNING_CONTENT':'',
-'WARNING_DATE':''
+# 'WARNING_DATE':''
-}
+# }
-}
+# }
### 线上开关
# is_train = True # 是否训练
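For context on the credentials being commented out above: the password value appears to be the base64 encoding of the hex MD5 digest of the plaintext, since decoding "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=" yields the well-known MD5 of "123456". A minimal sketch, assuming that scheme (the helper name is ours, not the repo's):

import base64
import hashlib

def encode_password(plaintext: str) -> str:
    # Assumed scheme: base64 over the hex MD5 digest of the plaintext.
    md5_hex = hashlib.md5(plaintext.encode('utf-8')).hexdigest()
    return base64.b64encode(md5_hex.encode('ascii')).decode('ascii')

# Matches the "# 123456" annotation in the config above.
assert encode_password('123456') == 'ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U='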
@@ -223,7 +223,7 @@ val_size = test_size # 验证集大小,同测试集大小
### 特征筛选用到的参数
k = 100 # 特征筛选数量如果是0或者值比特征数量大代表全部特征
+rote = 0.06
### 文件
data_set = '原油指标数据.xlsx' # 数据集文件
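The comment on k documents a convention rather than code: k = 0, or k larger than the available feature count, means no screening at all. A hypothetical helper (the name effective_k is not in the repo) capturing that rule:

def effective_k(k: int, n_features: int) -> int:
    # k == 0 or k > n_features means "keep all features", per the config comment.
    if k == 0 or k > n_features:
        return n_features
    return k

assert effective_k(100, 475) == 100   # screen down to the top 100
assert effective_k(0, 475) == 475     # 0 disables screening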

View File

@@ -1,6 +1,3 @@
-from config_jingbo import *
-# from config_juxiting import *
# 导入模块
import pandas as pd
@@ -43,6 +40,10 @@ pdfmetrics.registerFont(TTFont('SimSun', 'SimSun.ttf'))
plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签
plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号
+from config_jingbo import *
# 定义函数
def loadcsv(filename):
# 读取csv文件

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -2,7 +2,7 @@
from config_jingbo import *
from lib.dataread import *
from lib.tools import *
-from models.nerulforcastmodels import ex_Model,model_losss,brent_export_pdf,tansuanli_export_pdf,pp_export_pdf,model_losss_juxiting
+from models.nerulforcastmodels import ex_Model,model_losss,model_losss_juxiting,brent_export_pdf,tansuanli_export_pdf,pp_export_pdf,model_losss_juxiting
import glob
import torch
@@ -114,11 +114,12 @@ def predict_main():
is_weekday = datetime.datetime.now().weekday() == 0
if is_weekday:
logger.info('今天是周一,更新预测模型')
-# 计算最近20天预测残差最低的模型名称
+# 计算最近60天预测残差最低的模型名称
model_results = sqlitedb.select_data('trueandpredict', order_by="ds DESC", limit="60")
-# 删除空值率为40%以上的列,删除空行
+# 删除空值率为40%以上的列
+if len(model_results) > 10:
model_results = model_results.dropna(thresh=len(model_results)*0.6,axis=1)
+# 删除空行
model_results = model_results.dropna()
modelnames = model_results.columns.to_list()[2:]
for col in model_results[modelnames].select_dtypes(include=['object']).columns:
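The thresh argument above is easy to misread: dropna(thresh=N, axis=1) keeps columns with at least N non-null values, so thresh = 60% of the row count drops exactly the columns whose null rate exceeds 40%, as the new comment says. A toy check:

import pandas as pd

df = pd.DataFrame({'a': [1, None, None, None, None], 'b': [1, 2, 3, 4, 5]})
# 'a' is 80% null (1 non-null < 3), so it is dropped; 'b' is fully populated.
kept = df.dropna(thresh=len(df) * 0.6, axis=1)
assert list(kept.columns) == ['b']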
@@ -126,7 +127,6 @@ def predict_main():
# 计算每个预测值与真实值之间的偏差率
for model in modelnames:
model_results[f'{model}_abs_error_rate'] = abs(model_results['y'] - model_results[model]) / model_results['y']
# 获取每行对应的最小偏差率值
min_abs_error_rate_values = model_results.apply(lambda row: row[[f'{model}_abs_error_rate' for model in modelnames]].min(), axis=1)
# 获取每行对应的最小偏差率值对应的列名
@@ -136,9 +136,7 @@ def predict_main():
# 取出现次数最多的模型名称
most_common_model = min_abs_error_rate_column_name.value_counts().idxmax()
logger.info(f"最近60天预测残差最低的模型名称{most_common_model}")
# 保存结果到数据库
if not sqlitedb.check_table_exists('most_model'):
sqlitedb.create_table('most_model', columns="ds datetime, most_common_model TEXT")
sqlitedb.insert_data('most_model', (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), most_common_model,), columns=('ds', 'most_common_model',))
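The model-selection block above boils down to: compute each model's absolute error rate against the true value y, take the per-row argmin, and keep the model that wins most often over the 60-row window. A condensed, self-contained sketch of the same logic (assuming df holds a y column plus one column per model; it returns model names directly instead of the intermediate {model}_abs_error_rate columns):

import pandas as pd

def most_accurate_model(df: pd.DataFrame, modelnames: list[str]) -> str:
    # Per-row absolute error rate of each model against the true value y.
    error_rates = df[modelnames].sub(df['y'], axis=0).abs().div(df['y'], axis=0)
    # Column (model) with the smallest error rate in each row,
    # then the model that wins most often across rows.
    return error_rates.idxmin(axis=1).value_counts().idxmax()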
@@ -175,7 +173,8 @@ def predict_main():
logger.info('模型训练完成')
logger.info('训练数据绘图ing')
-model_results3 = model_losss(sqlitedb)
+# model_results3 = model_losss(sqlitedb)
+model_results3 = model_losss_juxiting(sqlitedb)
logger.info('训练数据绘图end')
# 模型报告

View File

@@ -365,21 +365,15 @@ def model_losss(sqlitedb):
sqlitedb.update_data('trueandpredict',set_clause,where_condition = f"ds = '{row.ds}'")
continue
sqlitedb.insert_data('trueandpredict',tuple(row_dict.values()),columns=columns)
# 最多频率的模型名称
-min_model_max_frequency_model = df_combined3['min_model'][-50:].value_counts().idxmax()
+min_model_max_frequency_model = df_combined3['min_model'][-60:].value_counts().idxmax()
-max_model_max_frequency_model = df_combined3['max_model'][-50:].value_counts().idxmax()
+max_model_max_frequency_model = df_combined3['max_model'][-60:].value_counts().idxmax()
df_predict['min_model'] = min_model_max_frequency_model
df_predict['max_model'] = max_model_max_frequency_model
df_predict['min_within_quantile'] = df_predict[min_model_max_frequency_model]
df_predict['max_within_quantile'] = df_predict[max_model_max_frequency_model]
df_predict2 = df_predict.copy()
df_predict2['ds'] = df_predict2['ds'].dt.strftime('%Y-%m-%d 00:00:00')
# 将预测结果保存到数据库
# 判断表存在
if not sqlitedb.check_table_exists('testandpredict_groupby'):
@@ -393,9 +387,6 @@ def model_losss(sqlitedb):
sqlitedb.update_data('testandpredict_groupby',set_clause,where_condition = f"ds = '{row.ds}'")
continue
sqlitedb.insert_data('testandpredict_groupby',tuple(row_dict.values()),columns=row_dict.keys())
# 计算每个预测值与真实值之间的偏差率
for model in allmodelnames:
df_combined3[f'{model}_abs_error_rate'] = abs(df_combined3['y'] - df_combined3[model]) / df_combined3['y']
@@ -612,8 +603,8 @@ def model_losss_juxiting(sqlitedb):
upper_bound = df_combined3.loc[row.name,row['columns']].max()
# 计算下边界值
lower_bound = df_combined3.loc[row.name,row['columns']].min()
-return pd.Series([lower_bound, upper_bound], index=['lower_bound', 'upper_bound'])
+return pd.Series([lower_bound, upper_bound], index=['min_within_quantile', 'max_within_quantile'])
-df_combined3[['upper_bound','lower_bound']] = names_df.apply(add_upper_lower_bound, axis=1)
+df_combined3[['min_within_quantile','max_within_quantile']] = names_df.apply(add_upper_lower_bound, axis=1)
def find_most_common_model():
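The rename above changes only labels; the helper still computes, per row, the min and max across that row's own set of model columns. A self-contained sketch with toy data (the shapes of df_combined3 and names_df are assumed from the surrounding code):

import pandas as pd

df_models = pd.DataFrame({'m1': [1.0, 4.0], 'm2': [3.0, 2.0], 'm3': [2.0, 9.0]})
# names_df['columns'] holds, per row, which model columns bound that row.
names_df = pd.DataFrame({'columns': [['m1', 'm2'], ['m2', 'm3']]})

def add_upper_lower_bound(row):
    vals = df_models.loc[row.name, row['columns']]
    # Note the index labels renamed by this commit.
    return pd.Series([vals.min(), vals.max()],
                     index=['min_within_quantile', 'max_within_quantile'])

df_models[['min_within_quantile', 'max_within_quantile']] = names_df.apply(add_upper_lower_bound, axis=1)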
@@ -689,7 +680,12 @@ def model_losss_juxiting(sqlitedb):
plt.figure(figsize=(20, 10))
plt.plot(df['ds'], df['y'], label='真实值')
# 颜色填充
-plt.fill_between(df['ds'], df['upper_bound'], df['lower_bound'], alpha=0.2)
+plt.fill_between(df['ds'], df['max_within_quantile'], df['min_within_quantile'], alpha=0.2)
+# markers = ['o', 's', '^', 'D', 'v', '*', 'p', 'h', 'H', '+', 'x', 'd']
+# random_marker = random.choice(markers)
+# for model in allmodelnames:
+# for model in ['BiTCN','RNN']:
+# plt.plot(df['ds'], df[model], label=model,marker=random_marker)
# plt.plot(df_combined3['ds'], df_combined3['min_abs_error_rate_prediction'], label='最小绝对误差', linestyle='--', color='orange')
# 网格
plt.grid(True)
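For the fill plot above: fill_between simply shades the band between the two renamed channel columns, and the argument order (upper first or lower first) does not affect the result. A runnable sketch on toy data (the values are illustrative, not from the dataset):

import matplotlib.pyplot as plt
import pandas as pd

plt.rcParams['font.sans-serif'] = ['SimHei']  # as in the repo, for Chinese labels

df = pd.DataFrame({
    'ds': pd.date_range('2024-11-01', periods=5),
    'y': [74.1, 74.8, 73.9, 75.2, 74.6],
    'min_within_quantile': [73.0, 73.5, 72.8, 74.0, 73.6],
    'max_within_quantile': [75.5, 76.0, 75.1, 76.4, 75.8],
})
plt.figure(figsize=(20, 10))
plt.plot(df['ds'], df['y'], label='真实值')
# Shade the confidence channel between the two boundary columns.
plt.fill_between(df['ds'], df['max_within_quantile'], df['min_within_quantile'], alpha=0.2)
plt.grid(True)
plt.legend()
plt.savefig('channel_demo.png')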
@@ -758,7 +754,7 @@ def brent_export_pdf(num_indicators=475,num_models=21, num_dayindicator=202,inpu
import pandas as pd
feature_data_df = pd.read_csv(os.path.join(dataset,'指标数据添加时间特征.csv'), parse_dates=['ds']).tail(60)
def draw_feature_trend(feature_data_df, features):
-# 画特征近一周的趋势图
+# 画特征近60天的趋势图
feature_df = feature_data_df[['ds','y']+features]
# 遍历X每一列和yy画散点图
@@ -821,7 +817,7 @@ def brent_export_pdf(num_indicators=475,num_models=21, num_dayindicator=202,inpu
content.append(Graphs.draw_text('1. 确定波动率置信区间统计近60个交易日的真实价格波动率找出在 10% 90% 的分位值作为波动率置信区间;'))
content.append(Graphs.draw_text('2. 确定通道上界:在所有模型的预测结果中 <= 前一天真实价格 乘以 90%的置信波动分位数'))
content.append(Graphs.draw_text('3. 确定通道下界:在所有模型的预测结果中 >= 前一天真实价格 乘以 10%的置信波动分位数'))
-content.append(Graphs.draw_text('4. 预测结果没有真实值作为参考依据,通道上界取近20个交易日内预测在上界值的模型对应的预测值通道下界同理'))
+content.append(Graphs.draw_text('4. 预测结果没有真实值作为参考依据,通道上界取近60个交易日内预测在上界值的模型对应的预测值通道下界同理'))
content.append(Graphs.draw_text('5. 预测结果选用近20个交易日内最多接近真实值的模型的预测值对应的预测结果'))
content.append(Graphs.draw_text('6. 预测结果在通道外的,代表最接近真实值的预测结果不在置信波动范围内。'))
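One possible reading of steps 1-3 in the report text above, treating volatility as a daily percent change over the last 60 trading days (an assumption; the repo may define volatility differently, and both function names are ours):

import pandas as pd

def volatility_band(prices: pd.Series) -> tuple[float, float]:
    # Step 1: daily percent changes over the last 60 trading days, with their
    # 10% and 90% quantiles taken as the volatility confidence interval.
    vol = prices.pct_change().dropna().tail(60)
    return float(vol.quantile(0.10)), float(vol.quantile(0.90))

def channel_bounds(pred: pd.Series, prev_close: float, q10: float, q90: float):
    # Steps 2-3: upper edge is the largest prediction at or below the previous
    # close scaled by the 90% quantile; lower edge is the smallest prediction
    # at or above the previous close scaled by the 10% quantile.
    upper = pred[pred <= prev_close * (1 + q90)].max()
    lower = pred[pred >= prev_close * (1 + q10)].min()
    return lower, upper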
@@ -955,8 +951,6 @@ def brent_export_pdf(num_indicators=475,num_models=21, num_dayindicator=202,inpu
else:
content.append(Graphs.draw_text(f'{name}类指标中与预测目标y负相关的特征有{negative_corr_features}'))
draw_feature_trend(feature_data_df, negative_corr_features)
# 计算correlation_sum 第一行的相关性的绝对值的总和
correlation_sum = correlation_matrix.abs().sum()
logger.info(f'{name}类指标的相关性总和为:{correlation_sum}')
@@ -975,10 +969,8 @@ def brent_export_pdf(num_indicators=475,num_models=21, num_dayindicator=202,inpu
content.append(Graphs.draw_img(os.path.join(dataset,'指标分类相关性总和的气泡图.png')))
content.append(Graphs.draw_text('气泡图中,横轴为指标分类,纵轴为指标分类下的特征数量,气泡的面积越大表示该分类中特征的相关系数和越大。'))
logger.info(f'绘制相关性总和的气泡图结束')
content.append(Graphs.draw_little_title('模型选择:'))
content.append(Graphs.draw_text(f'预测使用了{num_models}个模型进行训练拟合通过评估指标MAE从小到大排列前5个模型的简介如下'))
### 读取模型简介
with open(os.path.join(dataset,'model_introduction.txt'), 'r', encoding='utf-8') as f:
for line in f:
@@ -986,9 +978,7 @@ def brent_export_pdf(num_indicators=475,num_models=21, num_dayindicator=202,inpu
if line_split[0] in fivemodels_list:
for introduction in line_split:
content.append(Graphs.draw_text(introduction))
content.append(Graphs.draw_little_title('模型评估:'))
df = pd.read_csv(os.path.join(dataset,'model_evaluation.csv'),encoding='utf-8')
# 判断 df 的数值列转为float
for col in eval_df.columns:
@@ -1013,16 +1003,10 @@ def brent_export_pdf(num_indicators=475,num_models=21, num_dayindicator=202,inpu
content.append(Graphs.draw_text('模型拟合:'))
# 添加图片
content.append(Graphs.draw_img(os.path.join(dataset,'预测值与真实值对比图.png')))
### 生成pdf文件
doc = SimpleDocTemplate(os.path.join(dataset,reportname), pagesize=letter)
-# doc = SimpleDocTemplate(os.path.join(dataset,'reportname.pdf'), pagesize=letter)
doc.build(content)
# pdf 上传到数字化信息平台
-# 读取pdf并转为base64
try:
if is_update_report:
with open(os.path.join(dataset,reportname), 'rb') as f:
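A minimal sketch of what the removed 读取pdf并转为base64 comment refers to: the generated PDF is read as bytes and base64-encoded into the fileBase64 field of upload_data (see the config diff at the top of this commit). The helper name is ours:

import base64

def pdf_to_base64(path: str) -> str:
    # Read the generated report and base64-encode it for upload_data['data']['fileBase64'].
    with open(path, 'rb') as f:
        return base64.b64encode(f.read()).decode('utf-8')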