更改报告名称
This commit is contained in:
parent
3f06049e70
commit
06cfab9de8
@ -2223,7 +2223,7 @@ def addtimecharacteristics(df, dataset):
|
||||
# 从数据库获取百川数据,接收一个百川id列表,返回df格式的数据
|
||||
def get_baichuan_data(baichuanidnamedict):
|
||||
baichuanidlist = [str(k) for k in baichuanidnamedict.keys()]
|
||||
baichuanidlist= [f"'{id}'" for id in baichuanidlist]
|
||||
baichuanidlist = [f"'{id}'" for id in baichuanidlist]
|
||||
startdate = str(config.start_year)+'0101'
|
||||
# 连接数据库
|
||||
db = config.db_mysql
|
||||
@ -2237,7 +2237,7 @@ def get_baichuan_data(baichuanidnamedict):
|
||||
df = pd.DataFrame(results, columns=[
|
||||
'BAICHUAN_ID', 'DATA_DATE', 'DATA_VALUE'])
|
||||
df['BAICHUAN_ID'] = df['BAICHUAN_ID'].astype('string')
|
||||
df.to_csv('百川数据test.csv', index=False)
|
||||
df.to_csv(os.path.join(config.dataset, '百川数据.csv'), index=False)
|
||||
|
||||
# 按BAICHUAN_ID 进行分组,然后按DATA_DATE合并
|
||||
df1 = pd.DataFrame(columns=['DATA_DATE'])
|
||||
|
@ -3,7 +3,7 @@
|
||||
from lib.dataread import *
|
||||
from config_shiyoujiao_lvyong import *
|
||||
from lib.tools import SendMail, exception_logger
|
||||
from models.nerulforcastmodels import ex_Model, model_losss,shiyoujiao_lvyong_export_pdf
|
||||
from models.nerulforcastmodels import ex_Model, model_losss, shiyoujiao_lvyong_export_pdf
|
||||
import datetime
|
||||
import torch
|
||||
torch.set_float32_matmul_precision("high")
|
||||
@ -422,8 +422,9 @@ def predict_main():
|
||||
# 模型报告
|
||||
logger.info('制作报告ing')
|
||||
title = f'{settings}--{end_time}-预测报告' # 报告标题
|
||||
reportname = f'石油焦铝用大模型日度预测--{end_time}.pdf' # 报告文件名
|
||||
reportname = reportname.replace(':', '-') # 替换冒号
|
||||
reportname = '石油焦铝用渠道.pdf' # 报告文件名
|
||||
# reportname = f'石油焦铝用大模型日度预测--{end_time}.pdf' # 报告文件名
|
||||
# reportname = reportname.replace(':', '-') # 替换冒号
|
||||
shiyoujiao_lvyong_export_pdf(dataset=dataset, num_models=5 if is_fivemodels else 22, time=end_time,
|
||||
reportname=reportname, sqlitedb=sqlitedb),
|
||||
|
||||
@ -432,7 +433,6 @@ def predict_main():
|
||||
|
||||
push_market_value()
|
||||
|
||||
|
||||
# 发送邮件
|
||||
# m = SendMail(
|
||||
# username=username,
|
||||
|
@ -194,7 +194,7 @@ def ex_Model(df, horizon, input_size, train_steps, val_check_steps, early_stop_p
|
||||
nf_preds = nf.cross_validation(
|
||||
df=df_train, val_size=val_size, test_size=test_size, n_windows=None)
|
||||
nf_preds.to_csv(os.path.join(
|
||||
config.dataset,"cross_validation.csv"), index=False)
|
||||
config.dataset, "cross_validation.csv"), index=False)
|
||||
|
||||
nf_preds = nf_preds.reset_index()
|
||||
# 保存模型
|
||||
@ -208,7 +208,7 @@ def ex_Model(df, horizon, input_size, train_steps, val_check_steps, early_stop_p
|
||||
# glob获取dataset下最新的joblib文件
|
||||
import glob
|
||||
filename = max(glob.glob(os.path.join(
|
||||
config.dataset,'*.joblib')), key=os.path.getctime)
|
||||
config.dataset, '*.joblib')), key=os.path.getctime)
|
||||
config.logger.info('读取模型:' + filename)
|
||||
nf = load(filename)
|
||||
# 测试集预测
|
||||
@ -216,7 +216,7 @@ def ex_Model(df, horizon, input_size, train_steps, val_check_steps, early_stop_p
|
||||
df=df_test, val_size=val_size, test_size=test_size, n_windows=None)
|
||||
# 测试集预测结果保存
|
||||
nf_test_preds.to_csv(os.path.join(
|
||||
config.dataset,"cross_validation.csv"), index=False)
|
||||
config.dataset, "cross_validation.csv"), index=False)
|
||||
|
||||
df_test['ds'] = pd.to_datetime(df_test['ds'], errors='coerce')
|
||||
|
||||
@ -412,7 +412,7 @@ def ex_Model_Juxiting(df, horizon, input_size, train_steps, val_check_steps, ear
|
||||
nf_preds = nf.cross_validation(
|
||||
df=df_train, val_size=val_size, test_size=test_size, n_windows=None)
|
||||
nf_preds.to_csv(os.path.join(
|
||||
config.dataset,"cross_validation.csv"), index=False)
|
||||
config.dataset, "cross_validation.csv"), index=False)
|
||||
|
||||
nf_preds = nf_preds.reset_index()
|
||||
# 保存模型
|
||||
@ -425,7 +425,7 @@ def ex_Model_Juxiting(df, horizon, input_size, train_steps, val_check_steps, ear
|
||||
# glob获取dataset下最新的joblib文件
|
||||
import glob
|
||||
filename = max(glob.glob(os.path.join(
|
||||
config.dataset,'*.joblib')), key=os.path.getctime)
|
||||
config.dataset, '*.joblib')), key=os.path.getctime)
|
||||
config.logger.info('读取模型:' + filename)
|
||||
nf = load(filename)
|
||||
# 测试集预测
|
||||
@ -433,7 +433,7 @@ def ex_Model_Juxiting(df, horizon, input_size, train_steps, val_check_steps, ear
|
||||
df=df_test, val_size=val_size, test_size=test_size, n_windows=None)
|
||||
# 测试集预测结果保存
|
||||
nf_test_preds.to_csv(os.path.join(
|
||||
config.dataset,"cross_validation.csv"), index=False)
|
||||
config.dataset, "cross_validation.csv"), index=False)
|
||||
|
||||
df_test['ds'] = pd.to_datetime(df_test['ds'], errors='coerce')
|
||||
|
||||
@ -537,7 +537,7 @@ def model_losss_yongan(sqlitedb, end_time, table_name_prefix):
|
||||
model_results3 = model_results3.sort_values(
|
||||
by='平均平方误差(MSE)', ascending=True)
|
||||
model_results3.to_csv(os.path.join(
|
||||
config.dataset,"model_evaluation.csv"), index=False)
|
||||
config.dataset, "model_evaluation.csv"), index=False)
|
||||
modelnames = model_results3['模型(Model)'].tolist()
|
||||
allmodelnames = modelnames.copy()
|
||||
# 保存5个最佳模型的名称
|
||||
@ -561,7 +561,8 @@ def model_losss_yongan(sqlitedb, end_time, table_name_prefix):
|
||||
plt.ylabel('价格')
|
||||
plt.title(model+'拟合')
|
||||
plt.subplots_adjust(hspace=0.5)
|
||||
plt.savefig(os.path.join(config.dataset, '预测值与真实值对比图.png'), bbox_inches='tight')
|
||||
plt.savefig(os.path.join(config.dataset, '预测值与真实值对比图.png'),
|
||||
bbox_inches='tight')
|
||||
plt.close()
|
||||
|
||||
# # 历史数据+预测数据
|
||||
@ -714,7 +715,8 @@ def model_losss_yongan(sqlitedb, end_time, table_name_prefix):
|
||||
sqlitedb.update_data(
|
||||
'accuracy', f"min_price = {row['min_price'].values[0]},max_price = {row['max_price'].values[0]},mean={row['mean'].values[0]}", f"id = {id}")
|
||||
except:
|
||||
config.logger.error(f'更新accuracy表中的min_price,max_price,mean值失败,row={row}')
|
||||
config.logger.error(
|
||||
f'更新accuracy表中的min_price,max_price,mean值失败,row={row}')
|
||||
|
||||
df = accuracy_df.copy()
|
||||
df['ds'] = pd.to_datetime(df['ds'])
|
||||
@ -803,7 +805,7 @@ def model_losss_yongan(sqlitedb, end_time, table_name_prefix):
|
||||
except ValueError:
|
||||
pass
|
||||
df_combined3.to_csv(os.path.join(
|
||||
config.dataset,"testandpredict_groupby.csv"), index=False)
|
||||
config.dataset, "testandpredict_groupby.csv"), index=False)
|
||||
|
||||
# 历史价格+预测价格
|
||||
sqlitedb.drop_table('testandpredict_groupby')
|
||||
@ -837,7 +839,8 @@ def model_losss_yongan(sqlitedb, end_time, table_name_prefix):
|
||||
plt.xlabel('日期')
|
||||
plt.ylabel('价格')
|
||||
|
||||
plt.savefig(os.path.join(config.dataset, '历史价格-预测值.png'), bbox_inches='tight')
|
||||
plt.savefig(os.path.join(config.dataset, '历史价格-预测值.png'),
|
||||
bbox_inches='tight')
|
||||
plt.close()
|
||||
|
||||
def _plt_modeltopten_predict_ture(df):
|
||||
@ -866,7 +869,8 @@ def model_losss_yongan(sqlitedb, end_time, table_name_prefix):
|
||||
plt.text(i, j, str(j), ha='center', va='bottom')
|
||||
|
||||
# 当前日期画竖虚线
|
||||
plt.axvline(x=df['ds'].iloc[-config.horizon], color='r', linestyle='--')
|
||||
plt.axvline(x=df['ds'].iloc[-config.horizon],
|
||||
color='r', linestyle='--')
|
||||
plt.legend()
|
||||
plt.xlabel('日期')
|
||||
plt.ylabel('价格')
|
||||
@ -892,7 +896,8 @@ def model_losss_yongan(sqlitedb, end_time, table_name_prefix):
|
||||
table.set_fontsize(10)
|
||||
|
||||
# 设置表格样式,列数据最小的用绿色标识
|
||||
plt.savefig(os.path.join(config.dataset, '预测值表格.png'), bbox_inches='tight')
|
||||
plt.savefig(os.path.join(config.dataset, '预测值表格.png'),
|
||||
bbox_inches='tight')
|
||||
plt.close()
|
||||
|
||||
def _plt_model_results3():
|
||||
@ -907,7 +912,8 @@ def model_losss_yongan(sqlitedb, end_time, table_name_prefix):
|
||||
table.set_fontsize(10)
|
||||
|
||||
# 设置表格样式,列数据最小的用绿色标识
|
||||
plt.savefig(os.path.join(config.dataset, '模型评估.png'), bbox_inches='tight')
|
||||
plt.savefig(os.path.join(config.dataset, '模型评估.png'),
|
||||
bbox_inches='tight')
|
||||
plt.close()
|
||||
|
||||
_plt_predict_ture(df_combined3)
|
||||
@ -939,7 +945,8 @@ def model_losss(sqlitedb, end_time):
|
||||
if len(df_combined) < 100:
|
||||
len(df_combined) + ''
|
||||
except:
|
||||
df_combined = loadcsv(os.path.join(config.dataset, "cross_validation.csv"))
|
||||
df_combined = loadcsv(os.path.join(
|
||||
config.dataset, "cross_validation.csv"))
|
||||
df_combined = dateConvert(df_combined)
|
||||
df_combined['CREAT_DATE'] = df_combined['cutoff']
|
||||
df_combined4 = df_combined.copy() # 备份df_combined,后面画图需要
|
||||
@ -1297,7 +1304,8 @@ def model_losss(sqlitedb, end_time):
|
||||
# plt.plot(df['ds'], df[model], label=model,marker='o')
|
||||
plt.plot(df['ds'], df[most_model_name], label=model, marker='o')
|
||||
# 当前日期画竖虚线
|
||||
plt.axvline(x=df['ds'].iloc[-config.horizon], color='r', linestyle='--')
|
||||
plt.axvline(x=df['ds'].iloc[-config.horizon],
|
||||
color='r', linestyle='--')
|
||||
plt.legend()
|
||||
plt.xlabel('日期')
|
||||
# 设置横轴日期格式为年-月-日
|
||||
@ -1338,7 +1346,8 @@ def model_losss(sqlitedb, end_time):
|
||||
plt.text(i, j, str(j), ha='center', va='bottom')
|
||||
|
||||
# 当前日期画竖虚线
|
||||
plt.axvline(x=df['ds'].iloc[-config.horizon], color='r', linestyle='--')
|
||||
plt.axvline(x=df['ds'].iloc[-config.horizon],
|
||||
color='r', linestyle='--')
|
||||
plt.legend()
|
||||
plt.xlabel('日期')
|
||||
# 自动设置横轴日期显示
|
||||
@ -1458,7 +1467,7 @@ def model_losss_juxitingbak(sqlitedb, end_time):
|
||||
model_results3 = model_results3.sort_values(
|
||||
by='平均平方误差(MSE)', ascending=True)
|
||||
model_results3.to_csv(os.path.join(
|
||||
config.dataset,"model_evaluation.csv"), index=False)
|
||||
config.dataset, "model_evaluation.csv"), index=False)
|
||||
modelnames = model_results3['模型(Model)'].tolist()
|
||||
allmodelnames = modelnames.copy()
|
||||
# 保存5个最佳模型的名称
|
||||
@ -1482,7 +1491,8 @@ def model_losss_juxitingbak(sqlitedb, end_time):
|
||||
plt.ylabel('价格')
|
||||
plt.title(model+'拟合')
|
||||
plt.subplots_adjust(hspace=0.5)
|
||||
plt.savefig(os.path.join(config.dataset, '预测值与真实值对比图.png'), bbox_inches='tight')
|
||||
plt.savefig(os.path.join(config.dataset, '预测值与真实值对比图.png'),
|
||||
bbox_inches='tight')
|
||||
plt.close()
|
||||
|
||||
# # 历史数据+预测数据
|
||||
@ -1635,7 +1645,8 @@ def model_losss_juxitingbak(sqlitedb, end_time):
|
||||
sqlitedb.update_data(
|
||||
'accuracy', f"min_price = {row['min_price'].values[0]},max_price = {row['max_price'].values[0]},mean={row['mean'].values[0]}", f"id = {id}")
|
||||
except:
|
||||
config.logger.error(f'更新accuracy表中的min_price,max_price,mean值失败,row={row}')
|
||||
config.logger.error(
|
||||
f'更新accuracy表中的min_price,max_price,mean值失败,row={row}')
|
||||
|
||||
df = accuracy_df.copy()
|
||||
df['ds'] = pd.to_datetime(df['ds'])
|
||||
@ -1724,7 +1735,7 @@ def model_losss_juxitingbak(sqlitedb, end_time):
|
||||
except ValueError:
|
||||
pass
|
||||
df_combined3.to_csv(os.path.join(
|
||||
config.dataset,"testandpredict_groupby.csv"), index=False)
|
||||
config.dataset, "testandpredict_groupby.csv"), index=False)
|
||||
|
||||
# 历史价格+预测价格
|
||||
sqlitedb.drop_table('testandpredict_groupby')
|
||||
@ -1768,7 +1779,8 @@ def model_losss_juxitingbak(sqlitedb, end_time):
|
||||
plt.xticks(rotation=45) # 日期标签旋转45度,防止重叠
|
||||
plt.ylabel('价格')
|
||||
|
||||
plt.savefig(os.path.join(config.dataset, '历史价格-预测值.png'), bbox_inches='tight')
|
||||
plt.savefig(os.path.join(config.dataset, '历史价格-预测值.png'),
|
||||
bbox_inches='tight')
|
||||
plt.close()
|
||||
|
||||
def _plt_modeltopten_predict_ture(df):
|
||||
@ -1828,7 +1840,8 @@ def model_losss_juxitingbak(sqlitedb, end_time):
|
||||
table.set_fontsize(10)
|
||||
|
||||
# 设置表格样式,列数据最小的用绿色标识
|
||||
plt.savefig(os.path.join(config.dataset, '预测值表格.png'), bbox_inches='tight')
|
||||
plt.savefig(os.path.join(config.dataset, '预测值表格.png'),
|
||||
bbox_inches='tight')
|
||||
plt.close()
|
||||
|
||||
def _plt_model_results3():
|
||||
@ -1843,7 +1856,8 @@ def model_losss_juxitingbak(sqlitedb, end_time):
|
||||
table.set_fontsize(10)
|
||||
|
||||
# 设置表格样式,列数据最小的用绿色标识
|
||||
plt.savefig(os.path.join(config.dataset, '模型评估.png'), bbox_inches='tight')
|
||||
plt.savefig(os.path.join(config.dataset, '模型评估.png'),
|
||||
bbox_inches='tight')
|
||||
plt.close()
|
||||
|
||||
_plt_predict_ture(df_combined3)
|
||||
@ -1856,7 +1870,7 @@ def model_losss_juxitingbak(sqlitedb, end_time):
|
||||
|
||||
# 聚烯烃计算预测评估指数
|
||||
@exception_logger
|
||||
def model_losss_juxiting(sqlitedb,end_time,is_fivemodels):
|
||||
def model_losss_juxiting(sqlitedb, end_time, is_fivemodels):
|
||||
global dataset
|
||||
global rote
|
||||
most_model = [sqlitedb.select_data('most_model', columns=[
|
||||
@ -1934,7 +1948,8 @@ def model_losss_juxiting(sqlitedb,end_time,is_fivemodels):
|
||||
plt.ylabel('价格')
|
||||
plt.title(model+'拟合')
|
||||
plt.subplots_adjust(hspace=0.5)
|
||||
plt.savefig(os.path.join(config.dataset, '预测值与真实值对比图.png'), bbox_inches='tight')
|
||||
plt.savefig(os.path.join(config.dataset, '预测值与真实值对比图.png'),
|
||||
bbox_inches='tight')
|
||||
plt.close()
|
||||
|
||||
# # 历史数据+预测数据
|
||||
@ -2062,7 +2077,7 @@ def model_losss_juxiting(sqlitedb,end_time,is_fivemodels):
|
||||
except ValueError:
|
||||
pass
|
||||
df_combined3.to_csv(os.path.join(
|
||||
config.dataset,"testandpredict_groupby.csv"), index=False)
|
||||
config.dataset, "testandpredict_groupby.csv"), index=False)
|
||||
|
||||
# 历史价格+预测价格
|
||||
sqlitedb.drop_table('testandpredict_groupby')
|
||||
@ -2093,12 +2108,14 @@ def model_losss_juxiting(sqlitedb,end_time,is_fivemodels):
|
||||
for model in most_model:
|
||||
plt.plot(df['ds'], df[model], label=model, marker='o')
|
||||
# 当前日期画竖虚线
|
||||
plt.axvline(x=df['ds'].iloc[-config.horizon], color='r', linestyle='--')
|
||||
plt.axvline(x=df['ds'].iloc[-config.horizon],
|
||||
color='r', linestyle='--')
|
||||
plt.legend()
|
||||
plt.xlabel('日期')
|
||||
plt.ylabel('价格')
|
||||
|
||||
plt.savefig(os.path.join(config.dataset, '历史价格-预测值.png'), bbox_inches='tight')
|
||||
plt.savefig(os.path.join(config.dataset, '历史价格-预测值.png'),
|
||||
bbox_inches='tight')
|
||||
plt.close()
|
||||
|
||||
def _plt_predict_table(df):
|
||||
@ -2118,7 +2135,8 @@ def model_losss_juxiting(sqlitedb,end_time,is_fivemodels):
|
||||
table.set_fontsize(10)
|
||||
|
||||
# 设置表格样式,列数据最小的用绿色标识
|
||||
plt.savefig(os.path.join(config.dataset, '预测值表格.png'), bbox_inches='tight')
|
||||
plt.savefig(os.path.join(config.dataset, '预测值表格.png'),
|
||||
bbox_inches='tight')
|
||||
plt.close()
|
||||
|
||||
def _plt_model_results3():
|
||||
@ -2133,7 +2151,8 @@ def model_losss_juxiting(sqlitedb,end_time,is_fivemodels):
|
||||
table.set_fontsize(10)
|
||||
|
||||
# 设置表格样式,列数据最小的用绿色标识
|
||||
plt.savefig(os.path.join(config.dataset, '模型评估.png'), bbox_inches='tight')
|
||||
plt.savefig(os.path.join(config.dataset, '模型评估.png'),
|
||||
bbox_inches='tight')
|
||||
plt.close()
|
||||
|
||||
_plt_predict_ture(df_combined3)
|
||||
@ -2151,7 +2170,7 @@ def brent_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, in
|
||||
# 获取特征的近一月值
|
||||
import pandas as pd
|
||||
feature_data_df = pd.read_csv(os.path.join(
|
||||
config.dataset,'指标数据添加时间特征.csv'), parse_dates=['ds']).tail(60)
|
||||
config.dataset, '指标数据添加时间特征.csv'), parse_dates=['ds']).tail(60)
|
||||
|
||||
def draw_feature_trend(feature_data_df, features):
|
||||
# 画特征近60天的趋势图
|
||||
@ -2214,26 +2233,29 @@ def brent_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, in
|
||||
# 预测结果
|
||||
content.append(Graphs.draw_little_title('一、预测结果:'))
|
||||
# 添加历史走势及预测价格的走势图片
|
||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '历史价格-预测值.png')))
|
||||
content.append(Graphs.draw_img(
|
||||
os.path.join(config.dataset, '历史价格-预测值.png')))
|
||||
# 波动率画图逻辑
|
||||
content.append(Graphs.draw_text('图示说明:'))
|
||||
content.append(Graphs.draw_text(
|
||||
' 确定置信区间:设置残差置信阈值,以每周最佳模型为基准,选取在置信区间的预测值作为置信区间;'))
|
||||
|
||||
# 添加历史走势及预测价格的走势图片
|
||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '历史价格-预测值1.png')))
|
||||
content.append(Graphs.draw_img(
|
||||
os.path.join(config.dataset, '历史价格-预测值1.png')))
|
||||
content.append(Graphs.draw_text('图示说明:'))
|
||||
content.append(Graphs.draw_text(
|
||||
' 确定置信区间:使用模型评估指标MAE得到前十个模型,取平均值上下1.5作为价格波动置信区间;'))
|
||||
|
||||
# 取df中y列为空的行
|
||||
import pandas as pd
|
||||
df = pd.read_csv(os.path.join(config.dataset, 'predict.csv'), encoding='gbk')
|
||||
df = pd.read_csv(os.path.join(
|
||||
config.dataset, 'predict.csv'), encoding='gbk')
|
||||
df_true = pd.read_csv(os.path.join(
|
||||
config.dataset,'指标数据添加时间特征.csv'), encoding='utf-8') # 获取预测日期对应的真实值
|
||||
config.dataset, '指标数据添加时间特征.csv'), encoding='utf-8') # 获取预测日期对应的真实值
|
||||
df_true = df_true[['ds', 'y']]
|
||||
eval_df = pd.read_csv(os.path.join(
|
||||
config.dataset,'model_evaluation.csv'), encoding='utf-8')
|
||||
config.dataset, 'model_evaluation.csv'), encoding='utf-8')
|
||||
# 按评估指标排序,取前五
|
||||
fivemodels_list = eval_df['模型(Model)'].values # 列表形式,后面当作列名索引使用
|
||||
# 取 fivemodels_list 和 ds 列
|
||||
@ -2261,7 +2283,7 @@ def brent_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, in
|
||||
content.append(Graphs.draw_table(col_width, *data))
|
||||
content.append(Graphs.draw_little_title('二、上一预测周期偏差率分析:'))
|
||||
df = pd.read_csv(os.path.join(
|
||||
config.dataset,'testandpredict_groupby.csv'), encoding='utf-8')
|
||||
config.dataset, 'testandpredict_groupby.csv'), encoding='utf-8')
|
||||
df4 = df.copy() # 计算偏差率使用
|
||||
# 去掉created_dt 列
|
||||
df4 = df4.drop(columns=['created_dt'])
|
||||
@ -2313,7 +2335,7 @@ def brent_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, in
|
||||
data = pd.read_csv(os.path.join(config.dataset, '指标数据添加时间特征.csv'),
|
||||
encoding='utf-8') # 计算相关系数用
|
||||
df_zhibiaofenlei = loadcsv(os.path.join(
|
||||
config.dataset,'特征处理后的指标名称及分类.csv')) # 气泡图用
|
||||
config.dataset, '特征处理后的指标名称及分类.csv')) # 气泡图用
|
||||
df_zhibiaoshuju = data.copy() # 气泡图用
|
||||
|
||||
# 绘制特征相关气泡图
|
||||
@ -2345,7 +2367,7 @@ def brent_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, in
|
||||
plt.xlabel('相关系数')
|
||||
plt.ylabel('频数')
|
||||
plt.savefig(os.path.join(
|
||||
config.dataset,f'{name}类指标相关性直方分布图.png'), bbox_inches='tight')
|
||||
config.dataset, f'{name}类指标相关性直方分布图.png'), bbox_inches='tight')
|
||||
plt.close()
|
||||
content.append(Graphs.draw_img(
|
||||
os.path.join(config.dataset, f'{name}类指标相关性直方分布图.png')))
|
||||
@ -2404,7 +2426,8 @@ def brent_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, in
|
||||
plt.savefig(os.path.join(config.dataset, '指标分类相关性总和的气泡图.png'),
|
||||
bbox_inches='tight')
|
||||
plt.close()
|
||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '指标分类相关性总和的气泡图.png')))
|
||||
content.append(Graphs.draw_img(os.path.join(
|
||||
config.dataset, '指标分类相关性总和的气泡图.png')))
|
||||
content.append(Graphs.draw_text(
|
||||
'气泡图中,横轴为指标分类,纵轴为指标分类下的特征数量,气泡的面积越大表示该分类中特征的相关系数和越大。'))
|
||||
config.logger.info(f'绘制相关性总和的气泡图结束')
|
||||
@ -2420,7 +2443,7 @@ def brent_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, in
|
||||
content.append(Graphs.draw_text(introduction))
|
||||
content.append(Graphs.draw_little_title('模型评估:'))
|
||||
df = pd.read_csv(os.path.join(
|
||||
config.dataset,'model_evaluation.csv'), encoding='utf-8')
|
||||
config.dataset, 'model_evaluation.csv'), encoding='utf-8')
|
||||
# 判断 df 的数值列转为float
|
||||
for col in eval_df.columns:
|
||||
if col not in ['模型(Model)']:
|
||||
@ -2446,9 +2469,11 @@ def brent_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, in
|
||||
'3. 平均平方误差(MSE):平均平方误差是衡量预测值与实际值之间误差的一种方法,取值越小,误差越小,预测效果越好。'))
|
||||
content.append(Graphs.draw_text('模型拟合:'))
|
||||
# 添加图片
|
||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '预测值与真实值对比图.png')))
|
||||
content.append(Graphs.draw_img(
|
||||
os.path.join(config.dataset, '预测值与真实值对比图.png')))
|
||||
# 生成pdf文件
|
||||
doc = SimpleDocTemplate(os.path.join(config.dataset, reportname), pagesize=letter)
|
||||
doc = SimpleDocTemplate(os.path.join(
|
||||
config.dataset, reportname), pagesize=letter)
|
||||
doc.build(content)
|
||||
# pdf 上传到数字化信息平台
|
||||
try:
|
||||
@ -2471,7 +2496,7 @@ def shiyoujiao_lvyong_export_pdf(num_indicators=475, num_models=21, num_dayindic
|
||||
# 获取特征的近一月值
|
||||
import pandas as pd
|
||||
feature_data_df = pd.read_csv(os.path.join(
|
||||
config.dataset,'指标数据添加时间特征.csv'), parse_dates=['ds']).tail(60)
|
||||
config.dataset, '指标数据添加时间特征.csv'), parse_dates=['ds']).tail(60)
|
||||
|
||||
def draw_feature_trend(feature_data_df, features):
|
||||
# 画特征近60天的趋势图
|
||||
@ -2529,26 +2554,27 @@ def shiyoujiao_lvyong_export_pdf(num_indicators=475, num_models=21, num_dayindic
|
||||
# print(f'绘制第{i+1}个特征{col}与价格散点图时出错:{e}')
|
||||
|
||||
# 添加标题
|
||||
content.append(Graphs.draw_title(f'{config.y}{time}预测报告'))
|
||||
content.append(Graphs.draw_title('石油焦铝用渠道'))
|
||||
|
||||
# 预测结果
|
||||
content.append(Graphs.draw_little_title('一、预测结果:'))
|
||||
# 添加历史走势及预测价格的走势图片
|
||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '历史价格-预测值.png')))
|
||||
content.append(Graphs.draw_img(
|
||||
os.path.join(config.dataset, '历史价格-预测值.png')))
|
||||
# 波动率画图逻辑
|
||||
content.append(Graphs.draw_text('图示说明:'))
|
||||
content.append(Graphs.draw_text(
|
||||
' 确定置信区间:设置残差置信阈值,以每周最佳模型为基准,选取在置信区间的预测值作为置信区间;'))
|
||||
|
||||
|
||||
# 取df中y列为空的行
|
||||
import pandas as pd
|
||||
df = pd.read_csv(os.path.join(config.dataset, 'predict.csv'), encoding='gbk')
|
||||
df = pd.read_csv(os.path.join(
|
||||
config.dataset, 'predict.csv'), encoding='gbk')
|
||||
df_true = pd.read_csv(os.path.join(
|
||||
config.dataset,'指标数据添加时间特征.csv'), encoding='utf-8') # 获取预测日期对应的真实值
|
||||
config.dataset, '指标数据添加时间特征.csv'), encoding='utf-8') # 获取预测日期对应的真实值
|
||||
df_true = df_true[['ds', 'y']]
|
||||
eval_df = pd.read_csv(os.path.join(
|
||||
config.dataset,'model_evaluation.csv'), encoding='utf-8')
|
||||
config.dataset, 'model_evaluation.csv'), encoding='utf-8')
|
||||
# 按评估指标排序,取前五
|
||||
fivemodels_list = eval_df['模型(Model)'].values # 列表形式,后面当作列名索引使用
|
||||
# 取 fivemodels_list 和 ds 列
|
||||
@ -2576,7 +2602,7 @@ def shiyoujiao_lvyong_export_pdf(num_indicators=475, num_models=21, num_dayindic
|
||||
content.append(Graphs.draw_table(col_width, *data))
|
||||
content.append(Graphs.draw_little_title('二、上一预测周期偏差率分析:'))
|
||||
df = pd.read_csv(os.path.join(
|
||||
config.dataset,'testandpredict_groupby.csv'), encoding='utf-8')
|
||||
config.dataset, 'testandpredict_groupby.csv'), encoding='utf-8')
|
||||
df4 = df.copy() # 计算偏差率使用
|
||||
# 去掉created_dt 列
|
||||
df4 = df4.drop(columns=['created_dt'])
|
||||
@ -2628,7 +2654,7 @@ def shiyoujiao_lvyong_export_pdf(num_indicators=475, num_models=21, num_dayindic
|
||||
data = pd.read_csv(os.path.join(config.dataset, '指标数据添加时间特征.csv'),
|
||||
encoding='utf-8') # 计算相关系数用
|
||||
df_zhibiaofenlei = loadcsv(os.path.join(
|
||||
config.dataset,'特征处理后的指标名称及分类.csv')) # 气泡图用
|
||||
config.dataset, '特征处理后的指标名称及分类.csv')) # 气泡图用
|
||||
df_zhibiaoshuju = data.copy() # 气泡图用
|
||||
|
||||
# 绘制特征相关气泡图
|
||||
@ -2660,7 +2686,7 @@ def shiyoujiao_lvyong_export_pdf(num_indicators=475, num_models=21, num_dayindic
|
||||
plt.xlabel('相关系数')
|
||||
plt.ylabel('频数')
|
||||
plt.savefig(os.path.join(
|
||||
config.dataset,f'{name}类指标相关性直方分布图.png'), bbox_inches='tight')
|
||||
config.dataset, f'{name}类指标相关性直方分布图.png'), bbox_inches='tight')
|
||||
plt.close()
|
||||
content.append(Graphs.draw_img(
|
||||
os.path.join(config.dataset, f'{name}类指标相关性直方分布图.png')))
|
||||
@ -2719,7 +2745,8 @@ def shiyoujiao_lvyong_export_pdf(num_indicators=475, num_models=21, num_dayindic
|
||||
plt.savefig(os.path.join(config.dataset, '指标分类相关性总和的气泡图.png'),
|
||||
bbox_inches='tight')
|
||||
plt.close()
|
||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '指标分类相关性总和的气泡图.png')))
|
||||
content.append(Graphs.draw_img(os.path.join(
|
||||
config.dataset, '指标分类相关性总和的气泡图.png')))
|
||||
content.append(Graphs.draw_text(
|
||||
'气泡图中,横轴为指标分类,纵轴为指标分类下的特征数量,气泡的面积越大表示该分类中特征的相关系数和越大。'))
|
||||
config.logger.info(f'绘制相关性总和的气泡图结束')
|
||||
@ -2735,7 +2762,7 @@ def shiyoujiao_lvyong_export_pdf(num_indicators=475, num_models=21, num_dayindic
|
||||
content.append(Graphs.draw_text(introduction))
|
||||
content.append(Graphs.draw_little_title('模型评估:'))
|
||||
df = pd.read_csv(os.path.join(
|
||||
config.dataset,'model_evaluation.csv'), encoding='utf-8')
|
||||
config.dataset, 'model_evaluation.csv'), encoding='utf-8')
|
||||
# 判断 df 的数值列转为float
|
||||
for col in eval_df.columns:
|
||||
if col not in ['模型(Model)']:
|
||||
@ -2761,9 +2788,11 @@ def shiyoujiao_lvyong_export_pdf(num_indicators=475, num_models=21, num_dayindic
|
||||
'3. 平均平方误差(MSE):平均平方误差是衡量预测值与实际值之间误差的一种方法,取值越小,误差越小,预测效果越好。'))
|
||||
content.append(Graphs.draw_text('模型拟合:'))
|
||||
# 添加图片
|
||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '预测值与真实值对比图.png')))
|
||||
content.append(Graphs.draw_img(
|
||||
os.path.join(config.dataset, '预测值与真实值对比图.png')))
|
||||
# 生成pdf文件
|
||||
doc = SimpleDocTemplate(os.path.join(config.dataset, reportname), pagesize=letter)
|
||||
doc = SimpleDocTemplate(os.path.join(
|
||||
config.dataset, reportname), pagesize=letter)
|
||||
doc.build(content)
|
||||
# pdf 上传到数字化信息平台
|
||||
try:
|
||||
@ -2785,7 +2814,7 @@ def pp_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, input
|
||||
# 获取特征的近一月值
|
||||
import pandas as pd
|
||||
feature_data_df = pd.read_csv(os.path.join(
|
||||
config.dataset,'指标数据添加时间特征.csv'), parse_dates=['ds']).tail(20)
|
||||
config.dataset, '指标数据添加时间特征.csv'), parse_dates=['ds']).tail(20)
|
||||
|
||||
def draw_feature_trend(feature_data_df, features):
|
||||
# 画特征近一周的趋势图
|
||||
@ -2845,16 +2874,18 @@ def pp_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, input
|
||||
# 预测结果
|
||||
content.append(Graphs.draw_little_title('一、预测结果:'))
|
||||
# 添加历史走势及预测价格的走势图片
|
||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '历史价格-预测值.png')))
|
||||
content.append(Graphs.draw_img(
|
||||
os.path.join(config.dataset, '历史价格-预测值.png')))
|
||||
|
||||
# 取df中y列为空的行
|
||||
import pandas as pd
|
||||
df = pd.read_csv(os.path.join(config.dataset, 'predict.csv'), encoding='gbk')
|
||||
df = pd.read_csv(os.path.join(
|
||||
config.dataset, 'predict.csv'), encoding='gbk')
|
||||
df_true = pd.read_csv(os.path.join(
|
||||
config.dataset,'指标数据添加时间特征.csv'), encoding='utf-8') # 获取预测日期对应的真实值
|
||||
config.dataset, '指标数据添加时间特征.csv'), encoding='utf-8') # 获取预测日期对应的真实值
|
||||
df_true = df_true[['ds', 'y']]
|
||||
eval_df = pd.read_csv(os.path.join(
|
||||
config.dataset,'model_evaluation.csv'), encoding='utf-8')
|
||||
config.dataset, 'model_evaluation.csv'), encoding='utf-8')
|
||||
# 按评估指标排序,取前五
|
||||
fivemodels_list = eval_df['模型(Model)'].values # 列表形式,后面当作列名索引使用
|
||||
# 取 fivemodels_list 和 ds 列
|
||||
@ -2882,7 +2913,7 @@ def pp_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, input
|
||||
content.append(Graphs.draw_table(col_width, *data))
|
||||
content.append(Graphs.draw_little_title('二、上一预测周期偏差率分析:'))
|
||||
df = pd.read_csv(os.path.join(
|
||||
config.dataset,'testandpredict_groupby.csv'), encoding='utf-8')
|
||||
config.dataset, 'testandpredict_groupby.csv'), encoding='utf-8')
|
||||
df4 = df.copy() # 计算偏差率使用
|
||||
# 计算模型偏差率
|
||||
# 计算各列对于y列的差值百分比
|
||||
@ -2925,7 +2956,7 @@ def pp_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, input
|
||||
data = pd.read_csv(os.path.join(config.dataset, '指标数据添加时间特征.csv'),
|
||||
encoding='utf-8') # 计算相关系数用
|
||||
df_zhibiaofenlei = loadcsv(os.path.join(
|
||||
config.dataset,'特征处理后的指标名称及分类.csv')) # 气泡图用
|
||||
config.dataset, '特征处理后的指标名称及分类.csv')) # 气泡图用
|
||||
df_zhibiaoshuju = data.copy() # 气泡图用
|
||||
|
||||
# 绘制特征相关气泡图
|
||||
@ -2957,7 +2988,7 @@ def pp_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, input
|
||||
plt.xlabel('相关系数')
|
||||
plt.ylabel('频数')
|
||||
plt.savefig(os.path.join(
|
||||
config.dataset,f'{name}类指标相关性直方分布图.png'), bbox_inches='tight')
|
||||
config.dataset, f'{name}类指标相关性直方分布图.png'), bbox_inches='tight')
|
||||
plt.close()
|
||||
content.append(Graphs.draw_img(
|
||||
os.path.join(config.dataset, f'{name}类指标相关性直方分布图.png')))
|
||||
@ -3017,7 +3048,8 @@ def pp_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, input
|
||||
plt.savefig(os.path.join(config.dataset, '指标分类相关性总和的气泡图.png'),
|
||||
bbox_inches='tight')
|
||||
plt.close()
|
||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '指标分类相关性总和的气泡图.png')))
|
||||
content.append(Graphs.draw_img(os.path.join(
|
||||
config.dataset, '指标分类相关性总和的气泡图.png')))
|
||||
content.append(Graphs.draw_text(
|
||||
'气泡图中,横轴为指标分类,纵轴为指标分类下的特征数量,气泡的面积越大表示该分类中特征的相关系数和越大。'))
|
||||
config.logger.info(f'绘制相关性总和的气泡图结束')
|
||||
@ -3037,7 +3069,7 @@ def pp_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, input
|
||||
content.append(Graphs.draw_little_title('模型评估:'))
|
||||
|
||||
df = pd.read_csv(os.path.join(
|
||||
config.dataset,'model_evaluation.csv'), encoding='utf-8')
|
||||
config.dataset, 'model_evaluation.csv'), encoding='utf-8')
|
||||
# 判断 df 的数值列转为float
|
||||
for col in eval_df.columns:
|
||||
if col not in ['模型(Model)']:
|
||||
@ -3059,12 +3091,13 @@ def pp_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, input
|
||||
content.append(Graphs.draw_text(
|
||||
'3. 平均平方误差(MSE):平均平方误差是衡量预测值与实际值之间误差的一种方法,取值越小,误差越小,预测效果越好。'))
|
||||
content.append(Graphs.draw_text('模型拟合:'))
|
||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '预测值与真实值对比图.png')))
|
||||
content.append(Graphs.draw_img(
|
||||
os.path.join(config.dataset, '预测值与真实值对比图.png')))
|
||||
|
||||
# 附1,特征列表
|
||||
content.append(Graphs.draw_little_title('附1、特征列表:'))
|
||||
df_fuyi = pd.read_csv(os.path.join(
|
||||
config.dataset,'特征频度统计.csv'), encoding='utf-8')
|
||||
config.dataset, '特征频度统计.csv'), encoding='utf-8')
|
||||
for col in df_fuyi.columns:
|
||||
fuyi = df_fuyi[col]
|
||||
fuyi = fuyi.dropna()
|
||||
@ -3073,7 +3106,8 @@ def pp_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, input
|
||||
content.append(Graphs.draw_text(f'{i+1}、{fuyi[i]}'))
|
||||
|
||||
# 生成pdf文件
|
||||
doc = SimpleDocTemplate(os.path.join(config.dataset, reportname), pagesize=letter)
|
||||
doc = SimpleDocTemplate(os.path.join(
|
||||
config.dataset, reportname), pagesize=letter)
|
||||
# doc = SimpleDocTemplate(os.path.join(config.dataset,'reportname.pdf'), pagesize=letter)
|
||||
doc.build(content)
|
||||
# pdf 上传到数字化信息平台
|
||||
@ -3105,16 +3139,18 @@ def pp_export_pdf_v1(num_indicators=475, num_models=21, num_dayindicator=202, in
|
||||
# imgs = glob.glob(os.path.join(config.dataset,'*历史价格-预测值.png'))
|
||||
# for img in imgs:
|
||||
# content.append(Graphs.draw_img(img))
|
||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '历史价格-预测值.png')))
|
||||
content.append(Graphs.draw_img(
|
||||
os.path.join(config.dataset, '历史价格-预测值.png')))
|
||||
|
||||
# 取df中y列为空的行
|
||||
import pandas as pd
|
||||
df = pd.read_csv(os.path.join(config.dataset, 'predict.csv'), encoding='gbk')
|
||||
df = pd.read_csv(os.path.join(
|
||||
config.dataset, 'predict.csv'), encoding='gbk')
|
||||
df_true = pd.read_csv(os.path.join(
|
||||
config.dataset,'指标数据添加时间特征.csv'), encoding='utf-8') # 获取预测日期对应的真实值
|
||||
config.dataset, '指标数据添加时间特征.csv'), encoding='utf-8') # 获取预测日期对应的真实值
|
||||
df_true = df_true[['ds', 'y']]
|
||||
eval_df = pd.read_csv(os.path.join(
|
||||
config.dataset,'model_evaluation.csv'), encoding='utf-8')
|
||||
config.dataset, 'model_evaluation.csv'), encoding='utf-8')
|
||||
# 按评估指标排序,取前五
|
||||
fivemodels_list = eval_df['模型(Model)'].values # 列表形式,后面当作列名索引使用
|
||||
# 取 fivemodels_list 和 ds 列
|
||||
@ -3142,7 +3178,7 @@ def pp_export_pdf_v1(num_indicators=475, num_models=21, num_dayindicator=202, in
|
||||
content.append(Graphs.draw_table(col_width, *data))
|
||||
content.append(Graphs.draw_little_title('二、上一预测周期偏差率分析:'))
|
||||
df = pd.read_csv(os.path.join(
|
||||
config.dataset,'testandpredict_groupby.csv'), encoding='utf-8')
|
||||
config.dataset, 'testandpredict_groupby.csv'), encoding='utf-8')
|
||||
df4 = df.copy() # 计算偏差率使用
|
||||
# 计算模型偏差率
|
||||
# 计算各列对于y列的差值百分比
|
||||
@ -3186,7 +3222,7 @@ def pp_export_pdf_v1(num_indicators=475, num_models=21, num_dayindicator=202, in
|
||||
# 读取数据
|
||||
from scipy.stats import spearmanr
|
||||
data = pd.read_csv(os.path.join(
|
||||
config.dataset,'指标数据添加时间特征.csv'), encoding='utf-8')
|
||||
config.dataset, '指标数据添加时间特征.csv'), encoding='utf-8')
|
||||
# 重命名预测列
|
||||
data.rename(columns={y: 'y'}, inplace=True) # 修改
|
||||
data['ds'] = pd.to_datetime(data['ds']) # 修改
|
||||
@ -3206,7 +3242,8 @@ def pp_export_pdf_v1(num_indicators=475, num_models=21, num_dayindicator=202, in
|
||||
# 删除空列
|
||||
correlation_df.drop('Correlation', axis=1, inplace=True)
|
||||
correlation_df.dropna(inplace=True)
|
||||
correlation_df.to_csv(os.path.join(config.dataset, '指标相关性分析.csv'), index=False)
|
||||
correlation_df.to_csv(os.path.join(
|
||||
config.dataset, '指标相关性分析.csv'), index=False)
|
||||
|
||||
data = correlation_df['Pearson_Correlation'].values.tolist()
|
||||
# 生成 -1 到 1 的 20 个区间
|
||||
@ -3245,7 +3282,8 @@ def pp_export_pdf_v1(num_indicators=475, num_models=21, num_dayindicator=202, in
|
||||
plt.close()
|
||||
content.append(Graphs.draw_text(f'指标相关性分析--皮尔逊相关系数:'))
|
||||
# 皮尔逊正相关 不相关 负相关 的表格
|
||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '皮尔逊相关性系数.png')))
|
||||
content.append(Graphs.draw_img(
|
||||
os.path.join(config.dataset, '皮尔逊相关性系数.png')))
|
||||
content.append(Graphs.draw_text('''皮尔逊相关系数说明:'''))
|
||||
content.append(Graphs.draw_text('''衡量两个特征之间的线性相关性。'''))
|
||||
content.append(Graphs.draw_text('''
|
||||
@ -3257,7 +3295,7 @@ def pp_export_pdf_v1(num_indicators=475, num_models=21, num_dayindicator=202, in
|
||||
content.append(Graphs.draw_text(f'''{top10}'''))
|
||||
# 获取特征的近一月值
|
||||
feature_data_df = pd.read_csv(os.path.join(
|
||||
config.dataset,'填充后的特征数据.csv'), parse_dates=['ds']).tail(20)
|
||||
config.dataset, '填充后的特征数据.csv'), parse_dates=['ds']).tail(20)
|
||||
feature_df = feature_data_df[['ds', 'y']+top10_columns]
|
||||
# feature_df['ds'] = pd.to_datetime(df['ds'], format = '%Y-%m-%d' )
|
||||
# 遍历X每一列,和yy画散点图 ,
|
||||
@ -3363,7 +3401,8 @@ def pp_export_pdf_v1(num_indicators=475, num_models=21, num_dayindicator=202, in
|
||||
'''相关系数接近0:表示两个变量之间不存在线性关系,即它们的变化不会随着对方的变化而变化。'''))
|
||||
content.append(Graphs.draw_text(f'指标相关性分析--斯皮尔曼相关系数:'))
|
||||
# 皮尔逊正相关 不相关 负相关 的表格
|
||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '斯皮尔曼相关性系数.png')))
|
||||
content.append(Graphs.draw_img(
|
||||
os.path.join(config.dataset, '斯皮尔曼相关性系数.png')))
|
||||
content.append(Graphs.draw_text(
|
||||
'斯皮尔曼相关系数(Spearmans rank correlation coefficient)是一种用于衡量两个变量之间的单调关系(不一定是线性关系)的统计指标。'))
|
||||
content.append(Graphs.draw_text('它的计算基于变量的秩次(即变量值的排序位置)而非变量的原始值。'))
|
||||
@ -3396,7 +3435,7 @@ def pp_export_pdf_v1(num_indicators=475, num_models=21, num_dayindicator=202, in
|
||||
content.append(Graphs.draw_little_title('模型评估:'))
|
||||
|
||||
df = pd.read_csv(os.path.join(
|
||||
config.dataset,'model_evaluation.csv'), encoding='utf-8')
|
||||
config.dataset, 'model_evaluation.csv'), encoding='utf-8')
|
||||
# 判断 df 的数值列转为float
|
||||
for col in eval_df.columns:
|
||||
if col not in ['模型(Model)']:
|
||||
@ -3422,12 +3461,13 @@ def pp_export_pdf_v1(num_indicators=475, num_models=21, num_dayindicator=202, in
|
||||
'3. 平均平方误差(MSE):平均平方误差是衡量预测值与实际值之间误差的一种方法,取值越小,误差越小,预测效果越好。'))
|
||||
content.append(Graphs.draw_text('模型拟合:'))
|
||||
# 添加图片
|
||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '预测值与真实值对比图.png')))
|
||||
content.append(Graphs.draw_img(
|
||||
os.path.join(config.dataset, '预测值与真实值对比图.png')))
|
||||
|
||||
# 附1,特征列表
|
||||
content.append(Graphs.draw_little_title('附1、特征列表:'))
|
||||
df_fuyi = pd.read_csv(os.path.join(
|
||||
config.dataset,'特征频度统计.csv'), encoding='utf-8')
|
||||
config.dataset, '特征频度统计.csv'), encoding='utf-8')
|
||||
for col in df_fuyi.columns:
|
||||
fuyi = df_fuyi[col]
|
||||
fuyi = fuyi.dropna()
|
||||
@ -3436,7 +3476,8 @@ def pp_export_pdf_v1(num_indicators=475, num_models=21, num_dayindicator=202, in
|
||||
content.append(Graphs.draw_text(f'{i+1}、{fuyi[i]}'))
|
||||
|
||||
# 生成pdf文件
|
||||
doc = SimpleDocTemplate(os.path.join(config.dataset, reportname), pagesize=letter)
|
||||
doc = SimpleDocTemplate(os.path.join(
|
||||
config.dataset, reportname), pagesize=letter)
|
||||
# doc = SimpleDocTemplate(os.path.join(config.dataset,'reportname.pdf'), pagesize=letter)
|
||||
doc.build(content)
|
||||
# pdf 上传到数字化信息平台
|
||||
@ -3461,11 +3502,13 @@ def tansuanli_export_pdf(num_indicators=475, num_models=22, num_dayindicator=202
|
||||
content.append(Graphs.draw_title(f'{y}{end_time}预测报告'))
|
||||
# 预测结果
|
||||
content.append(Graphs.draw_little_title('一、预测结果:'))
|
||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '历史价格-预测值.png')))
|
||||
content.append(Graphs.draw_img(
|
||||
os.path.join(config.dataset, '历史价格-预测值.png')))
|
||||
# 取df中y列为空的行
|
||||
from lib.dataread import loadcsv
|
||||
df = loadcsv(os.path.join(config.dataset, 'predict.csv'))
|
||||
df_true = loadcsv(os.path.join(config.dataset, '指标数据添加时间特征.csv')) # 获取预测日期对应的真实值
|
||||
df_true = loadcsv(os.path.join(
|
||||
config.dataset, '指标数据添加时间特征.csv')) # 获取预测日期对应的真实值
|
||||
df_true = df_true[['ds', 'y']]
|
||||
eval_df = loadcsv(os.path.join(config.dataset, 'model_evaluation.csv'))
|
||||
# 按评估指标排序,取前五
|
||||
@ -3568,8 +3611,10 @@ def tansuanli_export_pdf(num_indicators=475, num_models=22, num_dayindicator=202
|
||||
# 特征工程
|
||||
# 预测列分析
|
||||
content.append(Graphs.draw_text(' 电碳价格自相关ACF和偏自相关PACF分析:'))
|
||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '指标数据自相关图.png')))
|
||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '指标数据偏自相关图.png')))
|
||||
content.append(Graphs.draw_img(
|
||||
os.path.join(config.dataset, '指标数据自相关图.png')))
|
||||
content.append(Graphs.draw_img(
|
||||
os.path.join(config.dataset, '指标数据偏自相关图.png')))
|
||||
content.append(Graphs.draw_text(' 解读:'))
|
||||
content.append(Graphs.draw_text(
|
||||
' 自相关函数的取值范围为 [-1, 1]。正值表示信号在不同时间点之间具有正相关性,负值表示信号具有负相关性,而 0 表示信号在不同时间点之间不相关。 '))
|
||||
@ -3611,7 +3656,8 @@ def tansuanli_export_pdf(num_indicators=475, num_models=22, num_dayindicator=202
|
||||
# 删除空列
|
||||
correlation_df.drop('Correlation', axis=1, inplace=True)
|
||||
correlation_df.dropna(inplace=True)
|
||||
correlation_df.to_csv(os.path.join(config.dataset, '指标相关性分析.csv'), index=False)
|
||||
correlation_df.to_csv(os.path.join(
|
||||
config.dataset, '指标相关性分析.csv'), index=False)
|
||||
data = correlation_df['Pearson_Correlation'].values.tolist()
|
||||
# 生成 -1 到 1 的 20 个区间
|
||||
bins = np.linspace(-1, 1, 21)
|
||||
@ -3644,7 +3690,8 @@ def tansuanli_export_pdf(num_indicators=475, num_models=22, num_dayindicator=202
|
||||
plt.close()
|
||||
content.append(Graphs.draw_text(f'指标相关性分析--皮尔逊相关系数:'))
|
||||
# 皮尔逊正相关 不相关 负相关 的表格
|
||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '皮尔逊相关性系数.png')))
|
||||
content.append(Graphs.draw_img(
|
||||
os.path.join(config.dataset, '皮尔逊相关性系数.png')))
|
||||
content.append(Graphs.draw_text('''皮尔逊相关系数说明:'''))
|
||||
content.append(Graphs.draw_text('''衡量两个特征之间的线性相关性。'''))
|
||||
content.append(Graphs.draw_text('''
|
||||
@ -3663,7 +3710,8 @@ def tansuanli_export_pdf(num_indicators=475, num_models=22, num_dayindicator=202
|
||||
'''相关系数接近0:表示两个变量之间不存在线性关系,即它们的变化不会随着对方的变化而变化。'''))
|
||||
content.append(Graphs.draw_text(f'指标相关性分析--斯皮尔曼相关系数:'))
|
||||
# 皮尔逊正相关 不相关 负相关 的表格
|
||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '斯皮尔曼相关性系数.png')))
|
||||
content.append(Graphs.draw_img(
|
||||
os.path.join(config.dataset, '斯皮尔曼相关性系数.png')))
|
||||
content.append(Graphs.draw_text(
|
||||
'斯皮尔曼相关系数(Spearmans rank correlation coefficient)是一种用于衡量两个变量之间的单调关系(不一定是线性关系)的统计指标。'))
|
||||
content.append(Graphs.draw_text('它的计算基于变量的秩次(即变量值的排序位置)而非变量的原始值。'))
|
||||
@ -3720,7 +3768,9 @@ def tansuanli_export_pdf(num_indicators=475, num_models=22, num_dayindicator=202
|
||||
'3. 平均平方误差(MSE):平均平方误差是衡量预测值与实际值之间误差的一种方法,先计算预测值与真实值之差的平方,然后对这些平方差求平均值。取值越小,误差越小,预测效果越好。'))
|
||||
content.append(Graphs.draw_text('模型拟合:'))
|
||||
# 添加图片
|
||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '预测值与真实值对比图.png')))
|
||||
content.append(Graphs.draw_img(
|
||||
os.path.join(config.dataset, '预测值与真实值对比图.png')))
|
||||
# 生成pdf文件
|
||||
doc = SimpleDocTemplate(os.path.join(config.dataset, reportname), pagesize=letter)
|
||||
doc = SimpleDocTemplate(os.path.join(
|
||||
config.dataset, reportname), pagesize=letter)
|
||||
doc.build(content)
|
||||
|
Loading…
Reference in New Issue
Block a user