更改报告名称
This commit is contained in:
parent
3f06049e70
commit
06cfab9de8
@ -2223,7 +2223,7 @@ def addtimecharacteristics(df, dataset):
|
|||||||
# 从数据库获取百川数据,接收一个百川id列表,返回df格式的数据
|
# 从数据库获取百川数据,接收一个百川id列表,返回df格式的数据
|
||||||
def get_baichuan_data(baichuanidnamedict):
|
def get_baichuan_data(baichuanidnamedict):
|
||||||
baichuanidlist = [str(k) for k in baichuanidnamedict.keys()]
|
baichuanidlist = [str(k) for k in baichuanidnamedict.keys()]
|
||||||
baichuanidlist= [f"'{id}'" for id in baichuanidlist]
|
baichuanidlist = [f"'{id}'" for id in baichuanidlist]
|
||||||
startdate = str(config.start_year)+'0101'
|
startdate = str(config.start_year)+'0101'
|
||||||
# 连接数据库
|
# 连接数据库
|
||||||
db = config.db_mysql
|
db = config.db_mysql
|
||||||
@ -2237,7 +2237,7 @@ def get_baichuan_data(baichuanidnamedict):
|
|||||||
df = pd.DataFrame(results, columns=[
|
df = pd.DataFrame(results, columns=[
|
||||||
'BAICHUAN_ID', 'DATA_DATE', 'DATA_VALUE'])
|
'BAICHUAN_ID', 'DATA_DATE', 'DATA_VALUE'])
|
||||||
df['BAICHUAN_ID'] = df['BAICHUAN_ID'].astype('string')
|
df['BAICHUAN_ID'] = df['BAICHUAN_ID'].astype('string')
|
||||||
df.to_csv('百川数据test.csv', index=False)
|
df.to_csv(os.path.join(config.dataset, '百川数据.csv'), index=False)
|
||||||
|
|
||||||
# 按BAICHUAN_ID 进行分组,然后按DATA_DATE合并
|
# 按BAICHUAN_ID 进行分组,然后按DATA_DATE合并
|
||||||
df1 = pd.DataFrame(columns=['DATA_DATE'])
|
df1 = pd.DataFrame(columns=['DATA_DATE'])
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
from lib.dataread import *
|
from lib.dataread import *
|
||||||
from config_shiyoujiao_lvyong import *
|
from config_shiyoujiao_lvyong import *
|
||||||
from lib.tools import SendMail, exception_logger
|
from lib.tools import SendMail, exception_logger
|
||||||
from models.nerulforcastmodels import ex_Model, model_losss,shiyoujiao_lvyong_export_pdf
|
from models.nerulforcastmodels import ex_Model, model_losss, shiyoujiao_lvyong_export_pdf
|
||||||
import datetime
|
import datetime
|
||||||
import torch
|
import torch
|
||||||
torch.set_float32_matmul_precision("high")
|
torch.set_float32_matmul_precision("high")
|
||||||
@ -422,17 +422,17 @@ def predict_main():
|
|||||||
# 模型报告
|
# 模型报告
|
||||||
logger.info('制作报告ing')
|
logger.info('制作报告ing')
|
||||||
title = f'{settings}--{end_time}-预测报告' # 报告标题
|
title = f'{settings}--{end_time}-预测报告' # 报告标题
|
||||||
reportname = f'石油焦铝用大模型日度预测--{end_time}.pdf' # 报告文件名
|
reportname = '石油焦铝用渠道.pdf' # 报告文件名
|
||||||
reportname = reportname.replace(':', '-') # 替换冒号
|
# reportname = f'石油焦铝用大模型日度预测--{end_time}.pdf' # 报告文件名
|
||||||
|
# reportname = reportname.replace(':', '-') # 替换冒号
|
||||||
shiyoujiao_lvyong_export_pdf(dataset=dataset, num_models=5 if is_fivemodels else 22, time=end_time,
|
shiyoujiao_lvyong_export_pdf(dataset=dataset, num_models=5 if is_fivemodels else 22, time=end_time,
|
||||||
reportname=reportname, sqlitedb=sqlitedb),
|
reportname=reportname, sqlitedb=sqlitedb),
|
||||||
|
|
||||||
logger.info('制作报告end')
|
logger.info('制作报告end')
|
||||||
logger.info('模型训练完成')
|
logger.info('模型训练完成')
|
||||||
|
|
||||||
push_market_value()
|
push_market_value()
|
||||||
|
|
||||||
|
|
||||||
# 发送邮件
|
# 发送邮件
|
||||||
# m = SendMail(
|
# m = SendMail(
|
||||||
# username=username,
|
# username=username,
|
||||||
|
@ -194,7 +194,7 @@ def ex_Model(df, horizon, input_size, train_steps, val_check_steps, early_stop_p
|
|||||||
nf_preds = nf.cross_validation(
|
nf_preds = nf.cross_validation(
|
||||||
df=df_train, val_size=val_size, test_size=test_size, n_windows=None)
|
df=df_train, val_size=val_size, test_size=test_size, n_windows=None)
|
||||||
nf_preds.to_csv(os.path.join(
|
nf_preds.to_csv(os.path.join(
|
||||||
config.dataset,"cross_validation.csv"), index=False)
|
config.dataset, "cross_validation.csv"), index=False)
|
||||||
|
|
||||||
nf_preds = nf_preds.reset_index()
|
nf_preds = nf_preds.reset_index()
|
||||||
# 保存模型
|
# 保存模型
|
||||||
@ -208,7 +208,7 @@ def ex_Model(df, horizon, input_size, train_steps, val_check_steps, early_stop_p
|
|||||||
# glob获取dataset下最新的joblib文件
|
# glob获取dataset下最新的joblib文件
|
||||||
import glob
|
import glob
|
||||||
filename = max(glob.glob(os.path.join(
|
filename = max(glob.glob(os.path.join(
|
||||||
config.dataset,'*.joblib')), key=os.path.getctime)
|
config.dataset, '*.joblib')), key=os.path.getctime)
|
||||||
config.logger.info('读取模型:' + filename)
|
config.logger.info('读取模型:' + filename)
|
||||||
nf = load(filename)
|
nf = load(filename)
|
||||||
# 测试集预测
|
# 测试集预测
|
||||||
@ -216,7 +216,7 @@ def ex_Model(df, horizon, input_size, train_steps, val_check_steps, early_stop_p
|
|||||||
df=df_test, val_size=val_size, test_size=test_size, n_windows=None)
|
df=df_test, val_size=val_size, test_size=test_size, n_windows=None)
|
||||||
# 测试集预测结果保存
|
# 测试集预测结果保存
|
||||||
nf_test_preds.to_csv(os.path.join(
|
nf_test_preds.to_csv(os.path.join(
|
||||||
config.dataset,"cross_validation.csv"), index=False)
|
config.dataset, "cross_validation.csv"), index=False)
|
||||||
|
|
||||||
df_test['ds'] = pd.to_datetime(df_test['ds'], errors='coerce')
|
df_test['ds'] = pd.to_datetime(df_test['ds'], errors='coerce')
|
||||||
|
|
||||||
@ -412,7 +412,7 @@ def ex_Model_Juxiting(df, horizon, input_size, train_steps, val_check_steps, ear
|
|||||||
nf_preds = nf.cross_validation(
|
nf_preds = nf.cross_validation(
|
||||||
df=df_train, val_size=val_size, test_size=test_size, n_windows=None)
|
df=df_train, val_size=val_size, test_size=test_size, n_windows=None)
|
||||||
nf_preds.to_csv(os.path.join(
|
nf_preds.to_csv(os.path.join(
|
||||||
config.dataset,"cross_validation.csv"), index=False)
|
config.dataset, "cross_validation.csv"), index=False)
|
||||||
|
|
||||||
nf_preds = nf_preds.reset_index()
|
nf_preds = nf_preds.reset_index()
|
||||||
# 保存模型
|
# 保存模型
|
||||||
@ -425,7 +425,7 @@ def ex_Model_Juxiting(df, horizon, input_size, train_steps, val_check_steps, ear
|
|||||||
# glob获取dataset下最新的joblib文件
|
# glob获取dataset下最新的joblib文件
|
||||||
import glob
|
import glob
|
||||||
filename = max(glob.glob(os.path.join(
|
filename = max(glob.glob(os.path.join(
|
||||||
config.dataset,'*.joblib')), key=os.path.getctime)
|
config.dataset, '*.joblib')), key=os.path.getctime)
|
||||||
config.logger.info('读取模型:' + filename)
|
config.logger.info('读取模型:' + filename)
|
||||||
nf = load(filename)
|
nf = load(filename)
|
||||||
# 测试集预测
|
# 测试集预测
|
||||||
@ -433,7 +433,7 @@ def ex_Model_Juxiting(df, horizon, input_size, train_steps, val_check_steps, ear
|
|||||||
df=df_test, val_size=val_size, test_size=test_size, n_windows=None)
|
df=df_test, val_size=val_size, test_size=test_size, n_windows=None)
|
||||||
# 测试集预测结果保存
|
# 测试集预测结果保存
|
||||||
nf_test_preds.to_csv(os.path.join(
|
nf_test_preds.to_csv(os.path.join(
|
||||||
config.dataset,"cross_validation.csv"), index=False)
|
config.dataset, "cross_validation.csv"), index=False)
|
||||||
|
|
||||||
df_test['ds'] = pd.to_datetime(df_test['ds'], errors='coerce')
|
df_test['ds'] = pd.to_datetime(df_test['ds'], errors='coerce')
|
||||||
|
|
||||||
@ -537,7 +537,7 @@ def model_losss_yongan(sqlitedb, end_time, table_name_prefix):
|
|||||||
model_results3 = model_results3.sort_values(
|
model_results3 = model_results3.sort_values(
|
||||||
by='平均平方误差(MSE)', ascending=True)
|
by='平均平方误差(MSE)', ascending=True)
|
||||||
model_results3.to_csv(os.path.join(
|
model_results3.to_csv(os.path.join(
|
||||||
config.dataset,"model_evaluation.csv"), index=False)
|
config.dataset, "model_evaluation.csv"), index=False)
|
||||||
modelnames = model_results3['模型(Model)'].tolist()
|
modelnames = model_results3['模型(Model)'].tolist()
|
||||||
allmodelnames = modelnames.copy()
|
allmodelnames = modelnames.copy()
|
||||||
# 保存5个最佳模型的名称
|
# 保存5个最佳模型的名称
|
||||||
@ -561,7 +561,8 @@ def model_losss_yongan(sqlitedb, end_time, table_name_prefix):
|
|||||||
plt.ylabel('价格')
|
plt.ylabel('价格')
|
||||||
plt.title(model+'拟合')
|
plt.title(model+'拟合')
|
||||||
plt.subplots_adjust(hspace=0.5)
|
plt.subplots_adjust(hspace=0.5)
|
||||||
plt.savefig(os.path.join(config.dataset, '预测值与真实值对比图.png'), bbox_inches='tight')
|
plt.savefig(os.path.join(config.dataset, '预测值与真实值对比图.png'),
|
||||||
|
bbox_inches='tight')
|
||||||
plt.close()
|
plt.close()
|
||||||
|
|
||||||
# # 历史数据+预测数据
|
# # 历史数据+预测数据
|
||||||
@ -714,7 +715,8 @@ def model_losss_yongan(sqlitedb, end_time, table_name_prefix):
|
|||||||
sqlitedb.update_data(
|
sqlitedb.update_data(
|
||||||
'accuracy', f"min_price = {row['min_price'].values[0]},max_price = {row['max_price'].values[0]},mean={row['mean'].values[0]}", f"id = {id}")
|
'accuracy', f"min_price = {row['min_price'].values[0]},max_price = {row['max_price'].values[0]},mean={row['mean'].values[0]}", f"id = {id}")
|
||||||
except:
|
except:
|
||||||
config.logger.error(f'更新accuracy表中的min_price,max_price,mean值失败,row={row}')
|
config.logger.error(
|
||||||
|
f'更新accuracy表中的min_price,max_price,mean值失败,row={row}')
|
||||||
|
|
||||||
df = accuracy_df.copy()
|
df = accuracy_df.copy()
|
||||||
df['ds'] = pd.to_datetime(df['ds'])
|
df['ds'] = pd.to_datetime(df['ds'])
|
||||||
@ -803,7 +805,7 @@ def model_losss_yongan(sqlitedb, end_time, table_name_prefix):
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
df_combined3.to_csv(os.path.join(
|
df_combined3.to_csv(os.path.join(
|
||||||
config.dataset,"testandpredict_groupby.csv"), index=False)
|
config.dataset, "testandpredict_groupby.csv"), index=False)
|
||||||
|
|
||||||
# 历史价格+预测价格
|
# 历史价格+预测价格
|
||||||
sqlitedb.drop_table('testandpredict_groupby')
|
sqlitedb.drop_table('testandpredict_groupby')
|
||||||
@ -837,7 +839,8 @@ def model_losss_yongan(sqlitedb, end_time, table_name_prefix):
|
|||||||
plt.xlabel('日期')
|
plt.xlabel('日期')
|
||||||
plt.ylabel('价格')
|
plt.ylabel('价格')
|
||||||
|
|
||||||
plt.savefig(os.path.join(config.dataset, '历史价格-预测值.png'), bbox_inches='tight')
|
plt.savefig(os.path.join(config.dataset, '历史价格-预测值.png'),
|
||||||
|
bbox_inches='tight')
|
||||||
plt.close()
|
plt.close()
|
||||||
|
|
||||||
def _plt_modeltopten_predict_ture(df):
|
def _plt_modeltopten_predict_ture(df):
|
||||||
@ -866,7 +869,8 @@ def model_losss_yongan(sqlitedb, end_time, table_name_prefix):
|
|||||||
plt.text(i, j, str(j), ha='center', va='bottom')
|
plt.text(i, j, str(j), ha='center', va='bottom')
|
||||||
|
|
||||||
# 当前日期画竖虚线
|
# 当前日期画竖虚线
|
||||||
plt.axvline(x=df['ds'].iloc[-config.horizon], color='r', linestyle='--')
|
plt.axvline(x=df['ds'].iloc[-config.horizon],
|
||||||
|
color='r', linestyle='--')
|
||||||
plt.legend()
|
plt.legend()
|
||||||
plt.xlabel('日期')
|
plt.xlabel('日期')
|
||||||
plt.ylabel('价格')
|
plt.ylabel('价格')
|
||||||
@ -892,7 +896,8 @@ def model_losss_yongan(sqlitedb, end_time, table_name_prefix):
|
|||||||
table.set_fontsize(10)
|
table.set_fontsize(10)
|
||||||
|
|
||||||
# 设置表格样式,列数据最小的用绿色标识
|
# 设置表格样式,列数据最小的用绿色标识
|
||||||
plt.savefig(os.path.join(config.dataset, '预测值表格.png'), bbox_inches='tight')
|
plt.savefig(os.path.join(config.dataset, '预测值表格.png'),
|
||||||
|
bbox_inches='tight')
|
||||||
plt.close()
|
plt.close()
|
||||||
|
|
||||||
def _plt_model_results3():
|
def _plt_model_results3():
|
||||||
@ -907,7 +912,8 @@ def model_losss_yongan(sqlitedb, end_time, table_name_prefix):
|
|||||||
table.set_fontsize(10)
|
table.set_fontsize(10)
|
||||||
|
|
||||||
# 设置表格样式,列数据最小的用绿色标识
|
# 设置表格样式,列数据最小的用绿色标识
|
||||||
plt.savefig(os.path.join(config.dataset, '模型评估.png'), bbox_inches='tight')
|
plt.savefig(os.path.join(config.dataset, '模型评估.png'),
|
||||||
|
bbox_inches='tight')
|
||||||
plt.close()
|
plt.close()
|
||||||
|
|
||||||
_plt_predict_ture(df_combined3)
|
_plt_predict_ture(df_combined3)
|
||||||
@ -939,7 +945,8 @@ def model_losss(sqlitedb, end_time):
|
|||||||
if len(df_combined) < 100:
|
if len(df_combined) < 100:
|
||||||
len(df_combined) + ''
|
len(df_combined) + ''
|
||||||
except:
|
except:
|
||||||
df_combined = loadcsv(os.path.join(config.dataset, "cross_validation.csv"))
|
df_combined = loadcsv(os.path.join(
|
||||||
|
config.dataset, "cross_validation.csv"))
|
||||||
df_combined = dateConvert(df_combined)
|
df_combined = dateConvert(df_combined)
|
||||||
df_combined['CREAT_DATE'] = df_combined['cutoff']
|
df_combined['CREAT_DATE'] = df_combined['cutoff']
|
||||||
df_combined4 = df_combined.copy() # 备份df_combined,后面画图需要
|
df_combined4 = df_combined.copy() # 备份df_combined,后面画图需要
|
||||||
@ -1297,7 +1304,8 @@ def model_losss(sqlitedb, end_time):
|
|||||||
# plt.plot(df['ds'], df[model], label=model,marker='o')
|
# plt.plot(df['ds'], df[model], label=model,marker='o')
|
||||||
plt.plot(df['ds'], df[most_model_name], label=model, marker='o')
|
plt.plot(df['ds'], df[most_model_name], label=model, marker='o')
|
||||||
# 当前日期画竖虚线
|
# 当前日期画竖虚线
|
||||||
plt.axvline(x=df['ds'].iloc[-config.horizon], color='r', linestyle='--')
|
plt.axvline(x=df['ds'].iloc[-config.horizon],
|
||||||
|
color='r', linestyle='--')
|
||||||
plt.legend()
|
plt.legend()
|
||||||
plt.xlabel('日期')
|
plt.xlabel('日期')
|
||||||
# 设置横轴日期格式为年-月-日
|
# 设置横轴日期格式为年-月-日
|
||||||
@ -1338,7 +1346,8 @@ def model_losss(sqlitedb, end_time):
|
|||||||
plt.text(i, j, str(j), ha='center', va='bottom')
|
plt.text(i, j, str(j), ha='center', va='bottom')
|
||||||
|
|
||||||
# 当前日期画竖虚线
|
# 当前日期画竖虚线
|
||||||
plt.axvline(x=df['ds'].iloc[-config.horizon], color='r', linestyle='--')
|
plt.axvline(x=df['ds'].iloc[-config.horizon],
|
||||||
|
color='r', linestyle='--')
|
||||||
plt.legend()
|
plt.legend()
|
||||||
plt.xlabel('日期')
|
plt.xlabel('日期')
|
||||||
# 自动设置横轴日期显示
|
# 自动设置横轴日期显示
|
||||||
@ -1458,7 +1467,7 @@ def model_losss_juxitingbak(sqlitedb, end_time):
|
|||||||
model_results3 = model_results3.sort_values(
|
model_results3 = model_results3.sort_values(
|
||||||
by='平均平方误差(MSE)', ascending=True)
|
by='平均平方误差(MSE)', ascending=True)
|
||||||
model_results3.to_csv(os.path.join(
|
model_results3.to_csv(os.path.join(
|
||||||
config.dataset,"model_evaluation.csv"), index=False)
|
config.dataset, "model_evaluation.csv"), index=False)
|
||||||
modelnames = model_results3['模型(Model)'].tolist()
|
modelnames = model_results3['模型(Model)'].tolist()
|
||||||
allmodelnames = modelnames.copy()
|
allmodelnames = modelnames.copy()
|
||||||
# 保存5个最佳模型的名称
|
# 保存5个最佳模型的名称
|
||||||
@ -1482,7 +1491,8 @@ def model_losss_juxitingbak(sqlitedb, end_time):
|
|||||||
plt.ylabel('价格')
|
plt.ylabel('价格')
|
||||||
plt.title(model+'拟合')
|
plt.title(model+'拟合')
|
||||||
plt.subplots_adjust(hspace=0.5)
|
plt.subplots_adjust(hspace=0.5)
|
||||||
plt.savefig(os.path.join(config.dataset, '预测值与真实值对比图.png'), bbox_inches='tight')
|
plt.savefig(os.path.join(config.dataset, '预测值与真实值对比图.png'),
|
||||||
|
bbox_inches='tight')
|
||||||
plt.close()
|
plt.close()
|
||||||
|
|
||||||
# # 历史数据+预测数据
|
# # 历史数据+预测数据
|
||||||
@ -1635,7 +1645,8 @@ def model_losss_juxitingbak(sqlitedb, end_time):
|
|||||||
sqlitedb.update_data(
|
sqlitedb.update_data(
|
||||||
'accuracy', f"min_price = {row['min_price'].values[0]},max_price = {row['max_price'].values[0]},mean={row['mean'].values[0]}", f"id = {id}")
|
'accuracy', f"min_price = {row['min_price'].values[0]},max_price = {row['max_price'].values[0]},mean={row['mean'].values[0]}", f"id = {id}")
|
||||||
except:
|
except:
|
||||||
config.logger.error(f'更新accuracy表中的min_price,max_price,mean值失败,row={row}')
|
config.logger.error(
|
||||||
|
f'更新accuracy表中的min_price,max_price,mean值失败,row={row}')
|
||||||
|
|
||||||
df = accuracy_df.copy()
|
df = accuracy_df.copy()
|
||||||
df['ds'] = pd.to_datetime(df['ds'])
|
df['ds'] = pd.to_datetime(df['ds'])
|
||||||
@ -1724,7 +1735,7 @@ def model_losss_juxitingbak(sqlitedb, end_time):
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
df_combined3.to_csv(os.path.join(
|
df_combined3.to_csv(os.path.join(
|
||||||
config.dataset,"testandpredict_groupby.csv"), index=False)
|
config.dataset, "testandpredict_groupby.csv"), index=False)
|
||||||
|
|
||||||
# 历史价格+预测价格
|
# 历史价格+预测价格
|
||||||
sqlitedb.drop_table('testandpredict_groupby')
|
sqlitedb.drop_table('testandpredict_groupby')
|
||||||
@ -1768,7 +1779,8 @@ def model_losss_juxitingbak(sqlitedb, end_time):
|
|||||||
plt.xticks(rotation=45) # 日期标签旋转45度,防止重叠
|
plt.xticks(rotation=45) # 日期标签旋转45度,防止重叠
|
||||||
plt.ylabel('价格')
|
plt.ylabel('价格')
|
||||||
|
|
||||||
plt.savefig(os.path.join(config.dataset, '历史价格-预测值.png'), bbox_inches='tight')
|
plt.savefig(os.path.join(config.dataset, '历史价格-预测值.png'),
|
||||||
|
bbox_inches='tight')
|
||||||
plt.close()
|
plt.close()
|
||||||
|
|
||||||
def _plt_modeltopten_predict_ture(df):
|
def _plt_modeltopten_predict_ture(df):
|
||||||
@ -1828,7 +1840,8 @@ def model_losss_juxitingbak(sqlitedb, end_time):
|
|||||||
table.set_fontsize(10)
|
table.set_fontsize(10)
|
||||||
|
|
||||||
# 设置表格样式,列数据最小的用绿色标识
|
# 设置表格样式,列数据最小的用绿色标识
|
||||||
plt.savefig(os.path.join(config.dataset, '预测值表格.png'), bbox_inches='tight')
|
plt.savefig(os.path.join(config.dataset, '预测值表格.png'),
|
||||||
|
bbox_inches='tight')
|
||||||
plt.close()
|
plt.close()
|
||||||
|
|
||||||
def _plt_model_results3():
|
def _plt_model_results3():
|
||||||
@ -1843,7 +1856,8 @@ def model_losss_juxitingbak(sqlitedb, end_time):
|
|||||||
table.set_fontsize(10)
|
table.set_fontsize(10)
|
||||||
|
|
||||||
# 设置表格样式,列数据最小的用绿色标识
|
# 设置表格样式,列数据最小的用绿色标识
|
||||||
plt.savefig(os.path.join(config.dataset, '模型评估.png'), bbox_inches='tight')
|
plt.savefig(os.path.join(config.dataset, '模型评估.png'),
|
||||||
|
bbox_inches='tight')
|
||||||
plt.close()
|
plt.close()
|
||||||
|
|
||||||
_plt_predict_ture(df_combined3)
|
_plt_predict_ture(df_combined3)
|
||||||
@ -1856,7 +1870,7 @@ def model_losss_juxitingbak(sqlitedb, end_time):
|
|||||||
|
|
||||||
# 聚烯烃计算预测评估指数
|
# 聚烯烃计算预测评估指数
|
||||||
@exception_logger
|
@exception_logger
|
||||||
def model_losss_juxiting(sqlitedb,end_time,is_fivemodels):
|
def model_losss_juxiting(sqlitedb, end_time, is_fivemodels):
|
||||||
global dataset
|
global dataset
|
||||||
global rote
|
global rote
|
||||||
most_model = [sqlitedb.select_data('most_model', columns=[
|
most_model = [sqlitedb.select_data('most_model', columns=[
|
||||||
@ -1934,7 +1948,8 @@ def model_losss_juxiting(sqlitedb,end_time,is_fivemodels):
|
|||||||
plt.ylabel('价格')
|
plt.ylabel('价格')
|
||||||
plt.title(model+'拟合')
|
plt.title(model+'拟合')
|
||||||
plt.subplots_adjust(hspace=0.5)
|
plt.subplots_adjust(hspace=0.5)
|
||||||
plt.savefig(os.path.join(config.dataset, '预测值与真实值对比图.png'), bbox_inches='tight')
|
plt.savefig(os.path.join(config.dataset, '预测值与真实值对比图.png'),
|
||||||
|
bbox_inches='tight')
|
||||||
plt.close()
|
plt.close()
|
||||||
|
|
||||||
# # 历史数据+预测数据
|
# # 历史数据+预测数据
|
||||||
@ -2062,7 +2077,7 @@ def model_losss_juxiting(sqlitedb,end_time,is_fivemodels):
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
df_combined3.to_csv(os.path.join(
|
df_combined3.to_csv(os.path.join(
|
||||||
config.dataset,"testandpredict_groupby.csv"), index=False)
|
config.dataset, "testandpredict_groupby.csv"), index=False)
|
||||||
|
|
||||||
# 历史价格+预测价格
|
# 历史价格+预测价格
|
||||||
sqlitedb.drop_table('testandpredict_groupby')
|
sqlitedb.drop_table('testandpredict_groupby')
|
||||||
@ -2093,12 +2108,14 @@ def model_losss_juxiting(sqlitedb,end_time,is_fivemodels):
|
|||||||
for model in most_model:
|
for model in most_model:
|
||||||
plt.plot(df['ds'], df[model], label=model, marker='o')
|
plt.plot(df['ds'], df[model], label=model, marker='o')
|
||||||
# 当前日期画竖虚线
|
# 当前日期画竖虚线
|
||||||
plt.axvline(x=df['ds'].iloc[-config.horizon], color='r', linestyle='--')
|
plt.axvline(x=df['ds'].iloc[-config.horizon],
|
||||||
|
color='r', linestyle='--')
|
||||||
plt.legend()
|
plt.legend()
|
||||||
plt.xlabel('日期')
|
plt.xlabel('日期')
|
||||||
plt.ylabel('价格')
|
plt.ylabel('价格')
|
||||||
|
|
||||||
plt.savefig(os.path.join(config.dataset, '历史价格-预测值.png'), bbox_inches='tight')
|
plt.savefig(os.path.join(config.dataset, '历史价格-预测值.png'),
|
||||||
|
bbox_inches='tight')
|
||||||
plt.close()
|
plt.close()
|
||||||
|
|
||||||
def _plt_predict_table(df):
|
def _plt_predict_table(df):
|
||||||
@ -2118,7 +2135,8 @@ def model_losss_juxiting(sqlitedb,end_time,is_fivemodels):
|
|||||||
table.set_fontsize(10)
|
table.set_fontsize(10)
|
||||||
|
|
||||||
# 设置表格样式,列数据最小的用绿色标识
|
# 设置表格样式,列数据最小的用绿色标识
|
||||||
plt.savefig(os.path.join(config.dataset, '预测值表格.png'), bbox_inches='tight')
|
plt.savefig(os.path.join(config.dataset, '预测值表格.png'),
|
||||||
|
bbox_inches='tight')
|
||||||
plt.close()
|
plt.close()
|
||||||
|
|
||||||
def _plt_model_results3():
|
def _plt_model_results3():
|
||||||
@ -2133,7 +2151,8 @@ def model_losss_juxiting(sqlitedb,end_time,is_fivemodels):
|
|||||||
table.set_fontsize(10)
|
table.set_fontsize(10)
|
||||||
|
|
||||||
# 设置表格样式,列数据最小的用绿色标识
|
# 设置表格样式,列数据最小的用绿色标识
|
||||||
plt.savefig(os.path.join(config.dataset, '模型评估.png'), bbox_inches='tight')
|
plt.savefig(os.path.join(config.dataset, '模型评估.png'),
|
||||||
|
bbox_inches='tight')
|
||||||
plt.close()
|
plt.close()
|
||||||
|
|
||||||
_plt_predict_ture(df_combined3)
|
_plt_predict_ture(df_combined3)
|
||||||
@ -2151,7 +2170,7 @@ def brent_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, in
|
|||||||
# 获取特征的近一月值
|
# 获取特征的近一月值
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
feature_data_df = pd.read_csv(os.path.join(
|
feature_data_df = pd.read_csv(os.path.join(
|
||||||
config.dataset,'指标数据添加时间特征.csv'), parse_dates=['ds']).tail(60)
|
config.dataset, '指标数据添加时间特征.csv'), parse_dates=['ds']).tail(60)
|
||||||
|
|
||||||
def draw_feature_trend(feature_data_df, features):
|
def draw_feature_trend(feature_data_df, features):
|
||||||
# 画特征近60天的趋势图
|
# 画特征近60天的趋势图
|
||||||
@ -2214,26 +2233,29 @@ def brent_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, in
|
|||||||
# 预测结果
|
# 预测结果
|
||||||
content.append(Graphs.draw_little_title('一、预测结果:'))
|
content.append(Graphs.draw_little_title('一、预测结果:'))
|
||||||
# 添加历史走势及预测价格的走势图片
|
# 添加历史走势及预测价格的走势图片
|
||||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '历史价格-预测值.png')))
|
content.append(Graphs.draw_img(
|
||||||
|
os.path.join(config.dataset, '历史价格-预测值.png')))
|
||||||
# 波动率画图逻辑
|
# 波动率画图逻辑
|
||||||
content.append(Graphs.draw_text('图示说明:'))
|
content.append(Graphs.draw_text('图示说明:'))
|
||||||
content.append(Graphs.draw_text(
|
content.append(Graphs.draw_text(
|
||||||
' 确定置信区间:设置残差置信阈值,以每周最佳模型为基准,选取在置信区间的预测值作为置信区间;'))
|
' 确定置信区间:设置残差置信阈值,以每周最佳模型为基准,选取在置信区间的预测值作为置信区间;'))
|
||||||
|
|
||||||
# 添加历史走势及预测价格的走势图片
|
# 添加历史走势及预测价格的走势图片
|
||||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '历史价格-预测值1.png')))
|
content.append(Graphs.draw_img(
|
||||||
|
os.path.join(config.dataset, '历史价格-预测值1.png')))
|
||||||
content.append(Graphs.draw_text('图示说明:'))
|
content.append(Graphs.draw_text('图示说明:'))
|
||||||
content.append(Graphs.draw_text(
|
content.append(Graphs.draw_text(
|
||||||
' 确定置信区间:使用模型评估指标MAE得到前十个模型,取平均值上下1.5作为价格波动置信区间;'))
|
' 确定置信区间:使用模型评估指标MAE得到前十个模型,取平均值上下1.5作为价格波动置信区间;'))
|
||||||
|
|
||||||
# 取df中y列为空的行
|
# 取df中y列为空的行
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
df = pd.read_csv(os.path.join(config.dataset, 'predict.csv'), encoding='gbk')
|
df = pd.read_csv(os.path.join(
|
||||||
|
config.dataset, 'predict.csv'), encoding='gbk')
|
||||||
df_true = pd.read_csv(os.path.join(
|
df_true = pd.read_csv(os.path.join(
|
||||||
config.dataset,'指标数据添加时间特征.csv'), encoding='utf-8') # 获取预测日期对应的真实值
|
config.dataset, '指标数据添加时间特征.csv'), encoding='utf-8') # 获取预测日期对应的真实值
|
||||||
df_true = df_true[['ds', 'y']]
|
df_true = df_true[['ds', 'y']]
|
||||||
eval_df = pd.read_csv(os.path.join(
|
eval_df = pd.read_csv(os.path.join(
|
||||||
config.dataset,'model_evaluation.csv'), encoding='utf-8')
|
config.dataset, 'model_evaluation.csv'), encoding='utf-8')
|
||||||
# 按评估指标排序,取前五
|
# 按评估指标排序,取前五
|
||||||
fivemodels_list = eval_df['模型(Model)'].values # 列表形式,后面当作列名索引使用
|
fivemodels_list = eval_df['模型(Model)'].values # 列表形式,后面当作列名索引使用
|
||||||
# 取 fivemodels_list 和 ds 列
|
# 取 fivemodels_list 和 ds 列
|
||||||
@ -2261,7 +2283,7 @@ def brent_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, in
|
|||||||
content.append(Graphs.draw_table(col_width, *data))
|
content.append(Graphs.draw_table(col_width, *data))
|
||||||
content.append(Graphs.draw_little_title('二、上一预测周期偏差率分析:'))
|
content.append(Graphs.draw_little_title('二、上一预测周期偏差率分析:'))
|
||||||
df = pd.read_csv(os.path.join(
|
df = pd.read_csv(os.path.join(
|
||||||
config.dataset,'testandpredict_groupby.csv'), encoding='utf-8')
|
config.dataset, 'testandpredict_groupby.csv'), encoding='utf-8')
|
||||||
df4 = df.copy() # 计算偏差率使用
|
df4 = df.copy() # 计算偏差率使用
|
||||||
# 去掉created_dt 列
|
# 去掉created_dt 列
|
||||||
df4 = df4.drop(columns=['created_dt'])
|
df4 = df4.drop(columns=['created_dt'])
|
||||||
@ -2313,7 +2335,7 @@ def brent_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, in
|
|||||||
data = pd.read_csv(os.path.join(config.dataset, '指标数据添加时间特征.csv'),
|
data = pd.read_csv(os.path.join(config.dataset, '指标数据添加时间特征.csv'),
|
||||||
encoding='utf-8') # 计算相关系数用
|
encoding='utf-8') # 计算相关系数用
|
||||||
df_zhibiaofenlei = loadcsv(os.path.join(
|
df_zhibiaofenlei = loadcsv(os.path.join(
|
||||||
config.dataset,'特征处理后的指标名称及分类.csv')) # 气泡图用
|
config.dataset, '特征处理后的指标名称及分类.csv')) # 气泡图用
|
||||||
df_zhibiaoshuju = data.copy() # 气泡图用
|
df_zhibiaoshuju = data.copy() # 气泡图用
|
||||||
|
|
||||||
# 绘制特征相关气泡图
|
# 绘制特征相关气泡图
|
||||||
@ -2345,7 +2367,7 @@ def brent_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, in
|
|||||||
plt.xlabel('相关系数')
|
plt.xlabel('相关系数')
|
||||||
plt.ylabel('频数')
|
plt.ylabel('频数')
|
||||||
plt.savefig(os.path.join(
|
plt.savefig(os.path.join(
|
||||||
config.dataset,f'{name}类指标相关性直方分布图.png'), bbox_inches='tight')
|
config.dataset, f'{name}类指标相关性直方分布图.png'), bbox_inches='tight')
|
||||||
plt.close()
|
plt.close()
|
||||||
content.append(Graphs.draw_img(
|
content.append(Graphs.draw_img(
|
||||||
os.path.join(config.dataset, f'{name}类指标相关性直方分布图.png')))
|
os.path.join(config.dataset, f'{name}类指标相关性直方分布图.png')))
|
||||||
@ -2404,7 +2426,8 @@ def brent_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, in
|
|||||||
plt.savefig(os.path.join(config.dataset, '指标分类相关性总和的气泡图.png'),
|
plt.savefig(os.path.join(config.dataset, '指标分类相关性总和的气泡图.png'),
|
||||||
bbox_inches='tight')
|
bbox_inches='tight')
|
||||||
plt.close()
|
plt.close()
|
||||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '指标分类相关性总和的气泡图.png')))
|
content.append(Graphs.draw_img(os.path.join(
|
||||||
|
config.dataset, '指标分类相关性总和的气泡图.png')))
|
||||||
content.append(Graphs.draw_text(
|
content.append(Graphs.draw_text(
|
||||||
'气泡图中,横轴为指标分类,纵轴为指标分类下的特征数量,气泡的面积越大表示该分类中特征的相关系数和越大。'))
|
'气泡图中,横轴为指标分类,纵轴为指标分类下的特征数量,气泡的面积越大表示该分类中特征的相关系数和越大。'))
|
||||||
config.logger.info(f'绘制相关性总和的气泡图结束')
|
config.logger.info(f'绘制相关性总和的气泡图结束')
|
||||||
@ -2420,7 +2443,7 @@ def brent_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, in
|
|||||||
content.append(Graphs.draw_text(introduction))
|
content.append(Graphs.draw_text(introduction))
|
||||||
content.append(Graphs.draw_little_title('模型评估:'))
|
content.append(Graphs.draw_little_title('模型评估:'))
|
||||||
df = pd.read_csv(os.path.join(
|
df = pd.read_csv(os.path.join(
|
||||||
config.dataset,'model_evaluation.csv'), encoding='utf-8')
|
config.dataset, 'model_evaluation.csv'), encoding='utf-8')
|
||||||
# 判断 df 的数值列转为float
|
# 判断 df 的数值列转为float
|
||||||
for col in eval_df.columns:
|
for col in eval_df.columns:
|
||||||
if col not in ['模型(Model)']:
|
if col not in ['模型(Model)']:
|
||||||
@ -2446,9 +2469,11 @@ def brent_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, in
|
|||||||
'3. 平均平方误差(MSE):平均平方误差是衡量预测值与实际值之间误差的一种方法,取值越小,误差越小,预测效果越好。'))
|
'3. 平均平方误差(MSE):平均平方误差是衡量预测值与实际值之间误差的一种方法,取值越小,误差越小,预测效果越好。'))
|
||||||
content.append(Graphs.draw_text('模型拟合:'))
|
content.append(Graphs.draw_text('模型拟合:'))
|
||||||
# 添加图片
|
# 添加图片
|
||||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '预测值与真实值对比图.png')))
|
content.append(Graphs.draw_img(
|
||||||
|
os.path.join(config.dataset, '预测值与真实值对比图.png')))
|
||||||
# 生成pdf文件
|
# 生成pdf文件
|
||||||
doc = SimpleDocTemplate(os.path.join(config.dataset, reportname), pagesize=letter)
|
doc = SimpleDocTemplate(os.path.join(
|
||||||
|
config.dataset, reportname), pagesize=letter)
|
||||||
doc.build(content)
|
doc.build(content)
|
||||||
# pdf 上传到数字化信息平台
|
# pdf 上传到数字化信息平台
|
||||||
try:
|
try:
|
||||||
@ -2471,7 +2496,7 @@ def shiyoujiao_lvyong_export_pdf(num_indicators=475, num_models=21, num_dayindic
|
|||||||
# 获取特征的近一月值
|
# 获取特征的近一月值
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
feature_data_df = pd.read_csv(os.path.join(
|
feature_data_df = pd.read_csv(os.path.join(
|
||||||
config.dataset,'指标数据添加时间特征.csv'), parse_dates=['ds']).tail(60)
|
config.dataset, '指标数据添加时间特征.csv'), parse_dates=['ds']).tail(60)
|
||||||
|
|
||||||
def draw_feature_trend(feature_data_df, features):
|
def draw_feature_trend(feature_data_df, features):
|
||||||
# 画特征近60天的趋势图
|
# 画特征近60天的趋势图
|
||||||
@ -2529,26 +2554,27 @@ def shiyoujiao_lvyong_export_pdf(num_indicators=475, num_models=21, num_dayindic
|
|||||||
# print(f'绘制第{i+1}个特征{col}与价格散点图时出错:{e}')
|
# print(f'绘制第{i+1}个特征{col}与价格散点图时出错:{e}')
|
||||||
|
|
||||||
# 添加标题
|
# 添加标题
|
||||||
content.append(Graphs.draw_title(f'{config.y}{time}预测报告'))
|
content.append(Graphs.draw_title('石油焦铝用渠道'))
|
||||||
|
|
||||||
# 预测结果
|
# 预测结果
|
||||||
content.append(Graphs.draw_little_title('一、预测结果:'))
|
content.append(Graphs.draw_little_title('一、预测结果:'))
|
||||||
# 添加历史走势及预测价格的走势图片
|
# 添加历史走势及预测价格的走势图片
|
||||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '历史价格-预测值.png')))
|
content.append(Graphs.draw_img(
|
||||||
|
os.path.join(config.dataset, '历史价格-预测值.png')))
|
||||||
# 波动率画图逻辑
|
# 波动率画图逻辑
|
||||||
content.append(Graphs.draw_text('图示说明:'))
|
content.append(Graphs.draw_text('图示说明:'))
|
||||||
content.append(Graphs.draw_text(
|
content.append(Graphs.draw_text(
|
||||||
' 确定置信区间:设置残差置信阈值,以每周最佳模型为基准,选取在置信区间的预测值作为置信区间;'))
|
' 确定置信区间:设置残差置信阈值,以每周最佳模型为基准,选取在置信区间的预测值作为置信区间;'))
|
||||||
|
|
||||||
|
|
||||||
# 取df中y列为空的行
|
# 取df中y列为空的行
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
df = pd.read_csv(os.path.join(config.dataset, 'predict.csv'), encoding='gbk')
|
df = pd.read_csv(os.path.join(
|
||||||
|
config.dataset, 'predict.csv'), encoding='gbk')
|
||||||
df_true = pd.read_csv(os.path.join(
|
df_true = pd.read_csv(os.path.join(
|
||||||
config.dataset,'指标数据添加时间特征.csv'), encoding='utf-8') # 获取预测日期对应的真实值
|
config.dataset, '指标数据添加时间特征.csv'), encoding='utf-8') # 获取预测日期对应的真实值
|
||||||
df_true = df_true[['ds', 'y']]
|
df_true = df_true[['ds', 'y']]
|
||||||
eval_df = pd.read_csv(os.path.join(
|
eval_df = pd.read_csv(os.path.join(
|
||||||
config.dataset,'model_evaluation.csv'), encoding='utf-8')
|
config.dataset, 'model_evaluation.csv'), encoding='utf-8')
|
||||||
# 按评估指标排序,取前五
|
# 按评估指标排序,取前五
|
||||||
fivemodels_list = eval_df['模型(Model)'].values # 列表形式,后面当作列名索引使用
|
fivemodels_list = eval_df['模型(Model)'].values # 列表形式,后面当作列名索引使用
|
||||||
# 取 fivemodels_list 和 ds 列
|
# 取 fivemodels_list 和 ds 列
|
||||||
@ -2576,7 +2602,7 @@ def shiyoujiao_lvyong_export_pdf(num_indicators=475, num_models=21, num_dayindic
|
|||||||
content.append(Graphs.draw_table(col_width, *data))
|
content.append(Graphs.draw_table(col_width, *data))
|
||||||
content.append(Graphs.draw_little_title('二、上一预测周期偏差率分析:'))
|
content.append(Graphs.draw_little_title('二、上一预测周期偏差率分析:'))
|
||||||
df = pd.read_csv(os.path.join(
|
df = pd.read_csv(os.path.join(
|
||||||
config.dataset,'testandpredict_groupby.csv'), encoding='utf-8')
|
config.dataset, 'testandpredict_groupby.csv'), encoding='utf-8')
|
||||||
df4 = df.copy() # 计算偏差率使用
|
df4 = df.copy() # 计算偏差率使用
|
||||||
# 去掉created_dt 列
|
# 去掉created_dt 列
|
||||||
df4 = df4.drop(columns=['created_dt'])
|
df4 = df4.drop(columns=['created_dt'])
|
||||||
@ -2628,7 +2654,7 @@ def shiyoujiao_lvyong_export_pdf(num_indicators=475, num_models=21, num_dayindic
|
|||||||
data = pd.read_csv(os.path.join(config.dataset, '指标数据添加时间特征.csv'),
|
data = pd.read_csv(os.path.join(config.dataset, '指标数据添加时间特征.csv'),
|
||||||
encoding='utf-8') # 计算相关系数用
|
encoding='utf-8') # 计算相关系数用
|
||||||
df_zhibiaofenlei = loadcsv(os.path.join(
|
df_zhibiaofenlei = loadcsv(os.path.join(
|
||||||
config.dataset,'特征处理后的指标名称及分类.csv')) # 气泡图用
|
config.dataset, '特征处理后的指标名称及分类.csv')) # 气泡图用
|
||||||
df_zhibiaoshuju = data.copy() # 气泡图用
|
df_zhibiaoshuju = data.copy() # 气泡图用
|
||||||
|
|
||||||
# 绘制特征相关气泡图
|
# 绘制特征相关气泡图
|
||||||
@ -2660,7 +2686,7 @@ def shiyoujiao_lvyong_export_pdf(num_indicators=475, num_models=21, num_dayindic
|
|||||||
plt.xlabel('相关系数')
|
plt.xlabel('相关系数')
|
||||||
plt.ylabel('频数')
|
plt.ylabel('频数')
|
||||||
plt.savefig(os.path.join(
|
plt.savefig(os.path.join(
|
||||||
config.dataset,f'{name}类指标相关性直方分布图.png'), bbox_inches='tight')
|
config.dataset, f'{name}类指标相关性直方分布图.png'), bbox_inches='tight')
|
||||||
plt.close()
|
plt.close()
|
||||||
content.append(Graphs.draw_img(
|
content.append(Graphs.draw_img(
|
||||||
os.path.join(config.dataset, f'{name}类指标相关性直方分布图.png')))
|
os.path.join(config.dataset, f'{name}类指标相关性直方分布图.png')))
|
||||||
@ -2719,7 +2745,8 @@ def shiyoujiao_lvyong_export_pdf(num_indicators=475, num_models=21, num_dayindic
|
|||||||
plt.savefig(os.path.join(config.dataset, '指标分类相关性总和的气泡图.png'),
|
plt.savefig(os.path.join(config.dataset, '指标分类相关性总和的气泡图.png'),
|
||||||
bbox_inches='tight')
|
bbox_inches='tight')
|
||||||
plt.close()
|
plt.close()
|
||||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '指标分类相关性总和的气泡图.png')))
|
content.append(Graphs.draw_img(os.path.join(
|
||||||
|
config.dataset, '指标分类相关性总和的气泡图.png')))
|
||||||
content.append(Graphs.draw_text(
|
content.append(Graphs.draw_text(
|
||||||
'气泡图中,横轴为指标分类,纵轴为指标分类下的特征数量,气泡的面积越大表示该分类中特征的相关系数和越大。'))
|
'气泡图中,横轴为指标分类,纵轴为指标分类下的特征数量,气泡的面积越大表示该分类中特征的相关系数和越大。'))
|
||||||
config.logger.info(f'绘制相关性总和的气泡图结束')
|
config.logger.info(f'绘制相关性总和的气泡图结束')
|
||||||
@ -2735,7 +2762,7 @@ def shiyoujiao_lvyong_export_pdf(num_indicators=475, num_models=21, num_dayindic
|
|||||||
content.append(Graphs.draw_text(introduction))
|
content.append(Graphs.draw_text(introduction))
|
||||||
content.append(Graphs.draw_little_title('模型评估:'))
|
content.append(Graphs.draw_little_title('模型评估:'))
|
||||||
df = pd.read_csv(os.path.join(
|
df = pd.read_csv(os.path.join(
|
||||||
config.dataset,'model_evaluation.csv'), encoding='utf-8')
|
config.dataset, 'model_evaluation.csv'), encoding='utf-8')
|
||||||
# 判断 df 的数值列转为float
|
# 判断 df 的数值列转为float
|
||||||
for col in eval_df.columns:
|
for col in eval_df.columns:
|
||||||
if col not in ['模型(Model)']:
|
if col not in ['模型(Model)']:
|
||||||
@ -2761,9 +2788,11 @@ def shiyoujiao_lvyong_export_pdf(num_indicators=475, num_models=21, num_dayindic
|
|||||||
'3. 平均平方误差(MSE):平均平方误差是衡量预测值与实际值之间误差的一种方法,取值越小,误差越小,预测效果越好。'))
|
'3. 平均平方误差(MSE):平均平方误差是衡量预测值与实际值之间误差的一种方法,取值越小,误差越小,预测效果越好。'))
|
||||||
content.append(Graphs.draw_text('模型拟合:'))
|
content.append(Graphs.draw_text('模型拟合:'))
|
||||||
# 添加图片
|
# 添加图片
|
||||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '预测值与真实值对比图.png')))
|
content.append(Graphs.draw_img(
|
||||||
|
os.path.join(config.dataset, '预测值与真实值对比图.png')))
|
||||||
# 生成pdf文件
|
# 生成pdf文件
|
||||||
doc = SimpleDocTemplate(os.path.join(config.dataset, reportname), pagesize=letter)
|
doc = SimpleDocTemplate(os.path.join(
|
||||||
|
config.dataset, reportname), pagesize=letter)
|
||||||
doc.build(content)
|
doc.build(content)
|
||||||
# pdf 上传到数字化信息平台
|
# pdf 上传到数字化信息平台
|
||||||
try:
|
try:
|
||||||
@ -2785,7 +2814,7 @@ def pp_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, input
|
|||||||
# 获取特征的近一月值
|
# 获取特征的近一月值
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
feature_data_df = pd.read_csv(os.path.join(
|
feature_data_df = pd.read_csv(os.path.join(
|
||||||
config.dataset,'指标数据添加时间特征.csv'), parse_dates=['ds']).tail(20)
|
config.dataset, '指标数据添加时间特征.csv'), parse_dates=['ds']).tail(20)
|
||||||
|
|
||||||
def draw_feature_trend(feature_data_df, features):
|
def draw_feature_trend(feature_data_df, features):
|
||||||
# 画特征近一周的趋势图
|
# 画特征近一周的趋势图
|
||||||
@ -2845,16 +2874,18 @@ def pp_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, input
|
|||||||
# 预测结果
|
# 预测结果
|
||||||
content.append(Graphs.draw_little_title('一、预测结果:'))
|
content.append(Graphs.draw_little_title('一、预测结果:'))
|
||||||
# 添加历史走势及预测价格的走势图片
|
# 添加历史走势及预测价格的走势图片
|
||||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '历史价格-预测值.png')))
|
content.append(Graphs.draw_img(
|
||||||
|
os.path.join(config.dataset, '历史价格-预测值.png')))
|
||||||
|
|
||||||
# 取df中y列为空的行
|
# 取df中y列为空的行
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
df = pd.read_csv(os.path.join(config.dataset, 'predict.csv'), encoding='gbk')
|
df = pd.read_csv(os.path.join(
|
||||||
|
config.dataset, 'predict.csv'), encoding='gbk')
|
||||||
df_true = pd.read_csv(os.path.join(
|
df_true = pd.read_csv(os.path.join(
|
||||||
config.dataset,'指标数据添加时间特征.csv'), encoding='utf-8') # 获取预测日期对应的真实值
|
config.dataset, '指标数据添加时间特征.csv'), encoding='utf-8') # 获取预测日期对应的真实值
|
||||||
df_true = df_true[['ds', 'y']]
|
df_true = df_true[['ds', 'y']]
|
||||||
eval_df = pd.read_csv(os.path.join(
|
eval_df = pd.read_csv(os.path.join(
|
||||||
config.dataset,'model_evaluation.csv'), encoding='utf-8')
|
config.dataset, 'model_evaluation.csv'), encoding='utf-8')
|
||||||
# 按评估指标排序,取前五
|
# 按评估指标排序,取前五
|
||||||
fivemodels_list = eval_df['模型(Model)'].values # 列表形式,后面当作列名索引使用
|
fivemodels_list = eval_df['模型(Model)'].values # 列表形式,后面当作列名索引使用
|
||||||
# 取 fivemodels_list 和 ds 列
|
# 取 fivemodels_list 和 ds 列
|
||||||
@ -2882,7 +2913,7 @@ def pp_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, input
|
|||||||
content.append(Graphs.draw_table(col_width, *data))
|
content.append(Graphs.draw_table(col_width, *data))
|
||||||
content.append(Graphs.draw_little_title('二、上一预测周期偏差率分析:'))
|
content.append(Graphs.draw_little_title('二、上一预测周期偏差率分析:'))
|
||||||
df = pd.read_csv(os.path.join(
|
df = pd.read_csv(os.path.join(
|
||||||
config.dataset,'testandpredict_groupby.csv'), encoding='utf-8')
|
config.dataset, 'testandpredict_groupby.csv'), encoding='utf-8')
|
||||||
df4 = df.copy() # 计算偏差率使用
|
df4 = df.copy() # 计算偏差率使用
|
||||||
# 计算模型偏差率
|
# 计算模型偏差率
|
||||||
# 计算各列对于y列的差值百分比
|
# 计算各列对于y列的差值百分比
|
||||||
@ -2925,7 +2956,7 @@ def pp_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, input
|
|||||||
data = pd.read_csv(os.path.join(config.dataset, '指标数据添加时间特征.csv'),
|
data = pd.read_csv(os.path.join(config.dataset, '指标数据添加时间特征.csv'),
|
||||||
encoding='utf-8') # 计算相关系数用
|
encoding='utf-8') # 计算相关系数用
|
||||||
df_zhibiaofenlei = loadcsv(os.path.join(
|
df_zhibiaofenlei = loadcsv(os.path.join(
|
||||||
config.dataset,'特征处理后的指标名称及分类.csv')) # 气泡图用
|
config.dataset, '特征处理后的指标名称及分类.csv')) # 气泡图用
|
||||||
df_zhibiaoshuju = data.copy() # 气泡图用
|
df_zhibiaoshuju = data.copy() # 气泡图用
|
||||||
|
|
||||||
# 绘制特征相关气泡图
|
# 绘制特征相关气泡图
|
||||||
@ -2957,7 +2988,7 @@ def pp_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, input
|
|||||||
plt.xlabel('相关系数')
|
plt.xlabel('相关系数')
|
||||||
plt.ylabel('频数')
|
plt.ylabel('频数')
|
||||||
plt.savefig(os.path.join(
|
plt.savefig(os.path.join(
|
||||||
config.dataset,f'{name}类指标相关性直方分布图.png'), bbox_inches='tight')
|
config.dataset, f'{name}类指标相关性直方分布图.png'), bbox_inches='tight')
|
||||||
plt.close()
|
plt.close()
|
||||||
content.append(Graphs.draw_img(
|
content.append(Graphs.draw_img(
|
||||||
os.path.join(config.dataset, f'{name}类指标相关性直方分布图.png')))
|
os.path.join(config.dataset, f'{name}类指标相关性直方分布图.png')))
|
||||||
@ -3017,7 +3048,8 @@ def pp_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, input
|
|||||||
plt.savefig(os.path.join(config.dataset, '指标分类相关性总和的气泡图.png'),
|
plt.savefig(os.path.join(config.dataset, '指标分类相关性总和的气泡图.png'),
|
||||||
bbox_inches='tight')
|
bbox_inches='tight')
|
||||||
plt.close()
|
plt.close()
|
||||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '指标分类相关性总和的气泡图.png')))
|
content.append(Graphs.draw_img(os.path.join(
|
||||||
|
config.dataset, '指标分类相关性总和的气泡图.png')))
|
||||||
content.append(Graphs.draw_text(
|
content.append(Graphs.draw_text(
|
||||||
'气泡图中,横轴为指标分类,纵轴为指标分类下的特征数量,气泡的面积越大表示该分类中特征的相关系数和越大。'))
|
'气泡图中,横轴为指标分类,纵轴为指标分类下的特征数量,气泡的面积越大表示该分类中特征的相关系数和越大。'))
|
||||||
config.logger.info(f'绘制相关性总和的气泡图结束')
|
config.logger.info(f'绘制相关性总和的气泡图结束')
|
||||||
@ -3037,7 +3069,7 @@ def pp_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, input
|
|||||||
content.append(Graphs.draw_little_title('模型评估:'))
|
content.append(Graphs.draw_little_title('模型评估:'))
|
||||||
|
|
||||||
df = pd.read_csv(os.path.join(
|
df = pd.read_csv(os.path.join(
|
||||||
config.dataset,'model_evaluation.csv'), encoding='utf-8')
|
config.dataset, 'model_evaluation.csv'), encoding='utf-8')
|
||||||
# 判断 df 的数值列转为float
|
# 判断 df 的数值列转为float
|
||||||
for col in eval_df.columns:
|
for col in eval_df.columns:
|
||||||
if col not in ['模型(Model)']:
|
if col not in ['模型(Model)']:
|
||||||
@ -3059,12 +3091,13 @@ def pp_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, input
|
|||||||
content.append(Graphs.draw_text(
|
content.append(Graphs.draw_text(
|
||||||
'3. 平均平方误差(MSE):平均平方误差是衡量预测值与实际值之间误差的一种方法,取值越小,误差越小,预测效果越好。'))
|
'3. 平均平方误差(MSE):平均平方误差是衡量预测值与实际值之间误差的一种方法,取值越小,误差越小,预测效果越好。'))
|
||||||
content.append(Graphs.draw_text('模型拟合:'))
|
content.append(Graphs.draw_text('模型拟合:'))
|
||||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '预测值与真实值对比图.png')))
|
content.append(Graphs.draw_img(
|
||||||
|
os.path.join(config.dataset, '预测值与真实值对比图.png')))
|
||||||
|
|
||||||
# 附1,特征列表
|
# 附1,特征列表
|
||||||
content.append(Graphs.draw_little_title('附1、特征列表:'))
|
content.append(Graphs.draw_little_title('附1、特征列表:'))
|
||||||
df_fuyi = pd.read_csv(os.path.join(
|
df_fuyi = pd.read_csv(os.path.join(
|
||||||
config.dataset,'特征频度统计.csv'), encoding='utf-8')
|
config.dataset, '特征频度统计.csv'), encoding='utf-8')
|
||||||
for col in df_fuyi.columns:
|
for col in df_fuyi.columns:
|
||||||
fuyi = df_fuyi[col]
|
fuyi = df_fuyi[col]
|
||||||
fuyi = fuyi.dropna()
|
fuyi = fuyi.dropna()
|
||||||
@ -3073,7 +3106,8 @@ def pp_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, input
|
|||||||
content.append(Graphs.draw_text(f'{i+1}、{fuyi[i]}'))
|
content.append(Graphs.draw_text(f'{i+1}、{fuyi[i]}'))
|
||||||
|
|
||||||
# 生成pdf文件
|
# 生成pdf文件
|
||||||
doc = SimpleDocTemplate(os.path.join(config.dataset, reportname), pagesize=letter)
|
doc = SimpleDocTemplate(os.path.join(
|
||||||
|
config.dataset, reportname), pagesize=letter)
|
||||||
# doc = SimpleDocTemplate(os.path.join(config.dataset,'reportname.pdf'), pagesize=letter)
|
# doc = SimpleDocTemplate(os.path.join(config.dataset,'reportname.pdf'), pagesize=letter)
|
||||||
doc.build(content)
|
doc.build(content)
|
||||||
# pdf 上传到数字化信息平台
|
# pdf 上传到数字化信息平台
|
||||||
@ -3105,16 +3139,18 @@ def pp_export_pdf_v1(num_indicators=475, num_models=21, num_dayindicator=202, in
|
|||||||
# imgs = glob.glob(os.path.join(config.dataset,'*历史价格-预测值.png'))
|
# imgs = glob.glob(os.path.join(config.dataset,'*历史价格-预测值.png'))
|
||||||
# for img in imgs:
|
# for img in imgs:
|
||||||
# content.append(Graphs.draw_img(img))
|
# content.append(Graphs.draw_img(img))
|
||||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '历史价格-预测值.png')))
|
content.append(Graphs.draw_img(
|
||||||
|
os.path.join(config.dataset, '历史价格-预测值.png')))
|
||||||
|
|
||||||
# 取df中y列为空的行
|
# 取df中y列为空的行
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
df = pd.read_csv(os.path.join(config.dataset, 'predict.csv'), encoding='gbk')
|
df = pd.read_csv(os.path.join(
|
||||||
|
config.dataset, 'predict.csv'), encoding='gbk')
|
||||||
df_true = pd.read_csv(os.path.join(
|
df_true = pd.read_csv(os.path.join(
|
||||||
config.dataset,'指标数据添加时间特征.csv'), encoding='utf-8') # 获取预测日期对应的真实值
|
config.dataset, '指标数据添加时间特征.csv'), encoding='utf-8') # 获取预测日期对应的真实值
|
||||||
df_true = df_true[['ds', 'y']]
|
df_true = df_true[['ds', 'y']]
|
||||||
eval_df = pd.read_csv(os.path.join(
|
eval_df = pd.read_csv(os.path.join(
|
||||||
config.dataset,'model_evaluation.csv'), encoding='utf-8')
|
config.dataset, 'model_evaluation.csv'), encoding='utf-8')
|
||||||
# 按评估指标排序,取前五
|
# 按评估指标排序,取前五
|
||||||
fivemodels_list = eval_df['模型(Model)'].values # 列表形式,后面当作列名索引使用
|
fivemodels_list = eval_df['模型(Model)'].values # 列表形式,后面当作列名索引使用
|
||||||
# 取 fivemodels_list 和 ds 列
|
# 取 fivemodels_list 和 ds 列
|
||||||
@ -3142,7 +3178,7 @@ def pp_export_pdf_v1(num_indicators=475, num_models=21, num_dayindicator=202, in
|
|||||||
content.append(Graphs.draw_table(col_width, *data))
|
content.append(Graphs.draw_table(col_width, *data))
|
||||||
content.append(Graphs.draw_little_title('二、上一预测周期偏差率分析:'))
|
content.append(Graphs.draw_little_title('二、上一预测周期偏差率分析:'))
|
||||||
df = pd.read_csv(os.path.join(
|
df = pd.read_csv(os.path.join(
|
||||||
config.dataset,'testandpredict_groupby.csv'), encoding='utf-8')
|
config.dataset, 'testandpredict_groupby.csv'), encoding='utf-8')
|
||||||
df4 = df.copy() # 计算偏差率使用
|
df4 = df.copy() # 计算偏差率使用
|
||||||
# 计算模型偏差率
|
# 计算模型偏差率
|
||||||
# 计算各列对于y列的差值百分比
|
# 计算各列对于y列的差值百分比
|
||||||
@ -3186,7 +3222,7 @@ def pp_export_pdf_v1(num_indicators=475, num_models=21, num_dayindicator=202, in
|
|||||||
# 读取数据
|
# 读取数据
|
||||||
from scipy.stats import spearmanr
|
from scipy.stats import spearmanr
|
||||||
data = pd.read_csv(os.path.join(
|
data = pd.read_csv(os.path.join(
|
||||||
config.dataset,'指标数据添加时间特征.csv'), encoding='utf-8')
|
config.dataset, '指标数据添加时间特征.csv'), encoding='utf-8')
|
||||||
# 重命名预测列
|
# 重命名预测列
|
||||||
data.rename(columns={y: 'y'}, inplace=True) # 修改
|
data.rename(columns={y: 'y'}, inplace=True) # 修改
|
||||||
data['ds'] = pd.to_datetime(data['ds']) # 修改
|
data['ds'] = pd.to_datetime(data['ds']) # 修改
|
||||||
@ -3206,7 +3242,8 @@ def pp_export_pdf_v1(num_indicators=475, num_models=21, num_dayindicator=202, in
|
|||||||
# 删除空列
|
# 删除空列
|
||||||
correlation_df.drop('Correlation', axis=1, inplace=True)
|
correlation_df.drop('Correlation', axis=1, inplace=True)
|
||||||
correlation_df.dropna(inplace=True)
|
correlation_df.dropna(inplace=True)
|
||||||
correlation_df.to_csv(os.path.join(config.dataset, '指标相关性分析.csv'), index=False)
|
correlation_df.to_csv(os.path.join(
|
||||||
|
config.dataset, '指标相关性分析.csv'), index=False)
|
||||||
|
|
||||||
data = correlation_df['Pearson_Correlation'].values.tolist()
|
data = correlation_df['Pearson_Correlation'].values.tolist()
|
||||||
# 生成 -1 到 1 的 20 个区间
|
# 生成 -1 到 1 的 20 个区间
|
||||||
@ -3245,7 +3282,8 @@ def pp_export_pdf_v1(num_indicators=475, num_models=21, num_dayindicator=202, in
|
|||||||
plt.close()
|
plt.close()
|
||||||
content.append(Graphs.draw_text(f'指标相关性分析--皮尔逊相关系数:'))
|
content.append(Graphs.draw_text(f'指标相关性分析--皮尔逊相关系数:'))
|
||||||
# 皮尔逊正相关 不相关 负相关 的表格
|
# 皮尔逊正相关 不相关 负相关 的表格
|
||||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '皮尔逊相关性系数.png')))
|
content.append(Graphs.draw_img(
|
||||||
|
os.path.join(config.dataset, '皮尔逊相关性系数.png')))
|
||||||
content.append(Graphs.draw_text('''皮尔逊相关系数说明:'''))
|
content.append(Graphs.draw_text('''皮尔逊相关系数说明:'''))
|
||||||
content.append(Graphs.draw_text('''衡量两个特征之间的线性相关性。'''))
|
content.append(Graphs.draw_text('''衡量两个特征之间的线性相关性。'''))
|
||||||
content.append(Graphs.draw_text('''
|
content.append(Graphs.draw_text('''
|
||||||
@ -3257,7 +3295,7 @@ def pp_export_pdf_v1(num_indicators=475, num_models=21, num_dayindicator=202, in
|
|||||||
content.append(Graphs.draw_text(f'''{top10}'''))
|
content.append(Graphs.draw_text(f'''{top10}'''))
|
||||||
# 获取特征的近一月值
|
# 获取特征的近一月值
|
||||||
feature_data_df = pd.read_csv(os.path.join(
|
feature_data_df = pd.read_csv(os.path.join(
|
||||||
config.dataset,'填充后的特征数据.csv'), parse_dates=['ds']).tail(20)
|
config.dataset, '填充后的特征数据.csv'), parse_dates=['ds']).tail(20)
|
||||||
feature_df = feature_data_df[['ds', 'y']+top10_columns]
|
feature_df = feature_data_df[['ds', 'y']+top10_columns]
|
||||||
# feature_df['ds'] = pd.to_datetime(df['ds'], format = '%Y-%m-%d' )
|
# feature_df['ds'] = pd.to_datetime(df['ds'], format = '%Y-%m-%d' )
|
||||||
# 遍历X每一列,和yy画散点图 ,
|
# 遍历X每一列,和yy画散点图 ,
|
||||||
@ -3363,7 +3401,8 @@ def pp_export_pdf_v1(num_indicators=475, num_models=21, num_dayindicator=202, in
|
|||||||
'''相关系数接近0:表示两个变量之间不存在线性关系,即它们的变化不会随着对方的变化而变化。'''))
|
'''相关系数接近0:表示两个变量之间不存在线性关系,即它们的变化不会随着对方的变化而变化。'''))
|
||||||
content.append(Graphs.draw_text(f'指标相关性分析--斯皮尔曼相关系数:'))
|
content.append(Graphs.draw_text(f'指标相关性分析--斯皮尔曼相关系数:'))
|
||||||
# 皮尔逊正相关 不相关 负相关 的表格
|
# 皮尔逊正相关 不相关 负相关 的表格
|
||||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '斯皮尔曼相关性系数.png')))
|
content.append(Graphs.draw_img(
|
||||||
|
os.path.join(config.dataset, '斯皮尔曼相关性系数.png')))
|
||||||
content.append(Graphs.draw_text(
|
content.append(Graphs.draw_text(
|
||||||
'斯皮尔曼相关系数(Spearmans rank correlation coefficient)是一种用于衡量两个变量之间的单调关系(不一定是线性关系)的统计指标。'))
|
'斯皮尔曼相关系数(Spearmans rank correlation coefficient)是一种用于衡量两个变量之间的单调关系(不一定是线性关系)的统计指标。'))
|
||||||
content.append(Graphs.draw_text('它的计算基于变量的秩次(即变量值的排序位置)而非变量的原始值。'))
|
content.append(Graphs.draw_text('它的计算基于变量的秩次(即变量值的排序位置)而非变量的原始值。'))
|
||||||
@ -3396,7 +3435,7 @@ def pp_export_pdf_v1(num_indicators=475, num_models=21, num_dayindicator=202, in
|
|||||||
content.append(Graphs.draw_little_title('模型评估:'))
|
content.append(Graphs.draw_little_title('模型评估:'))
|
||||||
|
|
||||||
df = pd.read_csv(os.path.join(
|
df = pd.read_csv(os.path.join(
|
||||||
config.dataset,'model_evaluation.csv'), encoding='utf-8')
|
config.dataset, 'model_evaluation.csv'), encoding='utf-8')
|
||||||
# 判断 df 的数值列转为float
|
# 判断 df 的数值列转为float
|
||||||
for col in eval_df.columns:
|
for col in eval_df.columns:
|
||||||
if col not in ['模型(Model)']:
|
if col not in ['模型(Model)']:
|
||||||
@ -3422,12 +3461,13 @@ def pp_export_pdf_v1(num_indicators=475, num_models=21, num_dayindicator=202, in
|
|||||||
'3. 平均平方误差(MSE):平均平方误差是衡量预测值与实际值之间误差的一种方法,取值越小,误差越小,预测效果越好。'))
|
'3. 平均平方误差(MSE):平均平方误差是衡量预测值与实际值之间误差的一种方法,取值越小,误差越小,预测效果越好。'))
|
||||||
content.append(Graphs.draw_text('模型拟合:'))
|
content.append(Graphs.draw_text('模型拟合:'))
|
||||||
# 添加图片
|
# 添加图片
|
||||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '预测值与真实值对比图.png')))
|
content.append(Graphs.draw_img(
|
||||||
|
os.path.join(config.dataset, '预测值与真实值对比图.png')))
|
||||||
|
|
||||||
# 附1,特征列表
|
# 附1,特征列表
|
||||||
content.append(Graphs.draw_little_title('附1、特征列表:'))
|
content.append(Graphs.draw_little_title('附1、特征列表:'))
|
||||||
df_fuyi = pd.read_csv(os.path.join(
|
df_fuyi = pd.read_csv(os.path.join(
|
||||||
config.dataset,'特征频度统计.csv'), encoding='utf-8')
|
config.dataset, '特征频度统计.csv'), encoding='utf-8')
|
||||||
for col in df_fuyi.columns:
|
for col in df_fuyi.columns:
|
||||||
fuyi = df_fuyi[col]
|
fuyi = df_fuyi[col]
|
||||||
fuyi = fuyi.dropna()
|
fuyi = fuyi.dropna()
|
||||||
@ -3436,7 +3476,8 @@ def pp_export_pdf_v1(num_indicators=475, num_models=21, num_dayindicator=202, in
|
|||||||
content.append(Graphs.draw_text(f'{i+1}、{fuyi[i]}'))
|
content.append(Graphs.draw_text(f'{i+1}、{fuyi[i]}'))
|
||||||
|
|
||||||
# 生成pdf文件
|
# 生成pdf文件
|
||||||
doc = SimpleDocTemplate(os.path.join(config.dataset, reportname), pagesize=letter)
|
doc = SimpleDocTemplate(os.path.join(
|
||||||
|
config.dataset, reportname), pagesize=letter)
|
||||||
# doc = SimpleDocTemplate(os.path.join(config.dataset,'reportname.pdf'), pagesize=letter)
|
# doc = SimpleDocTemplate(os.path.join(config.dataset,'reportname.pdf'), pagesize=letter)
|
||||||
doc.build(content)
|
doc.build(content)
|
||||||
# pdf 上传到数字化信息平台
|
# pdf 上传到数字化信息平台
|
||||||
@ -3461,11 +3502,13 @@ def tansuanli_export_pdf(num_indicators=475, num_models=22, num_dayindicator=202
|
|||||||
content.append(Graphs.draw_title(f'{y}{end_time}预测报告'))
|
content.append(Graphs.draw_title(f'{y}{end_time}预测报告'))
|
||||||
# 预测结果
|
# 预测结果
|
||||||
content.append(Graphs.draw_little_title('一、预测结果:'))
|
content.append(Graphs.draw_little_title('一、预测结果:'))
|
||||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '历史价格-预测值.png')))
|
content.append(Graphs.draw_img(
|
||||||
|
os.path.join(config.dataset, '历史价格-预测值.png')))
|
||||||
# 取df中y列为空的行
|
# 取df中y列为空的行
|
||||||
from lib.dataread import loadcsv
|
from lib.dataread import loadcsv
|
||||||
df = loadcsv(os.path.join(config.dataset, 'predict.csv'))
|
df = loadcsv(os.path.join(config.dataset, 'predict.csv'))
|
||||||
df_true = loadcsv(os.path.join(config.dataset, '指标数据添加时间特征.csv')) # 获取预测日期对应的真实值
|
df_true = loadcsv(os.path.join(
|
||||||
|
config.dataset, '指标数据添加时间特征.csv')) # 获取预测日期对应的真实值
|
||||||
df_true = df_true[['ds', 'y']]
|
df_true = df_true[['ds', 'y']]
|
||||||
eval_df = loadcsv(os.path.join(config.dataset, 'model_evaluation.csv'))
|
eval_df = loadcsv(os.path.join(config.dataset, 'model_evaluation.csv'))
|
||||||
# 按评估指标排序,取前五
|
# 按评估指标排序,取前五
|
||||||
@ -3568,8 +3611,10 @@ def tansuanli_export_pdf(num_indicators=475, num_models=22, num_dayindicator=202
|
|||||||
# 特征工程
|
# 特征工程
|
||||||
# 预测列分析
|
# 预测列分析
|
||||||
content.append(Graphs.draw_text(' 电碳价格自相关ACF和偏自相关PACF分析:'))
|
content.append(Graphs.draw_text(' 电碳价格自相关ACF和偏自相关PACF分析:'))
|
||||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '指标数据自相关图.png')))
|
content.append(Graphs.draw_img(
|
||||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '指标数据偏自相关图.png')))
|
os.path.join(config.dataset, '指标数据自相关图.png')))
|
||||||
|
content.append(Graphs.draw_img(
|
||||||
|
os.path.join(config.dataset, '指标数据偏自相关图.png')))
|
||||||
content.append(Graphs.draw_text(' 解读:'))
|
content.append(Graphs.draw_text(' 解读:'))
|
||||||
content.append(Graphs.draw_text(
|
content.append(Graphs.draw_text(
|
||||||
' 自相关函数的取值范围为 [-1, 1]。正值表示信号在不同时间点之间具有正相关性,负值表示信号具有负相关性,而 0 表示信号在不同时间点之间不相关。 '))
|
' 自相关函数的取值范围为 [-1, 1]。正值表示信号在不同时间点之间具有正相关性,负值表示信号具有负相关性,而 0 表示信号在不同时间点之间不相关。 '))
|
||||||
@ -3611,7 +3656,8 @@ def tansuanli_export_pdf(num_indicators=475, num_models=22, num_dayindicator=202
|
|||||||
# 删除空列
|
# 删除空列
|
||||||
correlation_df.drop('Correlation', axis=1, inplace=True)
|
correlation_df.drop('Correlation', axis=1, inplace=True)
|
||||||
correlation_df.dropna(inplace=True)
|
correlation_df.dropna(inplace=True)
|
||||||
correlation_df.to_csv(os.path.join(config.dataset, '指标相关性分析.csv'), index=False)
|
correlation_df.to_csv(os.path.join(
|
||||||
|
config.dataset, '指标相关性分析.csv'), index=False)
|
||||||
data = correlation_df['Pearson_Correlation'].values.tolist()
|
data = correlation_df['Pearson_Correlation'].values.tolist()
|
||||||
# 生成 -1 到 1 的 20 个区间
|
# 生成 -1 到 1 的 20 个区间
|
||||||
bins = np.linspace(-1, 1, 21)
|
bins = np.linspace(-1, 1, 21)
|
||||||
@ -3644,7 +3690,8 @@ def tansuanli_export_pdf(num_indicators=475, num_models=22, num_dayindicator=202
|
|||||||
plt.close()
|
plt.close()
|
||||||
content.append(Graphs.draw_text(f'指标相关性分析--皮尔逊相关系数:'))
|
content.append(Graphs.draw_text(f'指标相关性分析--皮尔逊相关系数:'))
|
||||||
# 皮尔逊正相关 不相关 负相关 的表格
|
# 皮尔逊正相关 不相关 负相关 的表格
|
||||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '皮尔逊相关性系数.png')))
|
content.append(Graphs.draw_img(
|
||||||
|
os.path.join(config.dataset, '皮尔逊相关性系数.png')))
|
||||||
content.append(Graphs.draw_text('''皮尔逊相关系数说明:'''))
|
content.append(Graphs.draw_text('''皮尔逊相关系数说明:'''))
|
||||||
content.append(Graphs.draw_text('''衡量两个特征之间的线性相关性。'''))
|
content.append(Graphs.draw_text('''衡量两个特征之间的线性相关性。'''))
|
||||||
content.append(Graphs.draw_text('''
|
content.append(Graphs.draw_text('''
|
||||||
@ -3663,7 +3710,8 @@ def tansuanli_export_pdf(num_indicators=475, num_models=22, num_dayindicator=202
|
|||||||
'''相关系数接近0:表示两个变量之间不存在线性关系,即它们的变化不会随着对方的变化而变化。'''))
|
'''相关系数接近0:表示两个变量之间不存在线性关系,即它们的变化不会随着对方的变化而变化。'''))
|
||||||
content.append(Graphs.draw_text(f'指标相关性分析--斯皮尔曼相关系数:'))
|
content.append(Graphs.draw_text(f'指标相关性分析--斯皮尔曼相关系数:'))
|
||||||
# 皮尔逊正相关 不相关 负相关 的表格
|
# 皮尔逊正相关 不相关 负相关 的表格
|
||||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '斯皮尔曼相关性系数.png')))
|
content.append(Graphs.draw_img(
|
||||||
|
os.path.join(config.dataset, '斯皮尔曼相关性系数.png')))
|
||||||
content.append(Graphs.draw_text(
|
content.append(Graphs.draw_text(
|
||||||
'斯皮尔曼相关系数(Spearmans rank correlation coefficient)是一种用于衡量两个变量之间的单调关系(不一定是线性关系)的统计指标。'))
|
'斯皮尔曼相关系数(Spearmans rank correlation coefficient)是一种用于衡量两个变量之间的单调关系(不一定是线性关系)的统计指标。'))
|
||||||
content.append(Graphs.draw_text('它的计算基于变量的秩次(即变量值的排序位置)而非变量的原始值。'))
|
content.append(Graphs.draw_text('它的计算基于变量的秩次(即变量值的排序位置)而非变量的原始值。'))
|
||||||
@ -3720,7 +3768,9 @@ def tansuanli_export_pdf(num_indicators=475, num_models=22, num_dayindicator=202
|
|||||||
'3. 平均平方误差(MSE):平均平方误差是衡量预测值与实际值之间误差的一种方法,先计算预测值与真实值之差的平方,然后对这些平方差求平均值。取值越小,误差越小,预测效果越好。'))
|
'3. 平均平方误差(MSE):平均平方误差是衡量预测值与实际值之间误差的一种方法,先计算预测值与真实值之差的平方,然后对这些平方差求平均值。取值越小,误差越小,预测效果越好。'))
|
||||||
content.append(Graphs.draw_text('模型拟合:'))
|
content.append(Graphs.draw_text('模型拟合:'))
|
||||||
# 添加图片
|
# 添加图片
|
||||||
content.append(Graphs.draw_img(os.path.join(config.dataset, '预测值与真实值对比图.png')))
|
content.append(Graphs.draw_img(
|
||||||
|
os.path.join(config.dataset, '预测值与真实值对比图.png')))
|
||||||
# 生成pdf文件
|
# 生成pdf文件
|
||||||
doc = SimpleDocTemplate(os.path.join(config.dataset, reportname), pagesize=letter)
|
doc = SimpleDocTemplate(os.path.join(
|
||||||
|
config.dataset, reportname), pagesize=letter)
|
||||||
doc.build(content)
|
doc.build(content)
|
||||||
|
Loading…
Reference in New Issue
Block a user