埃森哲代码发版

This commit is contained in:
workpc 2025-05-07 11:21:57 +08:00
parent 5a8e3ae007
commit 91abe4cfb7
16 changed files with 514 additions and 381 deletions

240
ARIMAreport.py Normal file
View File

@ -0,0 +1,240 @@
from __future__ import annotations
import pdfkit
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller as ADF
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.api import qqplot
from statsmodels.stats.stattools import durbin_watson
from scipy import stats
import warnings
from lib.tools import DeepSeek
warnings.filterwarnings("ignore")
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
class ARIMAReportGenerator(DeepSeek):
    """Fit an ARIMA model to a univariate time series and emit a full report.

    Workflow: difference the series until the ADF test says it is stationary,
    auto-select (p, q) by AIC, fit ARIMA, forecast ``forecast_steps`` business
    days ahead, render diagnostic figures, and write a Markdown report
    (``ARIMA_Report.md``) which is then converted to PDF via pandoc.

    Inherits from DeepSeek so that ``self.summary(...)`` can produce an
    AI-written commentary on the forecast table.
    """

    def __init__(self, data, forecast_steps=7):
        """
        Args:
            data: pd.Series with a DatetimeIndex — the series to model.
            forecast_steps (int): number of business days to forecast.
        """
        super().__init__()
        self.data = data
        self.forecast_steps = forecast_steps
        self.model = None          # fitted ARIMAResults; set by build_model()
        self.diff_num = 0          # differencing order found by the ADF loop
        self.report_content = []   # markdown sections, joined when saving
        self.figure_paths = {}     # figure name -> saved PNG path

    def _save_figure(self, fig_name):
        """Save the current matplotlib figure as ``<fig_name>.png`` and record its path."""
        path = f"{fig_name}.png"
        plt.savefig(path, dpi=300, bbox_inches='tight')
        plt.close()
        self.figure_paths[fig_name] = path
        return path

    def _add_report_section(self, title, content, level=2):
        """Append one markdown section; ``level`` sets the heading depth (default '##')."""
        self.report_content.append(f"{'#'*level} {title}\n{content}\n")

    def plot_forecast(self, predicted_mean, conf_int):
        """Plot the last 30 observations, the forecast path and its confidence band."""
        plt.figure(figsize=(12, 6))
        plt.plot(self.data[-30:], label='历史数据')
        plt.plot(predicted_mean, label='预测值', color='r')
        plt.fill_between(conf_int.index,
                         conf_int['lower'],
                         conf_int['upper'],
                         color='r', alpha=0.2)
        plt.title('ARIMA模型预测结果')
        plt.legend()
        self._save_figure('forecast_plot')

    def generate_diagnostic_plots(self):
        """Create residual diagnostic figures: Q-Q + residual series, then ACF/PACF."""
        # Residual diagnostics: Q-Q plot (normality) and the raw residual series
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
        qqplot(self.model.resid, line='q', ax=ax1)
        ax1.set_title('Q-Q图')
        self.model.resid.plot(ax=ax2, title='残差序列')
        self._save_figure('residual_diagnostic')
        # Autocorrelation / partial autocorrelation of the residuals
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))
        plot_acf(self.model.resid, ax=ax1, lags=20)
        plot_pacf(self.model.resid, ax=ax2, lags=20)
        self._save_figure('acf_pacf')

    def build_model(self):
        """Fit the ARIMA model and produce the forecast.

        Returns:
            (predicted_mean, conf_int, variance_series): the point forecast
            (pd.Series), its 95% confidence interval (pd.DataFrame with
            'lower'/'upper' columns) and the forecast standard errors
            (pd.Series), all indexed by business-day dates.

        Side effects: writes 'ARIMA预测结果.csv' and the forecast/diagnostic
        figures; stores the fitted model on ``self.model``.
        """
        # Difference until the ADF test rejects a unit root (p <= 0.05)
        diff_data = self.data.copy()
        while ADF(diff_data)[1] > 0.05:
            diff_data = diff_data.diff().dropna()
            self.diff_num += 1
        # Automatic (p, q) order selection on the differenced series via AIC
        aic_results = sm.tsa.arma_order_select_ic(
            diff_data, max_ar=4, max_ma=4, ic='aic')
        p, q = aic_results['aic_min_order']
        # Fit on the ORIGINAL series; the d component handles differencing
        self.model = ARIMA(self.data, order=(p, self.diff_num, q)).fit()
        # Out-of-sample forecast
        forecast = self.model.get_forecast(steps=self.forecast_steps)
        # Build a business-day date index starting the day after the last observation
        last_date = self.data.index[-1].normalize()
        forecast_dates = pd.date_range(
            start=last_date + pd.Timedelta(days=1),
            periods=self.forecast_steps,
            freq='B'  # business days only
        ).normalize()
        # Re-index the forecast outputs onto the business-day dates
        predicted_mean = pd.Series(
            forecast.predicted_mean.values,
            index=forecast_dates,
            name='predicted_mean'
        )
        conf_int = pd.DataFrame(
            forecast.conf_int().values,
            index=forecast_dates,
            columns=['lower', 'upper']
        )
        variance_series = pd.Series(
            forecast.se_mean.values,
            index=forecast_dates,
            name='std_error'
        )
        # Persist the point forecast
        predicted_mean.to_csv('ARIMA预测结果.csv')
        # Render figures for the report
        self.plot_forecast(predicted_mean, conf_int)
        self.generate_diagnostic_plots()
        return predicted_mean, conf_int, variance_series

    def _build_stat_table(self, test_name, results):
        """Render a {metric: value} dict as a two-column markdown table."""
        return pd.DataFrame(results.items(), columns=['指标', '值']).to_markdown(index=False)

    def generate_report(self):
        """Build the model, assemble all report sections, and write MD + PDF output."""
        # Forecast results
        predicted_mean, conf_int, variance_series = self.build_model()
        aifengxi = self.summary(predicted_mean.to_markdown(index=False))
        # Summary table with a date index (forecast mean, SE and CI bounds)
        summary_df = pd.DataFrame({
            'mean': predicted_mean.rename(None),
            'mean_se': variance_series.rename(None),
            'mean_ci_lower': conf_int['lower'].values,
            'mean_ci_upper': conf_int['upper'].values
        }, index=predicted_mean.index.normalize().strftime('%Y-%m-%d'))
        forecast_table = summary_df.to_markdown()
        self._add_report_section('核心预测结果',
                                 f"![预测结果]({self.figure_paths['forecast_plot']})\n\n"
                                 "该图表展示了历史数据蓝线与模型预测值红线阴影区域表示95%置信区间。"
                                 f"预测区间显示随着预测步长增加,不确定性逐渐扩大。\n\n{forecast_table}")
        self._add_report_section('预测结果AI分析',
                                 aifengxi)
        # Model diagnostics.
        # Fix: the original built this identical string twice and evaluated
        # durbin_watson() three times per copy; compute the statistic once.
        dw_stat = durbin_watson(self.model.resid)
        diag_content = (
            f"**模型阶数**: ARIMA({self.model.model.order})\n\n"
            f"![残差诊断]({self.figure_paths['residual_diagnostic']})\n\n"
            "左图Q-Q图用于检验残差的正态性理想情况下散点应沿对角线分布。"
            "右图展示残差序列应呈现随机波动,无明显趋势或周期性。\n\n"
            f"![自相关图]({self.figure_paths['acf_pacf']})\n\n"
            "自相关图ACF和偏自相关图PACF显示残差序列的相关性良好的模型应不存在显著的自相关"
            "(各阶滞后系数应落在置信区间内)。\n\n"
            f"**DW检验**: {dw_stat:.2f}\n"
            f"DW检验值接近2当前值{dw_stat:.2f})表明残差间不存在显著的一阶自相关。"
        )
        self._add_report_section('模型诊断', diag_content)
        # Stationarity test. NOTE(review): ADF is run on the ORIGINAL series
        # here (as in the original code), so the reported p-value reflects the
        # undifferenced data — confirm this is the intended presentation.
        adf_stat, adf_p = ADF(self.data)[:2]  # computed once instead of 3x
        adf_results = {
            "ADF统计量": adf_stat,
            "p值": adf_p,
            "差分阶数": self.diff_num
        }
        adf_test_text = (
            "ADF检验用于验证时间序列的平稳性原假设为存在单位根非平稳"
            f"当p值小于0.05时拒绝原假设,认为序列已平稳。本案例经过{self.diff_num}次差分后达到平稳状态p值={adf_p:.5f})。"
        )
        self._add_report_section('平稳性检验',
                                 f"{adf_test_text}\n\n{self._build_stat_table('ADF检验', adf_results)}")
        # Information-criterion based model quality metrics
        metrics = {
            "AIC": self.model.aic,
            "BIC": self.model.bic,
            "HQIC": self.model.hqic
        }
        metric_explanation = (
            "AIC赤池信息准则、BIC贝叶斯信息准则和HQIC汉南-奎因信息准则)用于评估模型拟合优度与复杂度的平衡,"
            "数值越小通常表示模型越优。但这些准则更适用于相同差分阶数下的模型比较。"
        )
        self._add_report_section('模型评价',
                                 f"{metric_explanation}\n\n{self._build_stat_table('信息准则', metrics)}")
        # Write the markdown report
        with open('ARIMA_Report.md', 'w', encoding='utf-8') as f:
            f.write("\n".join(self.report_content))
        # Convert to PDF, equivalent to:
        #   pandoc ARIMA_Report.md -o ARIMA_Report.pdf --pdf-engine=xelatex -V CJKmainfont="SimHei"
        try:
            import subprocess
            subprocess.run([
                'pandoc',
                'ARIMA_Report.md',
                '-o', 'ARIMA_Report.pdf',
                '--pdf-engine=xelatex',
                '-V', 'CJKmainfont=SimHei'
            ], check=True)
            print("PDF报告已生成ARIMA_Report.pdf")
        except subprocess.CalledProcessError as e:
            print(f"PDF转换失败请确保已安装pandoc和xelatex: {e}")
        except FileNotFoundError:
            print("未找到pandoc请先安装: https://pandoc.org/installing.html")
if __name__ == '__main__':
    # Load the sample dataset; 'ds' is the date column used as the index.
    csv_path = r'D:\code\PriceForecast-svn\yuanyouzhoududataset\指标数据.csv'
    data = pd.read_csv(csv_path, index_col='ds', parse_dates=True)
    # Alternative sample dataset:
    # data = pd.read_csv(
    #     r'D:\code\PriceForecast-svn\juxitingdataset\指标数据.csv', index_col='ds', parse_dates=True)
    # Run the full modelling + reporting pipeline on the target column 'y'.
    generator = ARIMAReportGenerator(data['y'], forecast_steps=30)
    generator.generate_report()
    print("ARIMA分析报告已生成ARIMA_Report.md")

View File

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
@ -1122,7 +1122,7 @@
},
{
"cell_type": "code",
"execution_count": 44,
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
@ -1162,76 +1162,14 @@
},
{
"cell_type": "code",
"execution_count": 45,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"运行中...\n",
"20250408\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_25972\\2961115944.py:99: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n",
" df = df.applymap(lambda x: float(x) if isinstance(x, (int, float)) else x)\n",
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_25972\\2961115944.py:103: FutureWarning: DataFrame.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.\n",
" df = df.fillna(method='ffill')\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 \\\n",
"1399 2025-04-06 3600.0 37.3095 1.0 -1051.74 6000.0 107382.182661 \n",
"1400 2025-04-07 3500.0 36.784 1.0 478.28 6000.0 107945.963078 \n",
"\n",
" 下游客户价格预期 即期成本 订单结构 计划产量 京博产量 \n",
"1399 3630.0 3136.0033 1.0 4982.8366 5715.5175 \n",
"1400 3630.0 2972.5098 1.0 4982.8366 5522.676 \n",
"前一天的 3136.0033 <class 'float'>\n",
"现在的 2972.5098 <class 'float'>\n",
" index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 \\\n",
"0 1399 2025-04-06 3600 37.3095 1 -1051.74 6000 107382.182661 \n",
"1 1400 2025-04-07 3500 36.784 1 478.28 6000 107945.963078 \n",
"\n",
" 下游客户价格预期 即期成本 订单结构 计划产量 京博产量 \n",
"0 3630 3136.0033 1 4982.8366 5715.5175 \n",
"1 3630 2972.5098 1 4982.8366 5522.676 \n",
"昨日计划提货偏差改之前 478.28\n",
"昨日计划提货偏差改之后 539.8394000000008\n",
"**************************************************预测结果: 3567.73\n",
"更新前一天数据\n",
"更新数据前\n",
" 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n",
"1401 2025-04-08 3450 36.784 1 478.28 6000 107945.963078 3630 \n",
"\n",
" 即期成本 订单结构 计划产量 京博产量 \n",
"1401 3096.5238 1 4982.8366 5522.676 \n",
"日期存在,即将更新\n",
"新数据 [3500.0, 36.784, '', 478.28, '', 107945.9630779, '', 2972.5098, '', 4982.8366, 5522.676]\n",
"更新数据后\n",
" 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n",
"1401 2025-04-08 3450 36.784 1 478.28 6000 107945.963078 3630 \n",
"\n",
" 即期成本 订单结构 计划产量 京博产量 \n",
"1401 3096.5238 1 4982.8366 5522.676 \n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[45], line 15\u001b[0m\n\u001b[0;32m 13\u001b[0m token \u001b[38;5;241m=\u001b[39m getLogToken()\n\u001b[0;32m 14\u001b[0m updateYesterdayExcelData(token\u001b[38;5;241m=\u001b[39mtoken)\n\u001b[1;32m---> 15\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(\u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m 16\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m 17\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m执行失败: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
"\u001b[1;31mKeyboardInterrupt\u001b[0m: "
"运行中...\n"
]
}
],
@ -1265,8 +1203,8 @@
"\n",
"\n",
"\n",
"# start_date = datetime(2025, 4, 1)\n",
"# end_date = datetime(2025, 4, 2)\n",
"# start_date = datetime(2025,5,1)\n",
"# end_date = datetime(2025, 5, 7)\n",
"# token = getLogToken()\n",
"# while start_date < end_date:\n",
"# main(start_date,token)\n",
@ -1317,7 +1255,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
@ -1331,7 +1269,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
"version": "3.7.0"
}
},
"nbformat": 4,

View File

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 12,
"metadata": {},
"outputs": [
{
@ -10,12 +10,12 @@
"text/html": [
" <script type=\"text/javascript\">\n",
" window.PlotlyConfig = {MathJaxConfig: 'local'};\n",
" if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
" if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
" if (typeof require !== 'undefined') {\n",
" require.undef(\"plotly\");\n",
" requirejs.config({\n",
" paths: {\n",
" 'plotly': ['https://cdn.plot.ly/plotly-2.12.1.min']\n",
" 'plotly': ['https://cdn.plot.ly/plotly-2.2.0.min']\n",
" }\n",
" });\n",
" require(['plotly'], function(Plotly) {\n",
@ -679,8 +679,6 @@
" print(date)\n",
" # 更新当月数据\n",
" queryDataListItemNos(token)\n",
" # 更新当日数据\n",
" start_3(start_date,token,token_push)\n",
" # 训练模型\n",
" optimize_Model()\n",
" # 预测&上传预测结果\n",
@ -696,9 +694,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"运行中ing...\n",
"18:13:56任务失败\n",
"18:13:59任务失败\n"
"运行中ing...\n"
]
}
],
@ -714,10 +710,10 @@
"\n",
" # 判断当前时间是否为执行任务的时间点\n",
" try:\n",
" if current_time == \"17:05:00\":\n",
" if current_time == \"09:15:00\":\n",
" print(\"执行定时任务\")\n",
" main()\n",
" elif current_time == \"17:10:00\":\n",
" elif current_time == \"20:00:00\":\n",
" print('更新数据')\n",
" start_3()\n",
" time.sleep(1)\n",
@ -725,9 +721,9 @@
" print(f\"{current_time}任务失败\")\n",
"\n",
"\n",
"# # 检测数据准确性, 需要检测放开\n",
"# # check_data(\"100028098|LISTING_PRICE\")\n",
"# # check_data(\"9137070016544622XB|DAY_Yield\")\n"
" # 检测数据准确性, 需要检测放开\n",
" # check_data(\"100028098|LISTING_PRICE\")\n",
" # check_data(\"9137070016544622XB|DAY_Yield\")\n"
]
},
{
@ -736,14 +732,14 @@
"metadata": {},
"outputs": [],
"source": [
"# start_date = datetime(2025, 4, 8)\n",
"# end_date = datetime(2025, 4, 9)\n",
"# start_date = datetime(2025, 4, 1)\n",
"# end_date = datetime(2025, 5, 7)\n",
"# token = get_head_auth()\n",
"\n",
"# token_push = get_head_push_auth()\n",
"\n",
"# while start_date < end_date:\n",
" # main(start_date,token,token_push)\n",
"# main(start_date,token,token_push)\n",
"# start_date += timedelta(days=1)\n"
]
},
@ -936,7 +932,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
@ -950,7 +946,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
"version": "3.7.0"
}
},
"nbformat": 4,

View File

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 17,
"metadata": {},
"outputs": [
{
@ -10,12 +10,12 @@
"text/html": [
" <script type=\"text/javascript\">\n",
" window.PlotlyConfig = {MathJaxConfig: 'local'};\n",
" if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
" if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
" if (typeof require !== 'undefined') {\n",
" require.undef(\"plotly\");\n",
" requirejs.config({\n",
" paths: {\n",
" 'plotly': ['https://cdn.plot.ly/plotly-2.12.1.min']\n",
" 'plotly': ['https://cdn.plot.ly/plotly-2.2.0.min']\n",
" }\n",
" });\n",
" require(['plotly'], function(Plotly) {\n",
@ -500,6 +500,7 @@
" tuple: (紧凑日期字符串, 标准日期字符串)\n",
" \"\"\"\n",
" # 日期解析逻辑\n",
" from datetime import datetime,timedelta\n",
" if isinstance(date, datetime):\n",
" now = date\n",
" else:\n",
@ -695,7 +696,7 @@
" # 更新当月数据\n",
" queryDataListItemNos(start_date,token)\n",
" # 更新当日数据\n",
" # start(date)\n",
" start(date)\n",
" # 训练模型\n",
" optimize_Model()\n",
" # 预测&上传预测结果\n",
@ -705,52 +706,12 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"\n",
"# if __name__ == \"__main__\":\n",
"# print('运行中')\n",
"# # 需要单独运行放开\n",
"# # start()\n",
"# # start_1(date='2025-01-22')\n",
"# # start_1()\n",
"\n",
"# # 每天定时12点运行\n",
"# while True:\n",
"# try:\n",
"# # 获取当前时间\n",
"# current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n",
"# current_time_1 = time.strftime(\"%H:%M:%S\", time.localtime())\n",
"# # print(current_time_1)\n",
"\n",
"\n",
"\n",
"\n",
"# # 判断当前时间是否为执行任务的时间点\n",
"# if current_time == \"09:15:00\":\n",
"# print(\"执行定时任务\")\n",
" # start()\n",
"\n",
"# # 休眠1秒钟避免过多占用CPU资源\n",
"# time.sleep(1)\n",
"\n",
"# elif current_time_1 == \"20:00:00\":\n",
"# print(\"更新数据\")\n",
"# start_1()\n",
"# time.sleep(1)\n",
"# except:\n",
"# print('执行错误')\n",
"# time.sleep(1)\n",
"\n",
"\n",
"# # 检测数据准确性, 需要检测放开\n",
"# # check_data(\"100028098|LISTING_PRICE\")\n",
"# # check_data(\"9137070016544622XB|DAY_Yield\")\n"
]
"source": []
},
{
"cell_type": "code",
@ -761,103 +722,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"运行中ing...\n",
"获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0NDE5ODg0NywianRpIjoiZmJlMmI4MzA5NzFmNDBhMzhiZTA5YTZjMDEyZjU4YmQifQ.rGLp0UBfeu5JmoYXbGSgCpkrO2QnlAx8hFbbbDDXC8I\n",
"20250409\n",
" dataDate dataItemNo dataValue\n",
"0 2025-04-01 100028046|LISTING_PRICE 8208.0\n",
"1 2025-04-02 100028046|LISTING_PRICE 8244.0\n",
"2 2025-04-03 100028046|LISTING_PRICE 8244.0\n",
"3 2025-04-04 100028046|LISTING_PRICE 8165.0\n",
"4 2025-04-05 100028046|LISTING_PRICE 8114.0\n",
".. ... ... ...\n",
"183 2025-04-07 YHQMXBB|C01100008|STRIKE_PRICE 5180.0\n",
"184 2025-04-02 YHQMXBB|C01100008|STRIKE_PRICE 5310.0\n",
"185 2025-04-01 YHQMXBB|C01100008|STRIKE_PRICE 5260.0\n",
"186 2025-04-04 YHQMXBB|C01100008|STRIKE_PRICE 5230.0\n",
"187 2025-04-05 YHQMXBB|C01100008|STRIKE_PRICE 5180.0\n",
"\n",
"[188 rows x 3 columns]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_9964\\3261286938.py:614: DeprecationWarning:\n",
"\n",
"The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"当月数据更新完成\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_9964\\3261286938.py:255: UserWarning:\n",
"\n",
"The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
"\n",
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_9964\\3261286938.py:257: UserWarning:\n",
"\n",
"The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Using matplotlib backend: QtAgg\n",
"%pylab is deprecated, use %matplotlib inline and import the required libraries.\n",
"Populating the interactive namespace from numpy and matplotlib\n",
"Fitting 3 folds for each of 180 candidates, totalling 540 fits\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"d:\\ProgramData\\anaconda3\\Lib\\site-packages\\IPython\\core\\magics\\pylab.py:162: UserWarning:\n",
"\n",
"pylab import has clobbered these variables: ['plot', 'random', '__version__', 'datetime']\n",
"`%matplotlib` prevents importing * from pylab and numpy\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Date\n",
"2025-04-09 5179.792969\n",
"Name: 日度预测价格, dtype: float32\n",
"预测值: 5179.79\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_9964\\3261286938.py:203: UserWarning:\n",
"\n",
"The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
"\n",
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_9964\\3261286938.py:205: UserWarning:\n",
"\n",
"The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
"\n",
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_9964\\3261286938.py:237: FutureWarning:\n",
"\n",
"Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
"\n"
"运行中ing...\n"
]
}
],
@ -866,19 +731,19 @@
"if __name__ == \"__main__\":\n",
" print(\"运行中ing...\")\n",
" # 每天定时12点运行\n",
" # while True:\n",
" # # 获取当前时间\n",
" # current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n",
" # try:\n",
" # # 判断当前时间是否为执行任务的时间点\n",
" # if current_time == \"12:00:00\":\n",
" # print(\"执行定时任务\")\n",
" # main()\n",
" # elif current_time == \"20:00:00\":\n",
" # start_1()\n",
" # time.sleep(1)\n",
" # except:\n",
" # print(f\"{current_time}执行失败\")\n",
" while True:\n",
" # 获取当前时间\n",
" current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n",
" try:\n",
" # 判断当前时间是否为执行任务的时间点\n",
" if current_time == \"09:15:00\":\n",
" print(\"执行定时任务\")\n",
" main()\n",
" elif current_time == \"20:00:00\":\n",
" start_1()\n",
" time.sleep(1)\n",
" except:\n",
" print(f\"{current_time}执行失败\")\n",
"\n",
" # 检测数据准确性, 需要检测放开\n",
" # check_data(\"100028098|LISTING_PRICE\")\n",
@ -887,24 +752,23 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# start_date = datetime(2025, 4, 2)\n",
"# end_date = datetime(2025, 4, 3)\n",
"# start_date = datetime(2025, 4, 1)\n",
"# end_date = datetime(2025, 5, 7)\n",
"# token = get_head_auth()\n",
"\n",
"# while start_date < end_date:\n",
"# date = start_date.strftime('%Y%m%d')\n",
"# date2 = start_date.strftime('%Y-%m-%d')\n",
"# queryDataListItemNos(date=start_date,token=token)\n",
"# updateYesterdayExcelData(date=date2,token=token)\n",
"# start(date)\n",
"# main(start_date=start_date,token=token,token_push=token)\n",
"# # # time.sleep(1)\n",
"# # start_1(start_date)\n",
"# start_date += timedelta(days=1)\n",
"# time.sleep(5)\n",
"# time.sleep(2)\n",
"\n",
"# # print(price_list)"
]
@ -919,7 +783,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "base",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
@ -933,7 +797,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
"version": "3.7.0"
}
},
"nbformat": 4,

View File

@ -5,17 +5,27 @@
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\statsmodels\\compat\\pandas.py:49: FutureWarning:\n",
"\n",
"The Panel class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next version\n",
"\n"
]
},
{
"data": {
"text/html": [
" <script type=\"text/javascript\">\n",
" window.PlotlyConfig = {MathJaxConfig: 'local'};\n",
" if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
" if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
" if (typeof require !== 'undefined') {\n",
" require.undef(\"plotly\");\n",
" requirejs.config({\n",
" paths: {\n",
" 'plotly': ['https://cdn.plot.ly/plotly-2.12.1.min']\n",
" 'plotly': ['https://cdn.plot.ly/plotly-2.2.0.min']\n",
" }\n",
" });\n",
" require(['plotly'], function(Plotly) {\n",
@ -33,8 +43,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"获取到的数据项ID['数据项编码', 'C01100047|STRIKE_PRICE', 'Brentspj', '913716251671540959|EXCHANGE_RATE', 'C01100010|LIST_PRICE01', '250326561|STRIKE_PRICE', 'C01100047|LIST_PRICE', 'C01100047|LIST_PRICE-1', 'C01100047|LIST_PRICE-01', 'OIL_CHEM|guonei|6097|PRICE', '91370500674526498A|C01100008|STRIKE_PRICE', '91370783724809024G|BEN|PRICE', '91370500737223620X|BEN|PRICE', '91370503706169019D|BEN|PRICE', '91370503164840647R|BEN|PRICE', 'C01100047|TURNOVER', '913705221649223519|C01100047|EXW', 'C01100047|CAPACITY']\n",
"获取到的数据项ID['C01100047|STRIKE_PRICE', 'Brentspj', '913716251671540959|EXCHANGE_RATE', 'C01100010|LIST_PRICE01', '250326561|STRIKE_PRICE', 'C01100047|LIST_PRICE', 'C01100047|LIST_PRICE-1', 'C01100047|LIST_PRICE-01', 'OIL_CHEM|guonei|6097|PRICE', '91370500674526498A|C01100008|STRIKE_PRICE', '91370783724809024G|BEN|PRICE', '91370500737223620X|BEN|PRICE', '91370503706169019D|BEN|PRICE', '91370503164840647R|BEN|PRICE', 'C01100047|TURNOVER', '913705221649223519|C01100047|EXW', 'C01100047|CAPACITY']\n"
"运行中ing\n"
]
}
],
@ -209,6 +218,7 @@
" tuple: (紧凑日期字符串, 标准日期字符串)\n",
" \"\"\"\n",
" # 日期解析逻辑\n",
" from datetime import datetime,timedelta\n",
" if isinstance(date, datetime):\n",
" now = date\n",
" else:\n",
@ -265,22 +275,22 @@
"\n",
"def upload_data_to_system(token_push,date):\n",
" datavalue = forecast_price()\n",
" # data = {\n",
" # \"funcModule\": \"数据表信息列表\",\n",
" # \"funcOperation\": \"新增\",\n",
" # \"data\": [\n",
" # {\"dataItemNo\": \"C01100047|FORECAST_PRICE\",\n",
" # \"dataDate\": get_cur_time(date)[0],\n",
" # \"dataStatus\": \"add\",\n",
" # \"dataValue\": datavalue\n",
" # }\n",
" data = {\n",
" \"funcModule\": \"数据表信息列表\",\n",
" \"funcOperation\": \"新增\",\n",
" \"data\": [\n",
" {\"dataItemNo\": \"C01100047|FORECAST_PRICE\",\n",
" \"dataDate\": get_cur_time(date)[0],\n",
" \"dataStatus\": \"add\",\n",
" \"dataValue\": datavalue\n",
" }\n",
"\n",
" # ]\n",
" # }\n",
" # print(data)\n",
" # headers = {\"Authorization\": token_push}\n",
" # res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))\n",
" # print(res.text)\n",
" ]\n",
" }\n",
" print(data)\n",
" headers = {\"Authorization\": token_push}\n",
" res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))\n",
" print(res.text)\n",
"\n",
" \n",
"# def upload_data_to_system(token):\n",
@ -564,7 +574,8 @@
" return\n",
"\n",
" # data_list = [two_cols, one_cols]\n",
" append_rows = [getNow()[1]]\n",
" append_rows = [getNow(date)[1]]\n",
"# append_rows = [getNow()[1]]\n",
" dataItemNo_dataValue = {}\n",
" for data_value in datas:\n",
" if \"dataValue\" not in data_value:\n",
@ -844,6 +855,7 @@
"\n",
"\n",
"if __name__ == \"__main__\":\n",
" print('运行中ing')\n",
"\n",
" # 每天定时12点运行\n",
" while True:\n",
@ -875,14 +887,14 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# # 自定义日期执行预测\n",
"# # # 自定义日期执行预测\n",
"\n",
"# start_date = datetime(2025, 4, 8)\n",
"# end_date = datetime(2025, 4, 9)\n",
"# start_date = datetime(2025, 5, 1)\n",
"# end_date = datetime(2025, 5, 7)\n",
"\n",
"# token = get_head_auth()\n",
"# token_push = get_head_push_auth()\n",
@ -904,7 +916,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "base",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
@ -918,7 +930,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
"version": "3.7.0"
}
},
"nbformat": 4,

View File

@ -242,7 +242,7 @@ table_name = 'v_tbl_crude_oil_warning'
# 开关
is_train = True # 是否训练
is_debug = False # 是否调试
is_eta = False # 是否使用eta接口
is_eta = True # 是否使用eta接口
is_market = False # 是否通过市场信息平台获取特征 ,在is_eta 为true 的情况下生效
is_timefurture = True # 是否使用时间特征
is_fivemodels = False # 是否使用之前保存的最佳的5个模型

View File

@ -491,13 +491,17 @@ def featurePindu(dataset):
featureInfo += ', 详看 附1、特征列表'
featureInfo += '''
数据特征工程
1. 数据日期排序新日期在最后
2. 删除空列特征数据列没有值就删除
3. 删除近两月不再更新值的指标
4. 非日度数据填充为日度数据填充规则
-- 向后填充举例假设周五出现一个周度指标数据那么在这之前的数据用上周五的数据
-- 向前填充举例采集数据开始日期为2018年1月1日那么周度数据可能是2018年1月3日那么3日的数据向前填充使1日2日都有数值
时序数据工程处理
1. 时序对齐处理
- 按观测时间升序排列确保最新数据位于序列末端
2. 空值特征筛除
- 删除全空值特征列缺失率=100%
3. 失效指标剔除
- 基于最近两个月数据更新状态移除停止更新的指标
4. 多粒度特征时序化处理
- 非日度指标采用前向/后向插值法转化为日频数据
后向填充当周度指标在周五更新时用上周五值填充当周前四日
前向填充对数据采集起始日的空白值采用首个有效值前推填充
数据特征相关性分析
'''
config.logger.info(featureInfo)

View File

@ -1,30 +1,46 @@
import time
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics
import random, string, base64, hmac, hashlib
from reportlab.pdfbase import pdfmetrics # 注册字体
from reportlab.pdfbase.ttfonts import TTFont # 字体类
from reportlab.platypus import Table, SimpleDocTemplate, Paragraph, Image # 报告内容相关类
from reportlab.lib.pagesizes import letter # 页面的标志尺寸(8.5*inch, 11*inch)
from reportlab.lib.styles import getSampleStyleSheet # 文本样式
from reportlab.lib import colors # 颜色模块
from reportlab.graphics.charts.barcharts import VerticalBarChart # 图表类
from reportlab.graphics.charts.legends import Legend # 图例类
from reportlab.graphics.shapes import Drawing # 绘图工具
from reportlab.lib.units import cm # 单位cm
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import sqlite3
import pymysql
import tkinter as tk
from langchain_core.documents import Document
from langchain_openai import ChatOpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain_core.prompts import PromptTemplate
from tkinter import messagebox
import tkinter as tk
import pymysql
import sqlite3
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import smtplib
from reportlab.lib.units import cm # 单位cm
from reportlab.graphics.shapes import Drawing # 绘图工具
from reportlab.graphics.charts.legends import Legend # 图例类
from reportlab.graphics.charts.barcharts import VerticalBarChart # 图表类
from reportlab.lib import colors # 颜色模块
from reportlab.lib.styles import getSampleStyleSheet # 文本样式
from reportlab.lib.pagesizes import letter # 页面的标志尺寸(8.5*inch, 11*inch)
from reportlab.platypus import Table, SimpleDocTemplate, Paragraph, Image # 报告内容相关类
from reportlab.pdfbase.ttfonts import TTFont # 字体类
from reportlab.pdfbase import pdfmetrics # 注册字体
import hashlib
import hmac
import base64
import string
import random
from sklearn import metrics
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
import time
import logging
from dotenv import load_dotenv
load_dotenv()
global logger
def timeit(func):
'''计时装饰器'''
def wrapper(*args, **kwargs):
@ -36,10 +52,12 @@ def timeit(func):
return result
return wrapper
class BinanceAPI:
'''
获取 Binance API 请求头签名
'''
def __init__(self, APPID, SECRET):
self.APPID = APPID
self.SECRET = SECRET
@ -47,7 +65,8 @@ class BinanceAPI:
# 生成随机字符串作为 nonce
def generate_nonce(self, length=32):
self.nonce = ''.join(random.choices(string.ascii_letters + string.digits, k=length))
self.nonce = ''.join(random.choices(
string.ascii_letters + string.digits, k=length))
return self.nonce
# 获取当前时间戳(秒)
@ -124,7 +143,7 @@ class Graphs:
# 绘制表格
@staticmethod
def draw_table(col_width,*args):
def draw_table(col_width, *args):
# 列宽度
col_width = col_width
style = [
@ -196,6 +215,8 @@ def mse(y_true, y_pred):
return res_mse
# RMSE
def rmse(y_true, y_pred):
res_rmse = np.sqrt(metrics.mean_squared_error(y_true, y_pred))
@ -203,6 +224,8 @@ def rmse(y_true, y_pred):
return res_rmse
# MAE
def mae(y_true, y_pred):
res_mae = metrics.mean_absolute_error(y_true, y_pred)
@ -211,6 +234,8 @@ def mae(y_true, y_pred):
# sklearn的库中没有MAPE和SMAPE下面根据公式给出算法实现
# MAPE
def mape(y_true, y_pred):
res_mape = np.mean(np.abs((y_pred - y_true) / y_true)) * 100
@ -218,13 +243,18 @@ def mape(y_true, y_pred):
return res_mape
# SMAPE
def smape(y_true, y_pred):
res_smape = 2.0 * np.mean(np.abs(y_pred - y_true) / (np.abs(y_pred) + np.abs(y_true))) * 100
res_smape = 2.0 * np.mean(np.abs(y_pred - y_true) /
(np.abs(y_pred) + np.abs(y_true))) * 100
return res_smape
# 相关系数绘制
def plot_corr(data, size=11):
# 去掉ds列
data.drop(columns=['ds'], inplace=True)
@ -234,10 +264,11 @@ def plot_corr(data, size=11):
# 计算各特征与目标列的皮尔逊相关系数,并保存到新的 DataFrame 中
for col in data.columns:
if col!= 'y':
if col != 'y':
pearson_correlation = np.corrcoef(data[col], data['y'])[0, 1]
spearman_correlation, _ = spearmanr(data[col], data['y'])
new_row = {'Feature': col, 'Pearson_Correlation': round(pearson_correlation,3), 'Spearman_Correlation': round(spearman_correlation,2)}
new_row = {'Feature': col, 'Pearson_Correlation': round(
pearson_correlation, 3), 'Spearman_Correlation': round(spearman_correlation, 2)}
correlation_df = correlation_df._append(new_row, ignore_index=True)
# 删除空列
correlation_df.drop('Correlation', axis=1, inplace=True)
@ -248,9 +279,10 @@ def plot_corr(data, size=11):
# 生成 -1 到 1 的 20 个区间
bins = np.linspace(-1, 1, 21)
# 计算每个区间的统计数(这里是区间内数据的数量)
hist_values = [np.sum((data >= bins[i]) & (data < bins[i + 1])) for i in range(len(bins) - 1)]
hist_values = [np.sum((data >= bins[i]) & (data < bins[i + 1]))
for i in range(len(bins) - 1)]
#设置画布大小
# 设置画布大小
plt.figure(figsize=(10, 6))
# 绘制直方图
plt.bar(bins[:-1], hist_values, width=(bins[1] - bins[0]))
@ -262,12 +294,12 @@ def plot_corr(data, size=11):
plt.savefig('皮尔逊相关性系数.png')
plt.close()
#设置画布大小
# 设置画布大小
plt.figure(figsize=(10, 6))
data = correlation_df['Spearman_Correlation'].values.tolist()
# 计算每个区间的统计数(这里是区间内数据的数量)
hist_values = [np.sum((data >= bins[i]) & (data < bins[i + 1])) for i in range(len(bins) - 1)]
hist_values = [np.sum((data >= bins[i]) & (data < bins[i + 1]))
for i in range(len(bins) - 1)]
# 绘制直方图
plt.bar(bins[:-1], hist_values, width=(bins[1] - bins[0]))
@ -282,9 +314,9 @@ def plot_corr(data, size=11):
# Email helper
class SendMail(object):
    """Send an email (optionally with one attachment) through an SMTP server."""

    def __init__(self, username, passwd, recv, title, content,
                 file=None, ssl=False,
                 email_host='smtp.qq.com', port=25, ssl_port=465):
        '''
        :param username: sender account name
        :param passwd: password (or SMTP auth token)
        :param recv: recipient(s); pass a list for several, e.g. ['a@qq.com', 'b@qq.com']
        :param title: mail subject
        :param content: mail body text
        :param file: attachment path; use an absolute path if not in the cwd
        :param ssl: whether to connect over SSL
        :param email_host: SMTP server address
        :param port: plain-connection port, default 25
        :param ssl_port: SSL-connection port, default 465
        '''
        self.username = username
        self.passwd = passwd
        self.recv = recv
        self.title = title
        self.content = content
        self.file = file
        self.email_host = email_host
        self.port = port
        self.ssl = ssl
        self.ssl_port = ssl_port

    def send_mail(self):
        """Build the MIME message and deliver it.

        Raises if the attachment cannot be read; a delivery failure is
        printed/logged but not re-raised (best-effort, as in the original).
        """
        msg = MIMEMultipart()
        if self.file:  # attach the optional file
            file_name = os.path.split(self.file)[-1]  # basename only
            try:
                # `with` guarantees the handle is closed (the original
                # opened without closing and leaked the descriptor).
                with open(self.file, 'rb') as fh:
                    payload = fh.read()
            except Exception as e:
                # chain the cause so the real I/O error stays visible
                raise Exception('附件打不开!!!!') from e
            att = MIMEText(payload, "base64", "utf-8")
            att["Content-Type"] = 'application/octet-stream'
            # RFC 2047 encoded-word so non-ASCII (e.g. Chinese) filenames survive.
            new_file_name = '=?utf-8?b?' + \
                base64.b64encode(file_name.encode()).decode() + '?='
            att["Content-Disposition"] = 'attachment; filename="%s"' % (
                new_file_name)
            msg.attach(att)
        msg.attach(MIMEText(self.content))  # mail body
        msg['Subject'] = self.title
        msg['From'] = self.username
        msg['To'] = ','.join(self.recv)
        if self.ssl:
            self.smtp = smtplib.SMTP_SSL(self.email_host, port=self.ssl_port)
        else:
            self.smtp = smtplib.SMTP(self.email_host, port=self.port)
        self.smtp.login(self.username, self.passwd)
        try:
            self.smtp.sendmail(self.username, self.recv, msg.as_string())
        except Exception as e:
            print('出错了。。', e)
            # f-string so the exception detail is actually recorded — the
            # original passed `e` as a stray positional arg and dropped it.
            logger.info(f'邮件服务出错了。。{e}')
        else:
            print('发送成功!')
        self.smtp.quit()
def dateConvert(df, datecol='ds'):
    """Convert *datecol* of *df* to datetime in place and return the frame.

    Tries the ISO form ``YYYY-MM-DD`` first, then falls back to
    ``YYYY/MM/DD`` (the two date formats seen in the source data).
    """
    try:
        df[datecol] = pd.to_datetime(df[datecol], format=r'%Y-%m-%d')
    except (ValueError, TypeError):
        # narrowed from a bare `except:` — only parse failures fall through
        df[datecol] = pd.to_datetime(df[datecol], format=r'%Y/%m/%d')
    return df
def save_to_database(sqlitedb,df,dbname,end_time):
def save_to_database(sqlitedb, df, dbname, end_time):
'''
create_dt ,ds 判断数据是否存在不存在则插入存在则更新
'''
@ -361,20 +398,25 @@ def save_to_database(sqlitedb,df,dbname,end_time):
df['ds'] = df['ds'].dt.strftime('%Y-%m-%d')
if not sqlitedb.check_table_exists(dbname):
df.to_sql(dbname,sqlitedb.connection,index=False)
df.to_sql(dbname, sqlitedb.connection, index=False)
else:
for col in df.columns:
sqlitedb.add_column_if_not_exists(dbname,col,'TEXT')
sqlitedb.add_column_if_not_exists(dbname, col, 'TEXT')
for row in df.itertuples(index=False):
row_dict = row._asdict()
columns=row_dict.keys()
check_query = sqlitedb.select_data(dbname,where_condition = f"ds = '{row.ds}' and created_dt = '{end_time}'")
columns = row_dict.keys()
check_query = sqlitedb.select_data(
dbname, where_condition=f"ds = '{row.ds}' and created_dt = '{end_time}'")
if len(check_query) > 0:
set_clause = ", ".join([f"{key} = '{value}'" for key, value in row_dict.items()])
sqlitedb.update_data(dbname,set_clause,where_condition = f"ds = '{row.ds} and created_dt = {end_time}'")
set_clause = ", ".join(
[f"{key} = '{value}'" for key, value in row_dict.items()])
sqlitedb.update_data(
dbname, set_clause, where_condition=f"ds = '{row.ds} and created_dt = {end_time}'")
continue
else:
sqlitedb.insert_data(dbname,tuple(row_dict.values()),columns=columns)
sqlitedb.insert_data(dbname, tuple(
row_dict.values()), columns=columns)
class SQLiteHandler:
def __init__(self, db_name):
@ -426,7 +468,8 @@ class SQLiteHandler:
query += f" LIMIT {limit}"
results = self.execute_query(query).fetchall()
if results:
headers = [description[0] for description in self.execute_query(query).description]
headers = [description[0]
for description in self.execute_query(query).description]
return pd.DataFrame(results, columns=headers)
else:
return pd.DataFrame()
@ -463,14 +506,13 @@ class SQLiteHandler:
query = f"ALTER TABLE {table_name} ADD COLUMN {column_name} {column_type}"
self.execute_query(query)
self.commit()
print(f"Column '{column_name}' added to table '{table_name}' successfully.")
print(
f"Column '{column_name}' added to table '{table_name}' successfully.")
else:
print(f"Column '{column_name}' already exists in table '{table_name}'.")
print(
f"Column '{column_name}' already exists in table '{table_name}'.")
import logging
class MySQLDB:
def __init__(self, host, user, password, database):
self.host = host
@ -538,17 +580,20 @@ class MySQLDB:
self.connection.close()
logging.info("Database connection closed.")
def exception_logger(func):
    """Decorator: log any exception raised by *func*, then re-raise it.

    Wraps with ``functools.wraps`` so the decorated function keeps its
    original ``__name__``/docstring for logging and introspection.
    """
    from functools import wraps

    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            # record which function failed before propagating
            logging.error(
                f"An error occurred in function {func.__name__}: {str(e)}")
            raise  # bare raise preserves the original traceback
    return wrapper
def get_week_date(end_time):
'''
获取上上周五上周周一周二周三周四周五的日期
@ -560,7 +605,40 @@ def get_week_date(end_time):
up_week_dates = [up_week + datetime.timedelta(days=i) for i in range(14)]
create_dates = [date.strftime('%Y-%m-%d') for date in up_week_dates[4:-3]]
ds_dates = [date.strftime('%Y-%m-%d') for date in up_week_dates[-7:-2]]
return create_dates,ds_dates
return create_dates, ds_dates
class DeepSeek():
    """Wrapper around the DeepSeek chat model used to summarize forecast results."""

    def __init__(self):
        pass

    def summary(self, text):
        """Ask the LLM to analyze *text* (ARIMA forecast output) and return the summary.

        Reads the API key from the ``OPENAI_API_KEY`` environment variable and
        talks to the DeepSeek OpenAI-compatible endpoint.
        """
        prompt_template = '''请根据以下ARIMA预测结果分析未来的趋势
        "{text}"
        请用专业且结构清晰的中文撰写重点数据用**加粗**显示
        '''
        chinese_prompt = PromptTemplate(
            template=prompt_template, input_variables=['text'])
        docs = [Document(page_content=text, metadata={
            "source": "arima_forecast"})]
        # read the key once — the original assigned an unused `apikey` local
        # and then re-read the environment variable a second time
        api_key = os.environ.get('OPENAI_API_KEY')
        llm = ChatOpenAI(
            model="deepseek-chat",
            temperature=0,
            base_url="https://api.deepseek.com/v1",
            api_key=api_key
        )
        chain = load_summarize_chain(llm, prompt=chinese_prompt)
        print('大语言模型分析预测结果')
        summary = chain.invoke({"input_documents": docs})['output_text']
        print('大语言模型分析结果:')
        print(summary)
        return summary
# Module guard: this file is a shared utility library, not an executable script.
if __name__ == '__main__':
    print('This is a tool, not a script.')

View File

@ -3297,8 +3297,9 @@ def pp_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, input
# 特征、模型、参数配置
content.append(Graphs.draw_little_title('模型选择:'))
content.append(Graphs.draw_text(
f'本次预测使用了一个专门收集时间序列的NeuralForecast库中的{num_models}个模型:'))
content.append(Graphs.draw_text(f'使用40天的数据预测未来{inputsize}天的数据。'))
f'本次预测调用专用于时间序列预测的NeuralForecast库中{num_models}个模型:'))
content.append(Graphs.draw_text(
f'基于40天历史数据构建多维时间窗口采用注意力机制预测未来{inputsize}天趋势'))
content.append(Graphs.draw_little_title('指标情况:'))
with open(os.path.join(config.dataset, '特征频度统计.txt'), encoding='utf-8') as f:
for line in f.readlines():