埃森哲代码发版
This commit is contained in:
parent
5a8e3ae007
commit
91abe4cfb7
240
ARIMAreport.py
Normal file
240
ARIMAreport.py
Normal file
@ -0,0 +1,240 @@
|
||||
from __future__ import annotations
|
||||
import pdfkit
|
||||
from bs4 import BeautifulSoup
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import statsmodels.api as sm
|
||||
from statsmodels.tsa.stattools import adfuller as ADF
|
||||
from statsmodels.stats.diagnostic import acorr_ljungbox
|
||||
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
|
||||
from statsmodels.tsa.arima.model import ARIMA
|
||||
from statsmodels.graphics.api import qqplot
|
||||
from statsmodels.stats.stattools import durbin_watson
|
||||
from scipy import stats
|
||||
import warnings
|
||||
|
||||
from lib.tools import DeepSeek
|
||||
warnings.filterwarnings("ignore")
|
||||
|
||||
plt.rcParams['font.sans-serif'] = ['SimHei']
|
||||
plt.rcParams['axes.unicode_minus'] = False
|
||||
|
||||
|
||||
class ARIMAReportGenerator(DeepSeek):
|
||||
def __init__(self, data, forecast_steps=7):
|
||||
super().__init__()
|
||||
self.data = data
|
||||
self.forecast_steps = forecast_steps
|
||||
self.model = None
|
||||
self.diff_num = 0
|
||||
self.report_content = []
|
||||
self.figure_paths = {}
|
||||
|
||||
def _save_figure(self, fig_name):
|
||||
"""统一保存图表并记录路径"""
|
||||
path = f"{fig_name}.png"
|
||||
plt.savefig(path, dpi=300, bbox_inches='tight')
|
||||
plt.close()
|
||||
self.figure_paths[fig_name] = path
|
||||
return path
|
||||
|
||||
def _add_report_section(self, title, content, level=2):
|
||||
"""添加报告章节"""
|
||||
self.report_content.append(f"{'#'*level} {title}\n{content}\n")
|
||||
|
||||
def plot_forecast(self, predicted_mean, conf_int):
|
||||
"""预测结果可视化"""
|
||||
plt.figure(figsize=(12, 6))
|
||||
plt.plot(self.data[-30:], label='历史数据')
|
||||
plt.plot(predicted_mean, label='预测值', color='r')
|
||||
plt.fill_between(conf_int.index,
|
||||
conf_int['lower'],
|
||||
conf_int['upper'],
|
||||
color='r', alpha=0.2)
|
||||
plt.title('ARIMA模型预测结果')
|
||||
plt.legend()
|
||||
self._save_figure('forecast_plot')
|
||||
|
||||
def generate_diagnostic_plots(self):
|
||||
"""生成诊断图表集"""
|
||||
# 残差诊断图
|
||||
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
|
||||
qqplot(self.model.resid, line='q', ax=ax1)
|
||||
ax1.set_title('Q-Q图')
|
||||
self.model.resid.plot(ax=ax2, title='残差序列')
|
||||
self._save_figure('residual_diagnostic')
|
||||
|
||||
# ACF/PACF图
|
||||
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))
|
||||
plot_acf(self.model.resid, ax=ax1, lags=20)
|
||||
plot_pacf(self.model.resid, ax=ax2, lags=20)
|
||||
self._save_figure('acf_pacf')
|
||||
|
||||
def build_model(self):
|
||||
"""模型构建与诊断"""
|
||||
# 差分平稳化处理
|
||||
diff_data = self.data.copy()
|
||||
while ADF(diff_data)[1] > 0.05:
|
||||
diff_data = diff_data.diff().dropna()
|
||||
self.diff_num += 1
|
||||
|
||||
# 自动定阶(示例使用AIC准则)
|
||||
aic_results = sm.tsa.arma_order_select_ic(
|
||||
diff_data, max_ar=4, max_ma=4, ic='aic')
|
||||
p, q = aic_results['aic_min_order']
|
||||
|
||||
# 模型训练
|
||||
self.model = ARIMA(self.data, order=(p, self.diff_num, q)).fit()
|
||||
|
||||
# 生成预测
|
||||
forecast = self.model.get_forecast(steps=self.forecast_steps)
|
||||
|
||||
# 生成工作日日期索引
|
||||
last_date = self.data.index[-1].normalize()
|
||||
forecast_dates = pd.date_range(
|
||||
start=last_date + pd.Timedelta(days=1),
|
||||
periods=self.forecast_steps,
|
||||
freq='B' # B表示工作日
|
||||
).normalize()
|
||||
# 设置预测结果日期索引
|
||||
predicted_mean = pd.Series(
|
||||
forecast.predicted_mean.values,
|
||||
index=forecast_dates,
|
||||
name='predicted_mean'
|
||||
)
|
||||
conf_int = pd.DataFrame(
|
||||
forecast.conf_int().values,
|
||||
index=forecast_dates,
|
||||
columns=['lower', 'upper']
|
||||
)
|
||||
variance_series = pd.Series(
|
||||
forecast.se_mean.values,
|
||||
index=forecast_dates,
|
||||
name='std_error'
|
||||
)
|
||||
|
||||
# 保存预测结果
|
||||
predicted_mean.to_csv('ARIMA预测结果.csv')
|
||||
|
||||
# 生成图表
|
||||
self.plot_forecast(predicted_mean, conf_int)
|
||||
self.generate_diagnostic_plots()
|
||||
|
||||
return predicted_mean, conf_int, variance_series
|
||||
|
||||
def _build_stat_table(self, test_name, results):
|
||||
"""构建统计检验表格"""
|
||||
return pd.DataFrame(results.items(), columns=['指标', '值']).to_markdown(index=False)
|
||||
|
||||
def generate_report(self):
|
||||
"""生成完整报告"""
|
||||
# 预测结果
|
||||
predicted_mean, conf_int, variance_series = self.build_model()
|
||||
|
||||
aifengxi = self.summary(predicted_mean.to_markdown(index=False))
|
||||
|
||||
# 创建带日期索引的汇总表格
|
||||
summary_df = pd.DataFrame({
|
||||
'mean': predicted_mean.rename(None),
|
||||
'mean_se': variance_series.rename(None),
|
||||
'mean_ci_lower': conf_int['lower'].values,
|
||||
'mean_ci_upper': conf_int['upper'].values
|
||||
}, index=predicted_mean.index.normalize().strftime('%Y-%m-%d'))
|
||||
forecast_table = summary_df.to_markdown()
|
||||
self._add_report_section('核心预测结果',
|
||||
f"\n\n"
|
||||
"该图表展示了历史数据(蓝线)与模型预测值(红线),阴影区域表示95%置信区间。"
|
||||
f"预测区间显示随着预测步长增加,不确定性逐渐扩大。\n\n{forecast_table}")
|
||||
|
||||
self._add_report_section('预测结果AI分析',
|
||||
aifengxi)
|
||||
# 模型诊断
|
||||
diag_content = (
|
||||
f"**模型阶数**: ARIMA({self.model.model.order})\n\n"
|
||||
f"\n\n"
|
||||
"左图Q-Q图用于检验残差的正态性,理想情况下散点应沿对角线分布。"
|
||||
"右图展示残差序列应呈现随机波动,无明显趋势或周期性。\n\n"
|
||||
f"\n\n"
|
||||
"自相关图(ACF)和偏自相关图(PACF)显示残差序列的相关性,良好的模型应不存在显著的自相关"
|
||||
"(各阶滞后系数应落在置信区间内)。\n\n"
|
||||
f"**DW检验**: {durbin_watson(self.model.resid):.2f}\n"
|
||||
"DW检验值接近2(当前值{value})表明残差间不存在显著的一阶自相关。".format(
|
||||
value=f"{durbin_watson(self.model.resid):.2f}")
|
||||
)
|
||||
|
||||
diag_content = (
|
||||
f"**模型阶数**: ARIMA({self.model.model.order})\n\n"
|
||||
f"\n\n"
|
||||
"左图Q-Q图用于检验残差的正态性,理想情况下散点应沿对角线分布。"
|
||||
"右图展示残差序列应呈现随机波动,无明显趋势或周期性。\n\n"
|
||||
f"\n\n"
|
||||
"自相关图(ACF)和偏自相关图(PACF)显示残差序列的相关性,良好的模型应不存在显著的自相关"
|
||||
"(各阶滞后系数应落在置信区间内)。\n\n"
|
||||
f"**DW检验**: {durbin_watson(self.model.resid):.2f}\n"
|
||||
"DW检验值接近2(当前值{value})表明残差间不存在显著的一阶自相关。".format(
|
||||
value=f"{durbin_watson(self.model.resid):.2f}")
|
||||
)
|
||||
|
||||
self._add_report_section('模型诊断', diag_content)
|
||||
|
||||
# 统计检验
|
||||
adf_results = {
|
||||
"ADF统计量": ADF(self.data)[0],
|
||||
"p值": ADF(self.data)[1],
|
||||
"差分阶数": self.diff_num
|
||||
}
|
||||
adf_test_text = (
|
||||
"ADF检验用于验证时间序列的平稳性,原假设为存在单位根(非平稳)。"
|
||||
f"当p值小于0.05时拒绝原假设,认为序列已平稳。本案例经过{self.diff_num}次差分后达到平稳状态(p值={ADF(self.data)[1]:.5f})。"
|
||||
)
|
||||
self._add_report_section('平稳性检验',
|
||||
f"{adf_test_text}\n\n{self._build_stat_table('ADF检验', adf_results)}")
|
||||
|
||||
# 模型评价指标
|
||||
metrics = {
|
||||
"AIC": self.model.aic,
|
||||
"BIC": self.model.bic,
|
||||
"HQIC": self.model.hqic
|
||||
}
|
||||
metric_explanation = (
|
||||
"AIC(赤池信息准则)、BIC(贝叶斯信息准则)和HQIC(汉南-奎因信息准则)用于评估模型拟合优度与复杂度的平衡,"
|
||||
"数值越小通常表示模型越优。但这些准则更适用于相同差分阶数下的模型比较。"
|
||||
)
|
||||
self._add_report_section('模型评价',
|
||||
f"{metric_explanation}\n\n{self._build_stat_table('信息准则', metrics)}")
|
||||
|
||||
# 保存报告
|
||||
with open('ARIMA_Report.md', 'w', encoding='utf-8') as f:
|
||||
f.write("\n".join(self.report_content))
|
||||
|
||||
# 执行cmd命令转pdf pandoc ARIMA_Report.md -o ARIMA_Report.pdf --pdf-engine=xelatex -V CJKmainfont="SimHei"
|
||||
# 转换为PDF
|
||||
try:
|
||||
import subprocess
|
||||
subprocess.run([
|
||||
'pandoc',
|
||||
'ARIMA_Report.md',
|
||||
'-o', 'ARIMA_Report.pdf',
|
||||
'--pdf-engine=xelatex',
|
||||
'-V', 'CJKmainfont=SimHei'
|
||||
], check=True)
|
||||
print("PDF报告已生成:ARIMA_Report.pdf")
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(f"PDF转换失败,请确保已安装pandoc和xelatex: {e}")
|
||||
except FileNotFoundError:
|
||||
print("未找到pandoc,请先安装: https://pandoc.org/installing.html")
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Load the example dataset; the 'ds' date column becomes the index.
    csv_path = r'D:\code\PriceForecast-svn\yuanyouzhoududataset\指标数据.csv'
    data = pd.read_csv(csv_path, index_col='ds', parse_dates=True)

    # Alternative dataset (kept for reference):
    # data = pd.read_csv(
    #     r'D:\code\PriceForecast-svn\juxitingdataset\指标数据.csv', index_col='ds', parse_dates=True)

    # Build the model on the target column and write the full report.
    generator = ARIMAReportGenerator(data['y'], forecast_steps=30)
    generator.generate_report()
    print("ARIMA分析报告已生成:ARIMA_Report.md")
Binary file not shown.
Binary file not shown.
@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -1122,7 +1122,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 44,
|
||||
"execution_count": 33,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -1162,76 +1162,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 45,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"运行中...\n",
|
||||
"20250408\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_25972\\2961115944.py:99: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n",
|
||||
" df = df.applymap(lambda x: float(x) if isinstance(x, (int, float)) else x)\n",
|
||||
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_25972\\2961115944.py:103: FutureWarning: DataFrame.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.\n",
|
||||
" df = df.fillna(method='ffill')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 \\\n",
|
||||
"1399 2025-04-06 3600.0 37.3095 1.0 -1051.74 6000.0 107382.182661 \n",
|
||||
"1400 2025-04-07 3500.0 36.784 1.0 478.28 6000.0 107945.963078 \n",
|
||||
"\n",
|
||||
" 下游客户价格预期 即期成本 订单结构 计划产量 京博产量 \n",
|
||||
"1399 3630.0 3136.0033 1.0 4982.8366 5715.5175 \n",
|
||||
"1400 3630.0 2972.5098 1.0 4982.8366 5522.676 \n",
|
||||
"前一天的 3136.0033 <class 'float'>\n",
|
||||
"现在的 2972.5098 <class 'float'>\n",
|
||||
" index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 \\\n",
|
||||
"0 1399 2025-04-06 3600 37.3095 1 -1051.74 6000 107382.182661 \n",
|
||||
"1 1400 2025-04-07 3500 36.784 1 478.28 6000 107945.963078 \n",
|
||||
"\n",
|
||||
" 下游客户价格预期 即期成本 订单结构 计划产量 京博产量 \n",
|
||||
"0 3630 3136.0033 1 4982.8366 5715.5175 \n",
|
||||
"1 3630 2972.5098 1 4982.8366 5522.676 \n",
|
||||
"昨日计划提货偏差改之前 478.28\n",
|
||||
"昨日计划提货偏差改之后 539.8394000000008\n",
|
||||
"**************************************************预测结果: 3567.73\n",
|
||||
"更新前一天数据\n",
|
||||
"更新数据前\n",
|
||||
" 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n",
|
||||
"1401 2025-04-08 3450 36.784 1 478.28 6000 107945.963078 3630 \n",
|
||||
"\n",
|
||||
" 即期成本 订单结构 计划产量 京博产量 \n",
|
||||
"1401 3096.5238 1 4982.8366 5522.676 \n",
|
||||
"日期存在,即将更新\n",
|
||||
"新数据 [3500.0, 36.784, '', 478.28, '', 107945.9630779, '', 2972.5098, '', 4982.8366, 5522.676]\n",
|
||||
"更新数据后\n",
|
||||
" 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n",
|
||||
"1401 2025-04-08 3450 36.784 1 478.28 6000 107945.963078 3630 \n",
|
||||
"\n",
|
||||
" 即期成本 订单结构 计划产量 京博产量 \n",
|
||||
"1401 3096.5238 1 4982.8366 5522.676 \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "KeyboardInterrupt",
|
||||
"evalue": "",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[1;32mIn[45], line 15\u001b[0m\n\u001b[0;32m 13\u001b[0m token \u001b[38;5;241m=\u001b[39m getLogToken()\n\u001b[0;32m 14\u001b[0m updateYesterdayExcelData(token\u001b[38;5;241m=\u001b[39mtoken)\n\u001b[1;32m---> 15\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(\u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m 16\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m 17\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m执行失败: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
|
||||
"\u001b[1;31mKeyboardInterrupt\u001b[0m: "
|
||||
"运行中...\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -1265,8 +1203,8 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# start_date = datetime(2025, 4, 1)\n",
|
||||
"# end_date = datetime(2025, 4, 2)\n",
|
||||
"# start_date = datetime(2025,5,1)\n",
|
||||
"# end_date = datetime(2025, 5, 7)\n",
|
||||
"# token = getLogToken()\n",
|
||||
"# while start_date < end_date:\n",
|
||||
"# main(start_date,token)\n",
|
||||
@ -1317,7 +1255,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@ -1331,7 +1269,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
"version": "3.7.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -10,12 +10,12 @@
|
||||
"text/html": [
|
||||
" <script type=\"text/javascript\">\n",
|
||||
" window.PlotlyConfig = {MathJaxConfig: 'local'};\n",
|
||||
" if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
|
||||
" if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
|
||||
" if (typeof require !== 'undefined') {\n",
|
||||
" require.undef(\"plotly\");\n",
|
||||
" requirejs.config({\n",
|
||||
" paths: {\n",
|
||||
" 'plotly': ['https://cdn.plot.ly/plotly-2.12.1.min']\n",
|
||||
" 'plotly': ['https://cdn.plot.ly/plotly-2.2.0.min']\n",
|
||||
" }\n",
|
||||
" });\n",
|
||||
" require(['plotly'], function(Plotly) {\n",
|
||||
@ -679,8 +679,6 @@
|
||||
" print(date)\n",
|
||||
" # 更新当月数据\n",
|
||||
" queryDataListItemNos(token)\n",
|
||||
" # 更新当日数据\n",
|
||||
" start_3(start_date,token,token_push)\n",
|
||||
" # 训练模型\n",
|
||||
" optimize_Model()\n",
|
||||
" # 预测&上传预测结果\n",
|
||||
@ -696,9 +694,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"运行中ing...\n",
|
||||
"18:13:56任务失败\n",
|
||||
"18:13:59任务失败\n"
|
||||
"运行中ing...\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -714,10 +710,10 @@
|
||||
"\n",
|
||||
" # 判断当前时间是否为执行任务的时间点\n",
|
||||
" try:\n",
|
||||
" if current_time == \"17:05:00\":\n",
|
||||
" if current_time == \"09:15:00\":\n",
|
||||
" print(\"执行定时任务\")\n",
|
||||
" main()\n",
|
||||
" elif current_time == \"17:10:00\":\n",
|
||||
" elif current_time == \"20:00:00\":\n",
|
||||
" print('更新数据')\n",
|
||||
" start_3()\n",
|
||||
" time.sleep(1)\n",
|
||||
@ -725,9 +721,9 @@
|
||||
" print(f\"{current_time}任务失败\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# # 检测数据准确性, 需要检测放开\n",
|
||||
"# # check_data(\"100028098|LISTING_PRICE\")\n",
|
||||
"# # check_data(\"9137070016544622XB|DAY_Yield\")\n"
|
||||
" # 检测数据准确性, 需要检测放开\n",
|
||||
" # check_data(\"100028098|LISTING_PRICE\")\n",
|
||||
" # check_data(\"9137070016544622XB|DAY_Yield\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -736,14 +732,14 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# start_date = datetime(2025, 4, 8)\n",
|
||||
"# end_date = datetime(2025, 4, 9)\n",
|
||||
"# start_date = datetime(2025, 4, 1)\n",
|
||||
"# end_date = datetime(2025, 5, 7)\n",
|
||||
"# token = get_head_auth()\n",
|
||||
"\n",
|
||||
"# token_push = get_head_push_auth()\n",
|
||||
"\n",
|
||||
"# while start_date < end_date:\n",
|
||||
" # main(start_date,token,token_push)\n",
|
||||
"# main(start_date,token,token_push)\n",
|
||||
"# start_date += timedelta(days=1)\n"
|
||||
]
|
||||
},
|
||||
@ -936,7 +932,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@ -950,7 +946,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
"version": "3.7.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
Binary file not shown.
Binary file not shown.
@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -10,12 +10,12 @@
|
||||
"text/html": [
|
||||
" <script type=\"text/javascript\">\n",
|
||||
" window.PlotlyConfig = {MathJaxConfig: 'local'};\n",
|
||||
" if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
|
||||
" if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
|
||||
" if (typeof require !== 'undefined') {\n",
|
||||
" require.undef(\"plotly\");\n",
|
||||
" requirejs.config({\n",
|
||||
" paths: {\n",
|
||||
" 'plotly': ['https://cdn.plot.ly/plotly-2.12.1.min']\n",
|
||||
" 'plotly': ['https://cdn.plot.ly/plotly-2.2.0.min']\n",
|
||||
" }\n",
|
||||
" });\n",
|
||||
" require(['plotly'], function(Plotly) {\n",
|
||||
@ -500,6 +500,7 @@
|
||||
" tuple: (紧凑日期字符串, 标准日期字符串)\n",
|
||||
" \"\"\"\n",
|
||||
" # 日期解析逻辑\n",
|
||||
" from datetime import datetime,timedelta\n",
|
||||
" if isinstance(date, datetime):\n",
|
||||
" now = date\n",
|
||||
" else:\n",
|
||||
@ -695,7 +696,7 @@
|
||||
" # 更新当月数据\n",
|
||||
" queryDataListItemNos(start_date,token)\n",
|
||||
" # 更新当日数据\n",
|
||||
" # start(date)\n",
|
||||
" start(date)\n",
|
||||
" # 训练模型\n",
|
||||
" optimize_Model()\n",
|
||||
" # 预测&上传预测结果\n",
|
||||
@ -705,52 +706,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"# if __name__ == \"__main__\":\n",
|
||||
"# print('运行中')\n",
|
||||
"# # 需要单独运行放开\n",
|
||||
"# # start()\n",
|
||||
"# # start_1(date='2025-01-22')\n",
|
||||
"# # start_1()\n",
|
||||
"\n",
|
||||
"# # 每天定时12点运行\n",
|
||||
"# while True:\n",
|
||||
"# try:\n",
|
||||
"# # 获取当前时间\n",
|
||||
"# current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n",
|
||||
"# current_time_1 = time.strftime(\"%H:%M:%S\", time.localtime())\n",
|
||||
"# # print(current_time_1)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# # 判断当前时间是否为执行任务的时间点\n",
|
||||
"# if current_time == \"09:15:00\":\n",
|
||||
"# print(\"执行定时任务\")\n",
|
||||
" # start()\n",
|
||||
"\n",
|
||||
"# # 休眠1秒钟,避免过多占用CPU资源\n",
|
||||
"# time.sleep(1)\n",
|
||||
"\n",
|
||||
"# elif current_time_1 == \"20:00:00\":\n",
|
||||
"# print(\"更新数据\")\n",
|
||||
"# start_1()\n",
|
||||
"# time.sleep(1)\n",
|
||||
"# except:\n",
|
||||
"# print('执行错误')\n",
|
||||
"# time.sleep(1)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# # 检测数据准确性, 需要检测放开\n",
|
||||
"# # check_data(\"100028098|LISTING_PRICE\")\n",
|
||||
"# # check_data(\"9137070016544622XB|DAY_Yield\")\n"
|
||||
]
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
@ -761,103 +722,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"运行中ing...\n",
|
||||
"获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0NDE5ODg0NywianRpIjoiZmJlMmI4MzA5NzFmNDBhMzhiZTA5YTZjMDEyZjU4YmQifQ.rGLp0UBfeu5JmoYXbGSgCpkrO2QnlAx8hFbbbDDXC8I\n",
|
||||
"20250409\n",
|
||||
" dataDate dataItemNo dataValue\n",
|
||||
"0 2025-04-01 100028046|LISTING_PRICE 8208.0\n",
|
||||
"1 2025-04-02 100028046|LISTING_PRICE 8244.0\n",
|
||||
"2 2025-04-03 100028046|LISTING_PRICE 8244.0\n",
|
||||
"3 2025-04-04 100028046|LISTING_PRICE 8165.0\n",
|
||||
"4 2025-04-05 100028046|LISTING_PRICE 8114.0\n",
|
||||
".. ... ... ...\n",
|
||||
"183 2025-04-07 YHQMXBB|C01100008|STRIKE_PRICE 5180.0\n",
|
||||
"184 2025-04-02 YHQMXBB|C01100008|STRIKE_PRICE 5310.0\n",
|
||||
"185 2025-04-01 YHQMXBB|C01100008|STRIKE_PRICE 5260.0\n",
|
||||
"186 2025-04-04 YHQMXBB|C01100008|STRIKE_PRICE 5230.0\n",
|
||||
"187 2025-04-05 YHQMXBB|C01100008|STRIKE_PRICE 5180.0\n",
|
||||
"\n",
|
||||
"[188 rows x 3 columns]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_9964\\3261286938.py:614: DeprecationWarning:\n",
|
||||
"\n",
|
||||
"The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"当月数据更新完成\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_9964\\3261286938.py:255: UserWarning:\n",
|
||||
"\n",
|
||||
"The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
|
||||
"\n",
|
||||
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_9964\\3261286938.py:257: UserWarning:\n",
|
||||
"\n",
|
||||
"The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Using matplotlib backend: QtAgg\n",
|
||||
"%pylab is deprecated, use %matplotlib inline and import the required libraries.\n",
|
||||
"Populating the interactive namespace from numpy and matplotlib\n",
|
||||
"Fitting 3 folds for each of 180 candidates, totalling 540 fits\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"d:\\ProgramData\\anaconda3\\Lib\\site-packages\\IPython\\core\\magics\\pylab.py:162: UserWarning:\n",
|
||||
"\n",
|
||||
"pylab import has clobbered these variables: ['plot', 'random', '__version__', 'datetime']\n",
|
||||
"`%matplotlib` prevents importing * from pylab and numpy\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Date\n",
|
||||
"2025-04-09 5179.792969\n",
|
||||
"Name: 日度预测价格, dtype: float32\n",
|
||||
"预测值: 5179.79\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_9964\\3261286938.py:203: UserWarning:\n",
|
||||
"\n",
|
||||
"The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
|
||||
"\n",
|
||||
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_9964\\3261286938.py:205: UserWarning:\n",
|
||||
"\n",
|
||||
"The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
|
||||
"\n",
|
||||
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_9964\\3261286938.py:237: FutureWarning:\n",
|
||||
"\n",
|
||||
"Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
|
||||
"\n"
|
||||
"运行中ing...\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -866,19 +731,19 @@
|
||||
"if __name__ == \"__main__\":\n",
|
||||
" print(\"运行中ing...\")\n",
|
||||
" # 每天定时12点运行\n",
|
||||
" # while True:\n",
|
||||
" # # 获取当前时间\n",
|
||||
" # current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n",
|
||||
" # try:\n",
|
||||
" # # 判断当前时间是否为执行任务的时间点\n",
|
||||
" # if current_time == \"12:00:00\":\n",
|
||||
" # print(\"执行定时任务\")\n",
|
||||
" # main()\n",
|
||||
" # elif current_time == \"20:00:00\":\n",
|
||||
" # start_1()\n",
|
||||
" # time.sleep(1)\n",
|
||||
" # except:\n",
|
||||
" # print(f\"{current_time}执行失败\")\n",
|
||||
" while True:\n",
|
||||
" # 获取当前时间\n",
|
||||
" current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n",
|
||||
" try:\n",
|
||||
" # 判断当前时间是否为执行任务的时间点\n",
|
||||
" if current_time == \"09:15:00\":\n",
|
||||
" print(\"执行定时任务\")\n",
|
||||
" main()\n",
|
||||
" elif current_time == \"20:00:00\":\n",
|
||||
" start_1()\n",
|
||||
" time.sleep(1)\n",
|
||||
" except:\n",
|
||||
" print(f\"{current_time}执行失败\")\n",
|
||||
"\n",
|
||||
" # 检测数据准确性, 需要检测放开\n",
|
||||
" # check_data(\"100028098|LISTING_PRICE\")\n",
|
||||
@ -887,24 +752,23 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# start_date = datetime(2025, 4, 2)\n",
|
||||
"# end_date = datetime(2025, 4, 3)\n",
|
||||
"# start_date = datetime(2025, 4, 1)\n",
|
||||
"# end_date = datetime(2025, 5, 7)\n",
|
||||
"# token = get_head_auth()\n",
|
||||
"\n",
|
||||
"# while start_date < end_date:\n",
|
||||
"# date = start_date.strftime('%Y%m%d')\n",
|
||||
"# date2 = start_date.strftime('%Y-%m-%d')\n",
|
||||
"# queryDataListItemNos(date=start_date,token=token)\n",
|
||||
"# updateYesterdayExcelData(date=date2,token=token)\n",
|
||||
"# start(date)\n",
|
||||
"# main(start_date=start_date,token=token,token_push=token)\n",
|
||||
"# # # time.sleep(1)\n",
|
||||
"# # start_1(start_date)\n",
|
||||
"# start_date += timedelta(days=1)\n",
|
||||
"# time.sleep(5)\n",
|
||||
"# time.sleep(2)\n",
|
||||
"\n",
|
||||
"# # print(price_list)"
|
||||
]
|
||||
@ -919,7 +783,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "base",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@ -933,7 +797,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
"version": "3.7.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
Binary file not shown.
Binary file not shown.
@ -5,17 +5,27 @@
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\statsmodels\\compat\\pandas.py:49: FutureWarning:\n",
|
||||
"\n",
|
||||
"The Panel class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next version\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
" <script type=\"text/javascript\">\n",
|
||||
" window.PlotlyConfig = {MathJaxConfig: 'local'};\n",
|
||||
" if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
|
||||
" if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
|
||||
" if (typeof require !== 'undefined') {\n",
|
||||
" require.undef(\"plotly\");\n",
|
||||
" requirejs.config({\n",
|
||||
" paths: {\n",
|
||||
" 'plotly': ['https://cdn.plot.ly/plotly-2.12.1.min']\n",
|
||||
" 'plotly': ['https://cdn.plot.ly/plotly-2.2.0.min']\n",
|
||||
" }\n",
|
||||
" });\n",
|
||||
" require(['plotly'], function(Plotly) {\n",
|
||||
@ -33,8 +43,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"获取到的数据项ID['数据项编码', 'C01100047|STRIKE_PRICE', 'Brentspj', '913716251671540959|EXCHANGE_RATE', 'C01100010|LIST_PRICE01', '250326561|STRIKE_PRICE', 'C01100047|LIST_PRICE', 'C01100047|LIST_PRICE-1', 'C01100047|LIST_PRICE-01', 'OIL_CHEM|guonei|6097|PRICE', '91370500674526498A|C01100008|STRIKE_PRICE', '91370783724809024G|BEN|PRICE', '91370500737223620X|BEN|PRICE', '91370503706169019D|BEN|PRICE', '91370503164840647R|BEN|PRICE', 'C01100047|TURNOVER', '913705221649223519|C01100047|EXW', 'C01100047|CAPACITY']\n",
|
||||
"获取到的数据项ID['C01100047|STRIKE_PRICE', 'Brentspj', '913716251671540959|EXCHANGE_RATE', 'C01100010|LIST_PRICE01', '250326561|STRIKE_PRICE', 'C01100047|LIST_PRICE', 'C01100047|LIST_PRICE-1', 'C01100047|LIST_PRICE-01', 'OIL_CHEM|guonei|6097|PRICE', '91370500674526498A|C01100008|STRIKE_PRICE', '91370783724809024G|BEN|PRICE', '91370500737223620X|BEN|PRICE', '91370503706169019D|BEN|PRICE', '91370503164840647R|BEN|PRICE', 'C01100047|TURNOVER', '913705221649223519|C01100047|EXW', 'C01100047|CAPACITY']\n"
|
||||
"运行中ing\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -209,6 +218,7 @@
|
||||
" tuple: (紧凑日期字符串, 标准日期字符串)\n",
|
||||
" \"\"\"\n",
|
||||
" # 日期解析逻辑\n",
|
||||
" from datetime import datetime,timedelta\n",
|
||||
" if isinstance(date, datetime):\n",
|
||||
" now = date\n",
|
||||
" else:\n",
|
||||
@ -265,22 +275,22 @@
|
||||
"\n",
|
||||
"def upload_data_to_system(token_push,date):\n",
|
||||
" datavalue = forecast_price()\n",
|
||||
" # data = {\n",
|
||||
" # \"funcModule\": \"数据表信息列表\",\n",
|
||||
" # \"funcOperation\": \"新增\",\n",
|
||||
" # \"data\": [\n",
|
||||
" # {\"dataItemNo\": \"C01100047|FORECAST_PRICE\",\n",
|
||||
" # \"dataDate\": get_cur_time(date)[0],\n",
|
||||
" # \"dataStatus\": \"add\",\n",
|
||||
" # \"dataValue\": datavalue\n",
|
||||
" # }\n",
|
||||
" data = {\n",
|
||||
" \"funcModule\": \"数据表信息列表\",\n",
|
||||
" \"funcOperation\": \"新增\",\n",
|
||||
" \"data\": [\n",
|
||||
" {\"dataItemNo\": \"C01100047|FORECAST_PRICE\",\n",
|
||||
" \"dataDate\": get_cur_time(date)[0],\n",
|
||||
" \"dataStatus\": \"add\",\n",
|
||||
" \"dataValue\": datavalue\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" # ]\n",
|
||||
" # }\n",
|
||||
" # print(data)\n",
|
||||
" # headers = {\"Authorization\": token_push}\n",
|
||||
" # res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))\n",
|
||||
" # print(res.text)\n",
|
||||
" ]\n",
|
||||
" }\n",
|
||||
" print(data)\n",
|
||||
" headers = {\"Authorization\": token_push}\n",
|
||||
" res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))\n",
|
||||
" print(res.text)\n",
|
||||
"\n",
|
||||
" \n",
|
||||
"# def upload_data_to_system(token):\n",
|
||||
@ -564,7 +574,8 @@
|
||||
" return\n",
|
||||
"\n",
|
||||
" # data_list = [two_cols, one_cols]\n",
|
||||
" append_rows = [getNow()[1]]\n",
|
||||
" append_rows = [getNow(date)[1]]\n",
|
||||
"# append_rows = [getNow()[1]]\n",
|
||||
" dataItemNo_dataValue = {}\n",
|
||||
" for data_value in datas:\n",
|
||||
" if \"dataValue\" not in data_value:\n",
|
||||
@ -844,6 +855,7 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"if __name__ == \"__main__\":\n",
|
||||
" print('运行中ing')\n",
|
||||
"\n",
|
||||
" # 每天定时12点运行\n",
|
||||
" while True:\n",
|
||||
@ -875,14 +887,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# # 自定义日期执行预测\n",
|
||||
"# # # 自定义日期执行预测\n",
|
||||
"\n",
|
||||
"# start_date = datetime(2025, 4, 8)\n",
|
||||
"# end_date = datetime(2025, 4, 9)\n",
|
||||
"# start_date = datetime(2025, 5, 1)\n",
|
||||
"# end_date = datetime(2025, 5, 7)\n",
|
||||
"\n",
|
||||
"# token = get_head_auth()\n",
|
||||
"# token_push = get_head_push_auth()\n",
|
||||
@ -904,7 +916,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "base",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@ -918,7 +930,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
"version": "3.7.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
Binary file not shown.
@ -111,15 +111,15 @@ data = {
|
||||
"IndexCode": "",
|
||||
"IndexName": "价格预测模型",
|
||||
"Unit": "无",
|
||||
"Frequency": "日度",
|
||||
"SourceName": f"价格预测",
|
||||
"Remark": 'ddd',
|
||||
"DataList": [
|
||||
{
|
||||
"Date": "2024-05-02",
|
||||
"Value": 333444
|
||||
}
|
||||
]
|
||||
"Frequency": "日度",
|
||||
"SourceName": f"价格预测",
|
||||
"Remark": 'ddd',
|
||||
"DataList": [
|
||||
{
|
||||
"Date": "2024-05-02",
|
||||
"Value": 333444
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
# eta 分类
|
||||
@ -242,7 +242,7 @@ table_name = 'v_tbl_crude_oil_warning'
|
||||
# 开关
|
||||
is_train = True # 是否训练
|
||||
is_debug = False # 是否调试
|
||||
is_eta = False # 是否使用eta接口
|
||||
is_eta = True # 是否使用eta接口
|
||||
is_market = False # 是否通过市场信息平台获取特征 ,在is_eta 为true 的情况下生效
|
||||
is_timefurture = True # 是否使用时间特征
|
||||
is_fivemodels = False # 是否使用之前保存的最佳的5个模型
|
||||
|
@ -491,14 +491,18 @@ def featurePindu(dataset):
|
||||
featureInfo += ', 详看 附1、特征列表'
|
||||
|
||||
featureInfo += '''
|
||||
数据特征工程:
|
||||
1. 数据日期排序,新日期在最后
|
||||
2. 删除空列,特征数据列没有值,就删除
|
||||
3. 删除近两月不再更新值的指标
|
||||
4. 非日度数据填充为日度数据,填充规则:
|
||||
-- 向后填充,举例:假设周五出现一个周度指标数据,那么在这之前的数据用上周五的数据
|
||||
-- 向前填充,举例:采集数据开始日期为2018年1月1日,那么周度数据可能是2018年1月3日,那么3日的数据向前填充,使1日2日都有数值
|
||||
数据特征相关性分析:
|
||||
时序数据工程处理:
|
||||
1. 时序对齐处理
|
||||
- 按观测时间升序排列,确保最新数据位于序列末端
|
||||
2. 空值特征筛除
|
||||
- 删除全空值特征列(缺失率=100%)
|
||||
3. 失效指标剔除
|
||||
- 基于最近两个月数据更新状态,移除停止更新的指标
|
||||
4. 多粒度特征时序化处理
|
||||
- 非日度指标采用前向/后向插值法转化为日频数据:
|
||||
▶ 后向填充:当周度指标在周五更新时,用上周五值填充当周前四日
|
||||
▶ 前向填充:对数据采集起始日的空白值,采用首个有效值前推填充
|
||||
数据特征相关性分析:
|
||||
'''
|
||||
config.logger.info(featureInfo)
|
||||
with open(os.path.join(dataset, '特征频度统计.txt'), 'w', encoding='utf-8') as f:
|
||||
|
240
lib/tools.py
240
lib/tools.py
@ -1,30 +1,46 @@
|
||||
import time
|
||||
import os
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
from sklearn import metrics
|
||||
import random, string, base64, hmac, hashlib
|
||||
from reportlab.pdfbase import pdfmetrics # 注册字体
|
||||
from reportlab.pdfbase.ttfonts import TTFont # 字体类
|
||||
from reportlab.platypus import Table, SimpleDocTemplate, Paragraph, Image # 报告内容相关类
|
||||
from reportlab.lib.pagesizes import letter # 页面的标志尺寸(8.5*inch, 11*inch)
|
||||
from reportlab.lib.styles import getSampleStyleSheet # 文本样式
|
||||
from reportlab.lib import colors # 颜色模块
|
||||
from reportlab.graphics.charts.barcharts import VerticalBarChart # 图表类
|
||||
from reportlab.graphics.charts.legends import Legend # 图例类
|
||||
from reportlab.graphics.shapes import Drawing # 绘图工具
|
||||
from reportlab.lib.units import cm # 单位:cm
|
||||
import smtplib
|
||||
from email.mime.text import MIMEText
|
||||
from email.mime.multipart import MIMEMultipart
|
||||
import sqlite3
|
||||
import pymysql
|
||||
import tkinter as tk
|
||||
from langchain_core.documents import Document
|
||||
from langchain_openai import ChatOpenAI
|
||||
from langchain.chains.summarize import load_summarize_chain
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
from langchain.document_loaders import UnstructuredURLLoader
|
||||
from langchain_core.prompts import PromptTemplate
|
||||
from tkinter import messagebox
|
||||
import tkinter as tk
|
||||
import pymysql
|
||||
import sqlite3
|
||||
from email.mime.multipart import MIMEMultipart
|
||||
from email.mime.text import MIMEText
|
||||
import smtplib
|
||||
from reportlab.lib.units import cm # 单位:cm
|
||||
from reportlab.graphics.shapes import Drawing # 绘图工具
|
||||
from reportlab.graphics.charts.legends import Legend # 图例类
|
||||
from reportlab.graphics.charts.barcharts import VerticalBarChart # 图表类
|
||||
from reportlab.lib import colors # 颜色模块
|
||||
from reportlab.lib.styles import getSampleStyleSheet # 文本样式
|
||||
from reportlab.lib.pagesizes import letter # 页面的标志尺寸(8.5*inch, 11*inch)
|
||||
from reportlab.platypus import Table, SimpleDocTemplate, Paragraph, Image # 报告内容相关类
|
||||
from reportlab.pdfbase.ttfonts import TTFont # 字体类
|
||||
from reportlab.pdfbase import pdfmetrics # 注册字体
|
||||
import hashlib
|
||||
import hmac
|
||||
import base64
|
||||
import string
|
||||
import random
|
||||
from sklearn import metrics
|
||||
import seaborn as sns
|
||||
import matplotlib.pyplot as plt
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import os
|
||||
import time
|
||||
import logging
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
|
||||
global logger
|
||||
|
||||
|
||||
def timeit(func):
|
||||
'''计时装饰器'''
|
||||
def wrapper(*args, **kwargs):
|
||||
@ -36,10 +52,12 @@ def timeit(func):
|
||||
return result
|
||||
return wrapper
|
||||
|
||||
|
||||
class BinanceAPI:
|
||||
'''
|
||||
获取 Binance API 请求头签名
|
||||
'''
|
||||
|
||||
def __init__(self, APPID, SECRET):
|
||||
self.APPID = APPID
|
||||
self.SECRET = SECRET
|
||||
@ -47,7 +65,8 @@ class BinanceAPI:
|
||||
|
||||
# 生成随机字符串作为 nonce
|
||||
def generate_nonce(self, length=32):
|
||||
self.nonce = ''.join(random.choices(string.ascii_letters + string.digits, k=length))
|
||||
self.nonce = ''.join(random.choices(
|
||||
string.ascii_letters + string.digits, k=length))
|
||||
return self.nonce
|
||||
|
||||
# 获取当前时间戳(秒)
|
||||
@ -124,7 +143,7 @@ class Graphs:
|
||||
|
||||
# 绘制表格
|
||||
@staticmethod
|
||||
def draw_table(col_width,*args):
|
||||
def draw_table(col_width, *args):
|
||||
# 列宽度
|
||||
col_width = col_width
|
||||
style = [
|
||||
@ -196,6 +215,8 @@ def mse(y_true, y_pred):
|
||||
|
||||
return res_mse
|
||||
# RMSE
|
||||
|
||||
|
||||
def rmse(y_true, y_pred):
|
||||
|
||||
res_rmse = np.sqrt(metrics.mean_squared_error(y_true, y_pred))
|
||||
@ -203,6 +224,8 @@ def rmse(y_true, y_pred):
|
||||
return res_rmse
|
||||
|
||||
# MAE
|
||||
|
||||
|
||||
def mae(y_true, y_pred):
|
||||
|
||||
res_mae = metrics.mean_absolute_error(y_true, y_pred)
|
||||
@ -211,6 +234,8 @@ def mae(y_true, y_pred):
|
||||
|
||||
# sklearn的库中没有MAPE和SMAPE,下面根据公式给出算法实现
|
||||
# MAPE
|
||||
|
||||
|
||||
def mape(y_true, y_pred):
|
||||
|
||||
res_mape = np.mean(np.abs((y_pred - y_true) / y_true)) * 100
|
||||
@ -218,13 +243,18 @@ def mape(y_true, y_pred):
|
||||
return res_mape
|
||||
|
||||
# SMAPE
|
||||
|
||||
|
||||
def smape(y_true, y_pred):
|
||||
|
||||
res_smape = 2.0 * np.mean(np.abs(y_pred - y_true) / (np.abs(y_pred) + np.abs(y_true))) * 100
|
||||
res_smape = 2.0 * np.mean(np.abs(y_pred - y_true) /
|
||||
(np.abs(y_pred) + np.abs(y_true))) * 100
|
||||
|
||||
return res_smape
|
||||
|
||||
# 相关系数绘制
|
||||
|
||||
|
||||
def plot_corr(data, size=11):
|
||||
# 去掉ds列
|
||||
data.drop(columns=['ds'], inplace=True)
|
||||
@ -234,10 +264,11 @@ def plot_corr(data, size=11):
|
||||
|
||||
# 计算各特征与目标列的皮尔逊相关系数,并保存到新的 DataFrame 中
|
||||
for col in data.columns:
|
||||
if col!= 'y':
|
||||
if col != 'y':
|
||||
pearson_correlation = np.corrcoef(data[col], data['y'])[0, 1]
|
||||
spearman_correlation, _ = spearmanr(data[col], data['y'])
|
||||
new_row = {'Feature': col, 'Pearson_Correlation': round(pearson_correlation,3), 'Spearman_Correlation': round(spearman_correlation,2)}
|
||||
new_row = {'Feature': col, 'Pearson_Correlation': round(
|
||||
pearson_correlation, 3), 'Spearman_Correlation': round(spearman_correlation, 2)}
|
||||
correlation_df = correlation_df._append(new_row, ignore_index=True)
|
||||
# 删除空列
|
||||
correlation_df.drop('Correlation', axis=1, inplace=True)
|
||||
@ -248,9 +279,10 @@ def plot_corr(data, size=11):
|
||||
# 生成 -1 到 1 的 20 个区间
|
||||
bins = np.linspace(-1, 1, 21)
|
||||
# 计算每个区间的统计数(这里是区间内数据的数量)
|
||||
hist_values = [np.sum((data >= bins[i]) & (data < bins[i + 1])) for i in range(len(bins) - 1)]
|
||||
hist_values = [np.sum((data >= bins[i]) & (data < bins[i + 1]))
|
||||
for i in range(len(bins) - 1)]
|
||||
|
||||
#设置画布大小
|
||||
# 设置画布大小
|
||||
plt.figure(figsize=(10, 6))
|
||||
# 绘制直方图
|
||||
plt.bar(bins[:-1], hist_values, width=(bins[1] - bins[0]))
|
||||
@ -262,12 +294,12 @@ def plot_corr(data, size=11):
|
||||
plt.savefig('皮尔逊相关性系数.png')
|
||||
plt.close()
|
||||
|
||||
|
||||
#设置画布大小
|
||||
# 设置画布大小
|
||||
plt.figure(figsize=(10, 6))
|
||||
data = correlation_df['Spearman_Correlation'].values.tolist()
|
||||
# 计算每个区间的统计数(这里是区间内数据的数量)
|
||||
hist_values = [np.sum((data >= bins[i]) & (data < bins[i + 1])) for i in range(len(bins) - 1)]
|
||||
hist_values = [np.sum((data >= bins[i]) & (data < bins[i + 1]))
|
||||
for i in range(len(bins) - 1)]
|
||||
|
||||
# 绘制直方图
|
||||
plt.bar(bins[:-1], hist_values, width=(bins[1] - bins[0]))
|
||||
@ -282,9 +314,9 @@ def plot_corr(data, size=11):
|
||||
|
||||
# 邮件封装
|
||||
class SendMail(object):
|
||||
def __init__(self,username,passwd,recv,title,content,
|
||||
file=None,ssl=False,
|
||||
email_host='smtp.qq.com',port=25,ssl_port=465):
|
||||
def __init__(self, username, passwd, recv, title, content,
|
||||
file=None, ssl=False,
|
||||
email_host='smtp.qq.com', port=25, ssl_port=465):
|
||||
'''
|
||||
:param username: 用户名
|
||||
:param passwd: 密码
|
||||
@ -297,62 +329,67 @@ class SendMail(object):
|
||||
:param port: 非安全链接端口,默认为25
|
||||
:param ssl_port: 安全链接端口,默认为465
|
||||
'''
|
||||
self.username = username #用户名
|
||||
self.passwd = passwd #密码
|
||||
self.recv = recv #收件人,多个要传list ['a@qq.com','b@qq.com]
|
||||
self.title = title #邮件标题
|
||||
self.content = content #邮件正文
|
||||
self.file = file #附件路径,如果不在当前目录下,要写绝对路径
|
||||
self.email_host = email_host #smtp服务器地址
|
||||
self.port = port #普通端口
|
||||
self.ssl = ssl #是否安全链接
|
||||
self.ssl_port = ssl_port #安全链接端口
|
||||
self.username = username # 用户名
|
||||
self.passwd = passwd # 密码
|
||||
self.recv = recv # 收件人,多个要传list ['a@qq.com','b@qq.com]
|
||||
self.title = title # 邮件标题
|
||||
self.content = content # 邮件正文
|
||||
self.file = file # 附件路径,如果不在当前目录下,要写绝对路径
|
||||
self.email_host = email_host # smtp服务器地址
|
||||
self.port = port # 普通端口
|
||||
self.ssl = ssl # 是否安全链接
|
||||
self.ssl_port = ssl_port # 安全链接端口
|
||||
|
||||
def send_mail(self):
|
||||
msg = MIMEMultipart()
|
||||
#发送内容的对象
|
||||
if self.file:#处理附件的
|
||||
file_name = os.path.split(self.file)[-1]#只取文件名,不取路径
|
||||
# 发送内容的对象
|
||||
if self.file: # 处理附件的
|
||||
file_name = os.path.split(self.file)[-1] # 只取文件名,不取路径
|
||||
try:
|
||||
f = open(self.file, 'rb').read()
|
||||
except Exception as e:
|
||||
raise Exception('附件打不开!!!!')
|
||||
else:
|
||||
att = MIMEText(f,"base64", "utf-8")
|
||||
att = MIMEText(f, "base64", "utf-8")
|
||||
att["Content-Type"] = 'application/octet-stream'
|
||||
#base64.b64encode(file_name.encode()).decode()
|
||||
new_file_name='=?utf-8?b?' + base64.b64encode(file_name.encode()).decode() + '?='
|
||||
#这里是处理文件名为中文名的,必须这么写
|
||||
att["Content-Disposition"] = 'attachment; filename="%s"'%(new_file_name)
|
||||
# base64.b64encode(file_name.encode()).decode()
|
||||
new_file_name = '=?utf-8?b?' + \
|
||||
base64.b64encode(file_name.encode()).decode() + '?='
|
||||
# 这里是处理文件名为中文名的,必须这么写
|
||||
att["Content-Disposition"] = 'attachment; filename="%s"' % (
|
||||
new_file_name)
|
||||
msg.attach(att)
|
||||
msg.attach(MIMEText(self.content))#邮件正文的内容
|
||||
msg.attach(MIMEText(self.content)) # 邮件正文的内容
|
||||
msg['Subject'] = self.title # 邮件主题
|
||||
msg['From'] = self.username # 发送者账号
|
||||
msg['To'] = ','.join(self.recv) # 接收者账号列表
|
||||
if self.ssl:
|
||||
self.smtp = smtplib.SMTP_SSL(self.email_host,port=self.ssl_port)
|
||||
self.smtp = smtplib.SMTP_SSL(self.email_host, port=self.ssl_port)
|
||||
else:
|
||||
self.smtp = smtplib.SMTP(self.email_host,port=self.port)
|
||||
#发送邮件服务器的对象
|
||||
self.smtp.login(self.username,self.passwd)
|
||||
self.smtp = smtplib.SMTP(self.email_host, port=self.port)
|
||||
# 发送邮件服务器的对象
|
||||
self.smtp.login(self.username, self.passwd)
|
||||
try:
|
||||
self.smtp.sendmail(self.username,self.recv,msg.as_string())
|
||||
self.smtp.sendmail(self.username, self.recv, msg.as_string())
|
||||
pass
|
||||
except Exception as e:
|
||||
print('出错了。。',e)
|
||||
logger.info('邮件服务出错了。。',e)
|
||||
print('出错了。。', e)
|
||||
logger.info('邮件服务出错了。。', e)
|
||||
else:
|
||||
print('发送成功!')
|
||||
self.smtp.quit()
|
||||
|
||||
|
||||
def dateConvert(df, datecol='ds'):
|
||||
# 将date列转换为datetime类型
|
||||
try:
|
||||
df[datecol] = pd.to_datetime(df[datecol],format=r'%Y-%m-%d')
|
||||
df[datecol] = pd.to_datetime(df[datecol], format=r'%Y-%m-%d')
|
||||
except:
|
||||
df[datecol] = pd.to_datetime(df[datecol],format=r'%Y/%m/%d')
|
||||
df[datecol] = pd.to_datetime(df[datecol], format=r'%Y/%m/%d')
|
||||
return df
|
||||
|
||||
def save_to_database(sqlitedb,df,dbname,end_time):
|
||||
|
||||
def save_to_database(sqlitedb, df, dbname, end_time):
|
||||
'''
|
||||
create_dt ,ds 判断数据是否存在,不存在则插入,存在则更新
|
||||
'''
|
||||
@ -361,20 +398,25 @@ def save_to_database(sqlitedb,df,dbname,end_time):
|
||||
df['ds'] = df['ds'].dt.strftime('%Y-%m-%d')
|
||||
|
||||
if not sqlitedb.check_table_exists(dbname):
|
||||
df.to_sql(dbname,sqlitedb.connection,index=False)
|
||||
df.to_sql(dbname, sqlitedb.connection, index=False)
|
||||
else:
|
||||
for col in df.columns:
|
||||
sqlitedb.add_column_if_not_exists(dbname,col,'TEXT')
|
||||
sqlitedb.add_column_if_not_exists(dbname, col, 'TEXT')
|
||||
for row in df.itertuples(index=False):
|
||||
row_dict = row._asdict()
|
||||
columns=row_dict.keys()
|
||||
check_query = sqlitedb.select_data(dbname,where_condition = f"ds = '{row.ds}' and created_dt = '{end_time}'")
|
||||
columns = row_dict.keys()
|
||||
check_query = sqlitedb.select_data(
|
||||
dbname, where_condition=f"ds = '{row.ds}' and created_dt = '{end_time}'")
|
||||
if len(check_query) > 0:
|
||||
set_clause = ", ".join([f"{key} = '{value}'" for key, value in row_dict.items()])
|
||||
sqlitedb.update_data(dbname,set_clause,where_condition = f"ds = '{row.ds} and created_dt = {end_time}'")
|
||||
set_clause = ", ".join(
|
||||
[f"{key} = '{value}'" for key, value in row_dict.items()])
|
||||
sqlitedb.update_data(
|
||||
dbname, set_clause, where_condition=f"ds = '{row.ds} and created_dt = {end_time}'")
|
||||
continue
|
||||
else:
|
||||
sqlitedb.insert_data(dbname,tuple(row_dict.values()),columns=columns)
|
||||
sqlitedb.insert_data(dbname, tuple(
|
||||
row_dict.values()), columns=columns)
|
||||
|
||||
|
||||
class SQLiteHandler:
|
||||
def __init__(self, db_name):
|
||||
@ -426,7 +468,8 @@ class SQLiteHandler:
|
||||
query += f" LIMIT {limit}"
|
||||
results = self.execute_query(query).fetchall()
|
||||
if results:
|
||||
headers = [description[0] for description in self.execute_query(query).description]
|
||||
headers = [description[0]
|
||||
for description in self.execute_query(query).description]
|
||||
return pd.DataFrame(results, columns=headers)
|
||||
else:
|
||||
return pd.DataFrame()
|
||||
@ -463,14 +506,13 @@ class SQLiteHandler:
|
||||
query = f"ALTER TABLE {table_name} ADD COLUMN {column_name} {column_type}"
|
||||
self.execute_query(query)
|
||||
self.commit()
|
||||
print(f"Column '{column_name}' added to table '{table_name}' successfully.")
|
||||
print(
|
||||
f"Column '{column_name}' added to table '{table_name}' successfully.")
|
||||
else:
|
||||
print(f"Column '{column_name}' already exists in table '{table_name}'.")
|
||||
print(
|
||||
f"Column '{column_name}' already exists in table '{table_name}'.")
|
||||
|
||||
|
||||
|
||||
|
||||
import logging
|
||||
class MySQLDB:
|
||||
def __init__(self, host, user, password, database):
|
||||
self.host = host
|
||||
@ -538,17 +580,20 @@ class MySQLDB:
|
||||
self.connection.close()
|
||||
logging.info("Database connection closed.")
|
||||
|
||||
|
||||
def exception_logger(func):
|
||||
def wrapper(*args, **kwargs):
|
||||
try:
|
||||
return func(*args, **kwargs)
|
||||
except Exception as e:
|
||||
# 记录异常日志
|
||||
logging.error(f"An error occurred in function {func.__name__}: {str(e)}")
|
||||
logging.error(
|
||||
f"An error occurred in function {func.__name__}: {str(e)}")
|
||||
# 可以选择重新抛出异常,或者在这里处理异常
|
||||
raise e # 重新抛出异常
|
||||
return wrapper
|
||||
|
||||
|
||||
def get_week_date(end_time):
|
||||
'''
|
||||
获取上上周五,上周周一周二周三周四周五的日期
|
||||
@ -560,7 +605,40 @@ def get_week_date(end_time):
|
||||
up_week_dates = [up_week + datetime.timedelta(days=i) for i in range(14)]
|
||||
create_dates = [date.strftime('%Y-%m-%d') for date in up_week_dates[4:-3]]
|
||||
ds_dates = [date.strftime('%Y-%m-%d') for date in up_week_dates[-7:-2]]
|
||||
return create_dates,ds_dates
|
||||
return create_dates, ds_dates
|
||||
|
||||
|
||||
class DeepSeek():
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def summary(self, text):
|
||||
prompt_template = '''请根据以下ARIMA预测结果分析未来的趋势:
|
||||
"{text}"
|
||||
|
||||
请用专业且结构清晰的中文撰写,重点数据用**加粗**显示
|
||||
'''
|
||||
chinese_prompt = PromptTemplate(
|
||||
template=prompt_template, input_variables=['text'])
|
||||
|
||||
docs = [Document(page_content=text, metadata={
|
||||
"source": "arima_forecast"})]
|
||||
|
||||
apikey = os.environ.get('OPENAI_API_KEY')
|
||||
llm = ChatOpenAI(
|
||||
model="deepseek-chat",
|
||||
temperature=0,
|
||||
base_url="https://api.deepseek.com/v1",
|
||||
api_key=os.environ.get('OPENAI_API_KEY')
|
||||
)
|
||||
chain = load_summarize_chain(llm, prompt=chinese_prompt)
|
||||
print('大语言模型分析预测结果')
|
||||
|
||||
summary = chain.invoke({"input_documents": docs})['output_text']
|
||||
print('大语言模型分析结果:')
|
||||
print(summary)
|
||||
return summary
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
print('This is a tool, not a script.')
|
@ -3297,8 +3297,9 @@ def pp_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, input
|
||||
# 特征、模型、参数配置
|
||||
content.append(Graphs.draw_little_title('模型选择:'))
|
||||
content.append(Graphs.draw_text(
|
||||
f'本次预测使用了一个专门收集时间序列的NeuralForecast库中的{num_models}个模型:'))
|
||||
content.append(Graphs.draw_text(f'使用40天的数据预测未来{inputsize}天的数据。'))
|
||||
f'本次预测调用专用于时间序列预测的NeuralForecast库中{num_models}个模型:'))
|
||||
content.append(Graphs.draw_text(
|
||||
f'基于40天历史数据构建多维时间窗口,采用注意力机制预测未来{inputsize}天趋势'))
|
||||
content.append(Graphs.draw_little_title('指标情况:'))
|
||||
with open(os.path.join(config.dataset, '特征频度统计.txt'), encoding='utf-8') as f:
|
||||
for line in f.readlines():
|
||||
|
Loading…
Reference in New Issue
Block a user