埃森哲代码发版
This commit is contained in:
parent
5a8e3ae007
commit
91abe4cfb7
240
ARIMAreport.py
Normal file
240
ARIMAreport.py
Normal file
@ -0,0 +1,240 @@
|
||||
from __future__ import annotations
|
||||
import pdfkit
|
||||
from bs4 import BeautifulSoup
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import statsmodels.api as sm
|
||||
from statsmodels.tsa.stattools import adfuller as ADF
|
||||
from statsmodels.stats.diagnostic import acorr_ljungbox
|
||||
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
|
||||
from statsmodels.tsa.arima.model import ARIMA
|
||||
from statsmodels.graphics.api import qqplot
|
||||
from statsmodels.stats.stattools import durbin_watson
|
||||
from scipy import stats
|
||||
import warnings
|
||||
|
||||
from lib.tools import DeepSeek
|
||||
warnings.filterwarnings("ignore")
|
||||
|
||||
plt.rcParams['font.sans-serif'] = ['SimHei']
|
||||
plt.rcParams['axes.unicode_minus'] = False
|
||||
|
||||
|
||||
class ARIMAReportGenerator(DeepSeek):
|
||||
def __init__(self, data, forecast_steps=7):
|
||||
super().__init__()
|
||||
self.data = data
|
||||
self.forecast_steps = forecast_steps
|
||||
self.model = None
|
||||
self.diff_num = 0
|
||||
self.report_content = []
|
||||
self.figure_paths = {}
|
||||
|
||||
def _save_figure(self, fig_name):
|
||||
"""统一保存图表并记录路径"""
|
||||
path = f"{fig_name}.png"
|
||||
plt.savefig(path, dpi=300, bbox_inches='tight')
|
||||
plt.close()
|
||||
self.figure_paths[fig_name] = path
|
||||
return path
|
||||
|
||||
def _add_report_section(self, title, content, level=2):
|
||||
"""添加报告章节"""
|
||||
self.report_content.append(f"{'#'*level} {title}\n{content}\n")
|
||||
|
||||
def plot_forecast(self, predicted_mean, conf_int):
|
||||
"""预测结果可视化"""
|
||||
plt.figure(figsize=(12, 6))
|
||||
plt.plot(self.data[-30:], label='历史数据')
|
||||
plt.plot(predicted_mean, label='预测值', color='r')
|
||||
plt.fill_between(conf_int.index,
|
||||
conf_int['lower'],
|
||||
conf_int['upper'],
|
||||
color='r', alpha=0.2)
|
||||
plt.title('ARIMA模型预测结果')
|
||||
plt.legend()
|
||||
self._save_figure('forecast_plot')
|
||||
|
||||
def generate_diagnostic_plots(self):
|
||||
"""生成诊断图表集"""
|
||||
# 残差诊断图
|
||||
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
|
||||
qqplot(self.model.resid, line='q', ax=ax1)
|
||||
ax1.set_title('Q-Q图')
|
||||
self.model.resid.plot(ax=ax2, title='残差序列')
|
||||
self._save_figure('residual_diagnostic')
|
||||
|
||||
# ACF/PACF图
|
||||
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))
|
||||
plot_acf(self.model.resid, ax=ax1, lags=20)
|
||||
plot_pacf(self.model.resid, ax=ax2, lags=20)
|
||||
self._save_figure('acf_pacf')
|
||||
|
||||
def build_model(self):
|
||||
"""模型构建与诊断"""
|
||||
# 差分平稳化处理
|
||||
diff_data = self.data.copy()
|
||||
while ADF(diff_data)[1] > 0.05:
|
||||
diff_data = diff_data.diff().dropna()
|
||||
self.diff_num += 1
|
||||
|
||||
# 自动定阶(示例使用AIC准则)
|
||||
aic_results = sm.tsa.arma_order_select_ic(
|
||||
diff_data, max_ar=4, max_ma=4, ic='aic')
|
||||
p, q = aic_results['aic_min_order']
|
||||
|
||||
# 模型训练
|
||||
self.model = ARIMA(self.data, order=(p, self.diff_num, q)).fit()
|
||||
|
||||
# 生成预测
|
||||
forecast = self.model.get_forecast(steps=self.forecast_steps)
|
||||
|
||||
# 生成工作日日期索引
|
||||
last_date = self.data.index[-1].normalize()
|
||||
forecast_dates = pd.date_range(
|
||||
start=last_date + pd.Timedelta(days=1),
|
||||
periods=self.forecast_steps,
|
||||
freq='B' # B表示工作日
|
||||
).normalize()
|
||||
# 设置预测结果日期索引
|
||||
predicted_mean = pd.Series(
|
||||
forecast.predicted_mean.values,
|
||||
index=forecast_dates,
|
||||
name='predicted_mean'
|
||||
)
|
||||
conf_int = pd.DataFrame(
|
||||
forecast.conf_int().values,
|
||||
index=forecast_dates,
|
||||
columns=['lower', 'upper']
|
||||
)
|
||||
variance_series = pd.Series(
|
||||
forecast.se_mean.values,
|
||||
index=forecast_dates,
|
||||
name='std_error'
|
||||
)
|
||||
|
||||
# 保存预测结果
|
||||
predicted_mean.to_csv('ARIMA预测结果.csv')
|
||||
|
||||
# 生成图表
|
||||
self.plot_forecast(predicted_mean, conf_int)
|
||||
self.generate_diagnostic_plots()
|
||||
|
||||
return predicted_mean, conf_int, variance_series
|
||||
|
||||
def _build_stat_table(self, test_name, results):
|
||||
"""构建统计检验表格"""
|
||||
return pd.DataFrame(results.items(), columns=['指标', '值']).to_markdown(index=False)
|
||||
|
||||
def generate_report(self):
|
||||
"""生成完整报告"""
|
||||
# 预测结果
|
||||
predicted_mean, conf_int, variance_series = self.build_model()
|
||||
|
||||
aifengxi = self.summary(predicted_mean.to_markdown(index=False))
|
||||
|
||||
# 创建带日期索引的汇总表格
|
||||
summary_df = pd.DataFrame({
|
||||
'mean': predicted_mean.rename(None),
|
||||
'mean_se': variance_series.rename(None),
|
||||
'mean_ci_lower': conf_int['lower'].values,
|
||||
'mean_ci_upper': conf_int['upper'].values
|
||||
}, index=predicted_mean.index.normalize().strftime('%Y-%m-%d'))
|
||||
forecast_table = summary_df.to_markdown()
|
||||
self._add_report_section('核心预测结果',
|
||||
f"\n\n"
|
||||
"该图表展示了历史数据(蓝线)与模型预测值(红线),阴影区域表示95%置信区间。"
|
||||
f"预测区间显示随着预测步长增加,不确定性逐渐扩大。\n\n{forecast_table}")
|
||||
|
||||
self._add_report_section('预测结果AI分析',
|
||||
aifengxi)
|
||||
# 模型诊断
|
||||
diag_content = (
|
||||
f"**模型阶数**: ARIMA({self.model.model.order})\n\n"
|
||||
f"\n\n"
|
||||
"左图Q-Q图用于检验残差的正态性,理想情况下散点应沿对角线分布。"
|
||||
"右图展示残差序列应呈现随机波动,无明显趋势或周期性。\n\n"
|
||||
f"\n\n"
|
||||
"自相关图(ACF)和偏自相关图(PACF)显示残差序列的相关性,良好的模型应不存在显著的自相关"
|
||||
"(各阶滞后系数应落在置信区间内)。\n\n"
|
||||
f"**DW检验**: {durbin_watson(self.model.resid):.2f}\n"
|
||||
"DW检验值接近2(当前值{value})表明残差间不存在显著的一阶自相关。".format(
|
||||
value=f"{durbin_watson(self.model.resid):.2f}")
|
||||
)
|
||||
|
||||
diag_content = (
|
||||
f"**模型阶数**: ARIMA({self.model.model.order})\n\n"
|
||||
f"\n\n"
|
||||
"左图Q-Q图用于检验残差的正态性,理想情况下散点应沿对角线分布。"
|
||||
"右图展示残差序列应呈现随机波动,无明显趋势或周期性。\n\n"
|
||||
f"\n\n"
|
||||
"自相关图(ACF)和偏自相关图(PACF)显示残差序列的相关性,良好的模型应不存在显著的自相关"
|
||||
"(各阶滞后系数应落在置信区间内)。\n\n"
|
||||
f"**DW检验**: {durbin_watson(self.model.resid):.2f}\n"
|
||||
"DW检验值接近2(当前值{value})表明残差间不存在显著的一阶自相关。".format(
|
||||
value=f"{durbin_watson(self.model.resid):.2f}")
|
||||
)
|
||||
|
||||
self._add_report_section('模型诊断', diag_content)
|
||||
|
||||
# 统计检验
|
||||
adf_results = {
|
||||
"ADF统计量": ADF(self.data)[0],
|
||||
"p值": ADF(self.data)[1],
|
||||
"差分阶数": self.diff_num
|
||||
}
|
||||
adf_test_text = (
|
||||
"ADF检验用于验证时间序列的平稳性,原假设为存在单位根(非平稳)。"
|
||||
f"当p值小于0.05时拒绝原假设,认为序列已平稳。本案例经过{self.diff_num}次差分后达到平稳状态(p值={ADF(self.data)[1]:.5f})。"
|
||||
)
|
||||
self._add_report_section('平稳性检验',
|
||||
f"{adf_test_text}\n\n{self._build_stat_table('ADF检验', adf_results)}")
|
||||
|
||||
# 模型评价指标
|
||||
metrics = {
|
||||
"AIC": self.model.aic,
|
||||
"BIC": self.model.bic,
|
||||
"HQIC": self.model.hqic
|
||||
}
|
||||
metric_explanation = (
|
||||
"AIC(赤池信息准则)、BIC(贝叶斯信息准则)和HQIC(汉南-奎因信息准则)用于评估模型拟合优度与复杂度的平衡,"
|
||||
"数值越小通常表示模型越优。但这些准则更适用于相同差分阶数下的模型比较。"
|
||||
)
|
||||
self._add_report_section('模型评价',
|
||||
f"{metric_explanation}\n\n{self._build_stat_table('信息准则', metrics)}")
|
||||
|
||||
# 保存报告
|
||||
with open('ARIMA_Report.md', 'w', encoding='utf-8') as f:
|
||||
f.write("\n".join(self.report_content))
|
||||
|
||||
# 执行cmd命令转pdf pandoc ARIMA_Report.md -o ARIMA_Report.pdf --pdf-engine=xelatex -V CJKmainfont="SimHei"
|
||||
# 转换为PDF
|
||||
try:
|
||||
import subprocess
|
||||
subprocess.run([
|
||||
'pandoc',
|
||||
'ARIMA_Report.md',
|
||||
'-o', 'ARIMA_Report.pdf',
|
||||
'--pdf-engine=xelatex',
|
||||
'-V', 'CJKmainfont=SimHei'
|
||||
], check=True)
|
||||
print("PDF报告已生成:ARIMA_Report.pdf")
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(f"PDF转换失败,请确保已安装pandoc和xelatex: {e}")
|
||||
except FileNotFoundError:
|
||||
print("未找到pandoc,请先安装: https://pandoc.org/installing.html")
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Load the example dataset; the 'ds' date column becomes the index.
    csv_path = r'D:\code\PriceForecast-svn\yuanyouzhoududataset\指标数据.csv'
    data = pd.read_csv(csv_path, index_col='ds', parse_dates=True)

    # Alternative dataset (kept for reference):
    # data = pd.read_csv(
    #     r'D:\code\PriceForecast-svn\juxitingdataset\指标数据.csv', index_col='ds', parse_dates=True)

    # Build the model on the target column and write the full report.
    generator = ARIMAReportGenerator(data['y'], forecast_steps=30)
    generator.generate_report()
    print("ARIMA分析报告已生成:ARIMA_Report.md")
Binary file not shown.
Binary file not shown.
@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -1122,7 +1122,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 44,
|
||||
"execution_count": 33,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -1162,76 +1162,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 45,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"运行中...\n",
|
||||
"20250408\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_25972\\2961115944.py:99: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n",
|
||||
" df = df.applymap(lambda x: float(x) if isinstance(x, (int, float)) else x)\n",
|
||||
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_25972\\2961115944.py:103: FutureWarning: DataFrame.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.\n",
|
||||
" df = df.fillna(method='ffill')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 \\\n",
|
||||
"1399 2025-04-06 3600.0 37.3095 1.0 -1051.74 6000.0 107382.182661 \n",
|
||||
"1400 2025-04-07 3500.0 36.784 1.0 478.28 6000.0 107945.963078 \n",
|
||||
"\n",
|
||||
" 下游客户价格预期 即期成本 订单结构 计划产量 京博产量 \n",
|
||||
"1399 3630.0 3136.0033 1.0 4982.8366 5715.5175 \n",
|
||||
"1400 3630.0 2972.5098 1.0 4982.8366 5522.676 \n",
|
||||
"前一天的 3136.0033 <class 'float'>\n",
|
||||
"现在的 2972.5098 <class 'float'>\n",
|
||||
" index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 \\\n",
|
||||
"0 1399 2025-04-06 3600 37.3095 1 -1051.74 6000 107382.182661 \n",
|
||||
"1 1400 2025-04-07 3500 36.784 1 478.28 6000 107945.963078 \n",
|
||||
"\n",
|
||||
" 下游客户价格预期 即期成本 订单结构 计划产量 京博产量 \n",
|
||||
"0 3630 3136.0033 1 4982.8366 5715.5175 \n",
|
||||
"1 3630 2972.5098 1 4982.8366 5522.676 \n",
|
||||
"昨日计划提货偏差改之前 478.28\n",
|
||||
"昨日计划提货偏差改之后 539.8394000000008\n",
|
||||
"**************************************************预测结果: 3567.73\n",
|
||||
"更新前一天数据\n",
|
||||
"更新数据前\n",
|
||||
" 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n",
|
||||
"1401 2025-04-08 3450 36.784 1 478.28 6000 107945.963078 3630 \n",
|
||||
"\n",
|
||||
" 即期成本 订单结构 计划产量 京博产量 \n",
|
||||
"1401 3096.5238 1 4982.8366 5522.676 \n",
|
||||
"日期存在,即将更新\n",
|
||||
"新数据 [3500.0, 36.784, '', 478.28, '', 107945.9630779, '', 2972.5098, '', 4982.8366, 5522.676]\n",
|
||||
"更新数据后\n",
|
||||
" 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n",
|
||||
"1401 2025-04-08 3450 36.784 1 478.28 6000 107945.963078 3630 \n",
|
||||
"\n",
|
||||
" 即期成本 订单结构 计划产量 京博产量 \n",
|
||||
"1401 3096.5238 1 4982.8366 5522.676 \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "KeyboardInterrupt",
|
||||
"evalue": "",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[1;32mIn[45], line 15\u001b[0m\n\u001b[0;32m 13\u001b[0m token \u001b[38;5;241m=\u001b[39m getLogToken()\n\u001b[0;32m 14\u001b[0m updateYesterdayExcelData(token\u001b[38;5;241m=\u001b[39mtoken)\n\u001b[1;32m---> 15\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(\u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m 16\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m 17\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m执行失败: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
|
||||
"\u001b[1;31mKeyboardInterrupt\u001b[0m: "
|
||||
"运行中...\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -1265,8 +1203,8 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# start_date = datetime(2025, 4, 1)\n",
|
||||
"# end_date = datetime(2025, 4, 2)\n",
|
||||
"# start_date = datetime(2025,5,1)\n",
|
||||
"# end_date = datetime(2025, 5, 7)\n",
|
||||
"# token = getLogToken()\n",
|
||||
"# while start_date < end_date:\n",
|
||||
"# main(start_date,token)\n",
|
||||
@ -1317,7 +1255,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@ -1331,7 +1269,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
"version": "3.7.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -10,12 +10,12 @@
|
||||
"text/html": [
|
||||
" <script type=\"text/javascript\">\n",
|
||||
" window.PlotlyConfig = {MathJaxConfig: 'local'};\n",
|
||||
" if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
|
||||
" if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
|
||||
" if (typeof require !== 'undefined') {\n",
|
||||
" require.undef(\"plotly\");\n",
|
||||
" requirejs.config({\n",
|
||||
" paths: {\n",
|
||||
" 'plotly': ['https://cdn.plot.ly/plotly-2.12.1.min']\n",
|
||||
" 'plotly': ['https://cdn.plot.ly/plotly-2.2.0.min']\n",
|
||||
" }\n",
|
||||
" });\n",
|
||||
" require(['plotly'], function(Plotly) {\n",
|
||||
@ -679,8 +679,6 @@
|
||||
" print(date)\n",
|
||||
" # 更新当月数据\n",
|
||||
" queryDataListItemNos(token)\n",
|
||||
" # 更新当日数据\n",
|
||||
" start_3(start_date,token,token_push)\n",
|
||||
" # 训练模型\n",
|
||||
" optimize_Model()\n",
|
||||
" # 预测&上传预测结果\n",
|
||||
@ -696,9 +694,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"运行中ing...\n",
|
||||
"18:13:56任务失败\n",
|
||||
"18:13:59任务失败\n"
|
||||
"运行中ing...\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -714,10 +710,10 @@
|
||||
"\n",
|
||||
" # 判断当前时间是否为执行任务的时间点\n",
|
||||
" try:\n",
|
||||
" if current_time == \"17:05:00\":\n",
|
||||
" if current_time == \"09:15:00\":\n",
|
||||
" print(\"执行定时任务\")\n",
|
||||
" main()\n",
|
||||
" elif current_time == \"17:10:00\":\n",
|
||||
" elif current_time == \"20:00:00\":\n",
|
||||
" print('更新数据')\n",
|
||||
" start_3()\n",
|
||||
" time.sleep(1)\n",
|
||||
@ -725,9 +721,9 @@
|
||||
" print(f\"{current_time}任务失败\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# # 检测数据准确性, 需要检测放开\n",
|
||||
"# # check_data(\"100028098|LISTING_PRICE\")\n",
|
||||
"# # check_data(\"9137070016544622XB|DAY_Yield\")\n"
|
||||
" # 检测数据准确性, 需要检测放开\n",
|
||||
" # check_data(\"100028098|LISTING_PRICE\")\n",
|
||||
" # check_data(\"9137070016544622XB|DAY_Yield\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -736,14 +732,14 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# start_date = datetime(2025, 4, 8)\n",
|
||||
"# end_date = datetime(2025, 4, 9)\n",
|
||||
"# start_date = datetime(2025, 4, 1)\n",
|
||||
"# end_date = datetime(2025, 5, 7)\n",
|
||||
"# token = get_head_auth()\n",
|
||||
"\n",
|
||||
"# token_push = get_head_push_auth()\n",
|
||||
"\n",
|
||||
"# while start_date < end_date:\n",
|
||||
" # main(start_date,token,token_push)\n",
|
||||
"# main(start_date,token,token_push)\n",
|
||||
"# start_date += timedelta(days=1)\n"
|
||||
]
|
||||
},
|
||||
@ -936,7 +932,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@ -950,7 +946,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
"version": "3.7.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
Binary file not shown.
Binary file not shown.
@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -10,12 +10,12 @@
|
||||
"text/html": [
|
||||
" <script type=\"text/javascript\">\n",
|
||||
" window.PlotlyConfig = {MathJaxConfig: 'local'};\n",
|
||||
" if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
|
||||
" if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
|
||||
" if (typeof require !== 'undefined') {\n",
|
||||
" require.undef(\"plotly\");\n",
|
||||
" requirejs.config({\n",
|
||||
" paths: {\n",
|
||||
" 'plotly': ['https://cdn.plot.ly/plotly-2.12.1.min']\n",
|
||||
" 'plotly': ['https://cdn.plot.ly/plotly-2.2.0.min']\n",
|
||||
" }\n",
|
||||
" });\n",
|
||||
" require(['plotly'], function(Plotly) {\n",
|
||||
@ -500,6 +500,7 @@
|
||||
" tuple: (紧凑日期字符串, 标准日期字符串)\n",
|
||||
" \"\"\"\n",
|
||||
" # 日期解析逻辑\n",
|
||||
" from datetime import datetime,timedelta\n",
|
||||
" if isinstance(date, datetime):\n",
|
||||
" now = date\n",
|
||||
" else:\n",
|
||||
@ -695,7 +696,7 @@
|
||||
" # 更新当月数据\n",
|
||||
" queryDataListItemNos(start_date,token)\n",
|
||||
" # 更新当日数据\n",
|
||||
" # start(date)\n",
|
||||
" start(date)\n",
|
||||
" # 训练模型\n",
|
||||
" optimize_Model()\n",
|
||||
" # 预测&上传预测结果\n",
|
||||
@ -705,52 +706,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"# if __name__ == \"__main__\":\n",
|
||||
"# print('运行中')\n",
|
||||
"# # 需要单独运行放开\n",
|
||||
"# # start()\n",
|
||||
"# # start_1(date='2025-01-22')\n",
|
||||
"# # start_1()\n",
|
||||
"\n",
|
||||
"# # 每天定时12点运行\n",
|
||||
"# while True:\n",
|
||||
"# try:\n",
|
||||
"# # 获取当前时间\n",
|
||||
"# current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n",
|
||||
"# current_time_1 = time.strftime(\"%H:%M:%S\", time.localtime())\n",
|
||||
"# # print(current_time_1)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# # 判断当前时间是否为执行任务的时间点\n",
|
||||
"# if current_time == \"09:15:00\":\n",
|
||||
"# print(\"执行定时任务\")\n",
|
||||
" # start()\n",
|
||||
"\n",
|
||||
"# # 休眠1秒钟,避免过多占用CPU资源\n",
|
||||
"# time.sleep(1)\n",
|
||||
"\n",
|
||||
"# elif current_time_1 == \"20:00:00\":\n",
|
||||
"# print(\"更新数据\")\n",
|
||||
"# start_1()\n",
|
||||
"# time.sleep(1)\n",
|
||||
"# except:\n",
|
||||
"# print('执行错误')\n",
|
||||
"# time.sleep(1)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# # 检测数据准确性, 需要检测放开\n",
|
||||
"# # check_data(\"100028098|LISTING_PRICE\")\n",
|
||||
"# # check_data(\"9137070016544622XB|DAY_Yield\")\n"
|
||||
]
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
@ -761,103 +722,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"运行中ing...\n",
|
||||
"获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0NDE5ODg0NywianRpIjoiZmJlMmI4MzA5NzFmNDBhMzhiZTA5YTZjMDEyZjU4YmQifQ.rGLp0UBfeu5JmoYXbGSgCpkrO2QnlAx8hFbbbDDXC8I\n",
|
||||
"20250409\n",
|
||||
" dataDate dataItemNo dataValue\n",
|
||||
"0 2025-04-01 100028046|LISTING_PRICE 8208.0\n",
|
||||
"1 2025-04-02 100028046|LISTING_PRICE 8244.0\n",
|
||||
"2 2025-04-03 100028046|LISTING_PRICE 8244.0\n",
|
||||
"3 2025-04-04 100028046|LISTING_PRICE 8165.0\n",
|
||||
"4 2025-04-05 100028046|LISTING_PRICE 8114.0\n",
|
||||
".. ... ... ...\n",
|
||||
"183 2025-04-07 YHQMXBB|C01100008|STRIKE_PRICE 5180.0\n",
|
||||
"184 2025-04-02 YHQMXBB|C01100008|STRIKE_PRICE 5310.0\n",
|
||||
"185 2025-04-01 YHQMXBB|C01100008|STRIKE_PRICE 5260.0\n",
|
||||
"186 2025-04-04 YHQMXBB|C01100008|STRIKE_PRICE 5230.0\n",
|
||||
"187 2025-04-05 YHQMXBB|C01100008|STRIKE_PRICE 5180.0\n",
|
||||
"\n",
|
||||
"[188 rows x 3 columns]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_9964\\3261286938.py:614: DeprecationWarning:\n",
|
||||
"\n",
|
||||
"The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"当月数据更新完成\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_9964\\3261286938.py:255: UserWarning:\n",
|
||||
"\n",
|
||||
"The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
|
||||
"\n",
|
||||
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_9964\\3261286938.py:257: UserWarning:\n",
|
||||
"\n",
|
||||
"The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Using matplotlib backend: QtAgg\n",
|
||||
"%pylab is deprecated, use %matplotlib inline and import the required libraries.\n",
|
||||
"Populating the interactive namespace from numpy and matplotlib\n",
|
||||
"Fitting 3 folds for each of 180 candidates, totalling 540 fits\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"d:\\ProgramData\\anaconda3\\Lib\\site-packages\\IPython\\core\\magics\\pylab.py:162: UserWarning:\n",
|
||||
"\n",
|
||||
"pylab import has clobbered these variables: ['plot', 'random', '__version__', 'datetime']\n",
|
||||
"`%matplotlib` prevents importing * from pylab and numpy\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Date\n",
|
||||
"2025-04-09 5179.792969\n",
|
||||
"Name: 日度预测价格, dtype: float32\n",
|
||||
"预测值: 5179.79\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_9964\\3261286938.py:203: UserWarning:\n",
|
||||
"\n",
|
||||
"The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
|
||||
"\n",
|
||||
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_9964\\3261286938.py:205: UserWarning:\n",
|
||||
"\n",
|
||||
"The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
|
||||
"\n",
|
||||
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_9964\\3261286938.py:237: FutureWarning:\n",
|
||||
"\n",
|
||||
"Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
|
||||
"\n"
|
||||
"运行中ing...\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -866,19 +731,19 @@
|
||||
"if __name__ == \"__main__\":\n",
|
||||
" print(\"运行中ing...\")\n",
|
||||
" # 每天定时12点运行\n",
|
||||
" # while True:\n",
|
||||
" # # 获取当前时间\n",
|
||||
" # current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n",
|
||||
" # try:\n",
|
||||
" # # 判断当前时间是否为执行任务的时间点\n",
|
||||
" # if current_time == \"12:00:00\":\n",
|
||||
" # print(\"执行定时任务\")\n",
|
||||
" # main()\n",
|
||||
" # elif current_time == \"20:00:00\":\n",
|
||||
" # start_1()\n",
|
||||
" # time.sleep(1)\n",
|
||||
" # except:\n",
|
||||
" # print(f\"{current_time}执行失败\")\n",
|
||||
" while True:\n",
|
||||
" # 获取当前时间\n",
|
||||
" current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n",
|
||||
" try:\n",
|
||||
" # 判断当前时间是否为执行任务的时间点\n",
|
||||
" if current_time == \"09:15:00\":\n",
|
||||
" print(\"执行定时任务\")\n",
|
||||
" main()\n",
|
||||
" elif current_time == \"20:00:00\":\n",
|
||||
" start_1()\n",
|
||||
" time.sleep(1)\n",
|
||||
" except:\n",
|
||||
" print(f\"{current_time}执行失败\")\n",
|
||||
"\n",
|
||||
" # 检测数据准确性, 需要检测放开\n",
|
||||
" # check_data(\"100028098|LISTING_PRICE\")\n",
|
||||
@ -887,24 +752,23 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# start_date = datetime(2025, 4, 2)\n",
|
||||
"# end_date = datetime(2025, 4, 3)\n",
|
||||
"# start_date = datetime(2025, 4, 1)\n",
|
||||
"# end_date = datetime(2025, 5, 7)\n",
|
||||
"# token = get_head_auth()\n",
|
||||
"\n",
|
||||
"# while start_date < end_date:\n",
|
||||
"# date = start_date.strftime('%Y%m%d')\n",
|
||||
"# date2 = start_date.strftime('%Y-%m-%d')\n",
|
||||
"# queryDataListItemNos(date=start_date,token=token)\n",
|
||||
"# updateYesterdayExcelData(date=date2,token=token)\n",
|
||||
"# start(date)\n",
|
||||
"# main(start_date=start_date,token=token,token_push=token)\n",
|
||||
"# # # time.sleep(1)\n",
|
||||
"# # start_1(start_date)\n",
|
||||
"# start_date += timedelta(days=1)\n",
|
||||
"# time.sleep(5)\n",
|
||||
"# time.sleep(2)\n",
|
||||
"\n",
|
||||
"# # print(price_list)"
|
||||
]
|
||||
@ -919,7 +783,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "base",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@ -933,7 +797,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
"version": "3.7.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
Binary file not shown.
Binary file not shown.
@ -5,17 +5,27 @@
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\statsmodels\\compat\\pandas.py:49: FutureWarning:\n",
|
||||
"\n",
|
||||
"The Panel class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next version\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
" <script type=\"text/javascript\">\n",
|
||||
" window.PlotlyConfig = {MathJaxConfig: 'local'};\n",
|
||||
" if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
|
||||
" if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
|
||||
" if (typeof require !== 'undefined') {\n",
|
||||
" require.undef(\"plotly\");\n",
|
||||
" requirejs.config({\n",
|
||||
" paths: {\n",
|
||||
" 'plotly': ['https://cdn.plot.ly/plotly-2.12.1.min']\n",
|
||||
" 'plotly': ['https://cdn.plot.ly/plotly-2.2.0.min']\n",
|
||||
" }\n",
|
||||
" });\n",
|
||||
" require(['plotly'], function(Plotly) {\n",
|
||||
@ -33,8 +43,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"获取到的数据项ID['数据项编码', 'C01100047|STRIKE_PRICE', 'Brentspj', '913716251671540959|EXCHANGE_RATE', 'C01100010|LIST_PRICE01', '250326561|STRIKE_PRICE', 'C01100047|LIST_PRICE', 'C01100047|LIST_PRICE-1', 'C01100047|LIST_PRICE-01', 'OIL_CHEM|guonei|6097|PRICE', '91370500674526498A|C01100008|STRIKE_PRICE', '91370783724809024G|BEN|PRICE', '91370500737223620X|BEN|PRICE', '91370503706169019D|BEN|PRICE', '91370503164840647R|BEN|PRICE', 'C01100047|TURNOVER', '913705221649223519|C01100047|EXW', 'C01100047|CAPACITY']\n",
|
||||
"获取到的数据项ID['C01100047|STRIKE_PRICE', 'Brentspj', '913716251671540959|EXCHANGE_RATE', 'C01100010|LIST_PRICE01', '250326561|STRIKE_PRICE', 'C01100047|LIST_PRICE', 'C01100047|LIST_PRICE-1', 'C01100047|LIST_PRICE-01', 'OIL_CHEM|guonei|6097|PRICE', '91370500674526498A|C01100008|STRIKE_PRICE', '91370783724809024G|BEN|PRICE', '91370500737223620X|BEN|PRICE', '91370503706169019D|BEN|PRICE', '91370503164840647R|BEN|PRICE', 'C01100047|TURNOVER', '913705221649223519|C01100047|EXW', 'C01100047|CAPACITY']\n"
|
||||
"运行中ing\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -209,6 +218,7 @@
|
||||
" tuple: (紧凑日期字符串, 标准日期字符串)\n",
|
||||
" \"\"\"\n",
|
||||
" # 日期解析逻辑\n",
|
||||
" from datetime import datetime,timedelta\n",
|
||||
" if isinstance(date, datetime):\n",
|
||||
" now = date\n",
|
||||
" else:\n",
|
||||
@ -265,22 +275,22 @@
|
||||
"\n",
|
||||
"def upload_data_to_system(token_push,date):\n",
|
||||
" datavalue = forecast_price()\n",
|
||||
" # data = {\n",
|
||||
" # \"funcModule\": \"数据表信息列表\",\n",
|
||||
" # \"funcOperation\": \"新增\",\n",
|
||||
" # \"data\": [\n",
|
||||
" # {\"dataItemNo\": \"C01100047|FORECAST_PRICE\",\n",
|
||||
" # \"dataDate\": get_cur_time(date)[0],\n",
|
||||
" # \"dataStatus\": \"add\",\n",
|
||||
" # \"dataValue\": datavalue\n",
|
||||
" # }\n",
|
||||
" data = {\n",
|
||||
" \"funcModule\": \"数据表信息列表\",\n",
|
||||
" \"funcOperation\": \"新增\",\n",
|
||||
" \"data\": [\n",
|
||||
" {\"dataItemNo\": \"C01100047|FORECAST_PRICE\",\n",
|
||||
" \"dataDate\": get_cur_time(date)[0],\n",
|
||||
" \"dataStatus\": \"add\",\n",
|
||||
" \"dataValue\": datavalue\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" # ]\n",
|
||||
" # }\n",
|
||||
" # print(data)\n",
|
||||
" # headers = {\"Authorization\": token_push}\n",
|
||||
" # res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))\n",
|
||||
" # print(res.text)\n",
|
||||
" ]\n",
|
||||
" }\n",
|
||||
" print(data)\n",
|
||||
" headers = {\"Authorization\": token_push}\n",
|
||||
" res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))\n",
|
||||
" print(res.text)\n",
|
||||
"\n",
|
||||
" \n",
|
||||
"# def upload_data_to_system(token):\n",
|
||||
@ -564,7 +574,8 @@
|
||||
" return\n",
|
||||
"\n",
|
||||
" # data_list = [two_cols, one_cols]\n",
|
||||
" append_rows = [getNow()[1]]\n",
|
||||
" append_rows = [getNow(date)[1]]\n",
|
||||
"# append_rows = [getNow()[1]]\n",
|
||||
" dataItemNo_dataValue = {}\n",
|
||||
" for data_value in datas:\n",
|
||||
" if \"dataValue\" not in data_value:\n",
|
||||
@ -844,6 +855,7 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"if __name__ == \"__main__\":\n",
|
||||
" print('运行中ing')\n",
|
||||
"\n",
|
||||
" # 每天定时12点运行\n",
|
||||
" while True:\n",
|
||||
@ -875,14 +887,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# # 自定义日期执行预测\n",
|
||||
"# # # 自定义日期执行预测\n",
|
||||
"\n",
|
||||
"# start_date = datetime(2025, 4, 8)\n",
|
||||
"# end_date = datetime(2025, 4, 9)\n",
|
||||
"# start_date = datetime(2025, 5, 1)\n",
|
||||
"# end_date = datetime(2025, 5, 7)\n",
|
||||
"\n",
|
||||
"# token = get_head_auth()\n",
|
||||
"# token_push = get_head_push_auth()\n",
|
||||
@ -904,7 +916,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "base",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@ -918,7 +930,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
"version": "3.7.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
Binary file not shown.
@ -111,15 +111,15 @@ data = {
|
||||
"IndexCode": "",
|
||||
"IndexName": "价格预测模型",
|
||||
"Unit": "无",
|
||||
"Frequency": "日度",
|
||||
"SourceName": f"价格预测",
|
||||
"Remark": 'ddd',
|
||||
"DataList": [
|
||||
{
|
||||
"Date": "2024-05-02",
|
||||
"Value": 333444
|
||||
}
|
||||
]
|
||||
"Frequency": "日度",
|
||||
"SourceName": f"价格预测",
|
||||
"Remark": 'ddd',
|
||||
"DataList": [
|
||||
{
|
||||
"Date": "2024-05-02",
|
||||
"Value": 333444
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
# eta 分类
|
||||
@ -242,7 +242,7 @@ table_name = 'v_tbl_crude_oil_warning'
|
||||
# 开关
|
||||
is_train = True # 是否训练
|
||||
is_debug = False # 是否调试
|
||||
is_eta = False # 是否使用eta接口
|
||||
is_eta = True # 是否使用eta接口
|
||||
is_market = False # 是否通过市场信息平台获取特征 ,在is_eta 为true 的情况下生效
|
||||
is_timefurture = True # 是否使用时间特征
|
||||
is_fivemodels = False # 是否使用之前保存的最佳的5个模型
|
||||
|
@ -491,14 +491,18 @@ def featurePindu(dataset):
|
||||
featureInfo += ', 详看 附1、特征列表'
|
||||
|
||||
featureInfo += '''
|
||||
数据特征工程:
|
||||
1. 数据日期排序,新日期在最后
|
||||
2. 删除空列,特征数据列没有值,就删除
|
||||
3. 删除近两月不再更新值的指标
|
||||
4. 非日度数据填充为日度数据,填充规则:
|
||||
-- 向后填充,举例:假设周五出现一个周度指标数据,那么在这之前的数据用上周五的数据
|
||||
-- 向前填充,举例:采集数据开始日期为2018年1月1日,那么周度数据可能是2018年1月3日,那么3日的数据向前填充,使1日2日都有数值
|
||||
数据特征相关性分析:
|
||||
时序数据工程处理:
|
||||
1. 时序对齐处理
|
||||
- 按观测时间升序排列,确保最新数据位于序列末端
|
||||
2. 空值特征筛除
|
||||
- 删除全空值特征列(缺失率=100%)
|
||||
3. 失效指标剔除
|
||||
- 基于最近两个月数据更新状态,移除停止更新的指标
|
||||
4. 多粒度特征时序化处理
|
||||
- 非日度指标采用前向/后向插值法转化为日频数据:
|
||||
▶ 后向填充:当周度指标在周五更新时,用上周五值填充当周前四日
|
||||
▶ 前向填充:对数据采集起始日的空白值,采用首个有效值前推填充
|
||||
数据特征相关性分析:
|
||||
'''
|
||||
config.logger.info(featureInfo)
|
||||
with open(os.path.join(dataset, '特征频度统计.txt'), 'w', encoding='utf-8') as f:
|
||||
|
240
lib/tools.py
240
lib/tools.py
@ -1,30 +1,46 @@
|
||||
import time
|
||||
import os
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
from sklearn import metrics
|
||||
import random, string, base64, hmac, hashlib
|
||||
from reportlab.pdfbase import pdfmetrics # 注册字体
|
||||
from reportlab.pdfbase.ttfonts import TTFont # 字体类
|
||||
from reportlab.platypus import Table, SimpleDocTemplate, Paragraph, Image # 报告内容相关类
|
||||
from reportlab.lib.pagesizes import letter # 页面的标志尺寸(8.5*inch, 11*inch)
|
||||
from reportlab.lib.styles import getSampleStyleSheet # 文本样式
|
||||
from reportlab.lib import colors # 颜色模块
|
||||
from reportlab.graphics.charts.barcharts import VerticalBarChart # 图表类
|
||||
from reportlab.graphics.charts.legends import Legend # 图例类
|
||||
from reportlab.graphics.shapes import Drawing # 绘图工具
|
||||
from reportlab.lib.units import cm # 单位:cm
|
||||
import smtplib
|
||||
from email.mime.text import MIMEText
|
||||
from email.mime.multipart import MIMEMultipart
|
||||
import sqlite3
|
||||
import pymysql
|
||||
import tkinter as tk
|
||||
from langchain_core.documents import Document
|
||||
from langchain_openai import ChatOpenAI
|
||||
from langchain.chains.summarize import load_summarize_chain
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
from langchain.document_loaders import UnstructuredURLLoader
|
||||
from langchain_core.prompts import PromptTemplate
|
||||
from tkinter import messagebox
|
||||
import tkinter as tk
|
||||
import pymysql
|
||||
import sqlite3
|
||||
from email.mime.multipart import MIMEMultipart
|
||||
from email.mime.text import MIMEText
|
||||
import smtplib
|
||||
from reportlab.lib.units import cm # 单位:cm
|
||||
from reportlab.graphics.shapes import Drawing # 绘图工具
|
||||
from reportlab.graphics.charts.legends import Legend # 图例类
|
||||
from reportlab.graphics.charts.barcharts import VerticalBarChart # 图表类
|
||||
from reportlab.lib import colors # 颜色模块
|
||||
from reportlab.lib.styles import getSampleStyleSheet # 文本样式
|
||||
from reportlab.lib.pagesizes import letter # 页面的标志尺寸(8.5*inch, 11*inch)
|
||||
from reportlab.platypus import Table, SimpleDocTemplate, Paragraph, Image # 报告内容相关类
|
||||
from reportlab.pdfbase.ttfonts import TTFont # 字体类
|
||||
from reportlab.pdfbase import pdfmetrics # 注册字体
|
||||
import hashlib
|
||||
import hmac
|
||||
import base64
|
||||
import string
|
||||
import random
|
||||
from sklearn import metrics
|
||||
import seaborn as sns
|
||||
import matplotlib.pyplot as plt
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import os
|
||||
import time
|
||||
import logging
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
|
||||
global logger
|
||||
|
||||
|
||||
def timeit(func):
|
||||
'''计时装饰器'''
|
||||
def wrapper(*args, **kwargs):
|
||||
@ -36,10 +52,12 @@ def timeit(func):
|
||||
return result
|
||||
return wrapper
|
||||
|
||||
|
||||
class BinanceAPI:
|
||||
'''
|
||||
获取 Binance API 请求头签名
|
||||
'''
|
||||
|
||||
def __init__(self, APPID, SECRET):
|
||||
self.APPID = APPID
|
||||
self.SECRET = SECRET
|
||||
@ -47,7 +65,8 @@ class BinanceAPI:
|
||||
|
||||
# 生成随机字符串作为 nonce
|
||||
def generate_nonce(self, length=32):
|
||||
self.nonce = ''.join(random.choices(string.ascii_letters + string.digits, k=length))
|
||||
self.nonce = ''.join(random.choices(
|
||||
string.ascii_letters + string.digits, k=length))
|
||||
return self.nonce
|
||||
|
||||
# 获取当前时间戳(秒)
|
||||
@ -124,7 +143,7 @@ class Graphs:
|
||||
|
||||
# 绘制表格
|
||||
@staticmethod
|
||||
def draw_table(col_width,*args):
|
||||
def draw_table(col_width, *args):
|
||||
# 列宽度
|
||||
col_width = col_width
|
||||
style = [
|
||||
@ -196,6 +215,8 @@ def mse(y_true, y_pred):
|
||||
|
||||
return res_mse
|
||||
# RMSE
|
||||
|
||||
|
||||
def rmse(y_true, y_pred):
|
||||
|
||||
res_rmse = np.sqrt(metrics.mean_squared_error(y_true, y_pred))
|
||||
@ -203,6 +224,8 @@ def rmse(y_true, y_pred):
|
||||
return res_rmse
|
||||
|
||||
# MAE
|
||||
|
||||
|
||||
def mae(y_true, y_pred):
|
||||
|
||||
res_mae = metrics.mean_absolute_error(y_true, y_pred)
|
||||
@ -211,6 +234,8 @@ def mae(y_true, y_pred):
|
||||
|
||||
# sklearn的库中没有MAPE和SMAPE,下面根据公式给出算法实现
|
||||
# MAPE
|
||||
|
||||
|
||||
def mape(y_true, y_pred):
|
||||
|
||||
res_mape = np.mean(np.abs((y_pred - y_true) / y_true)) * 100
|
||||
@ -218,13 +243,18 @@ def mape(y_true, y_pred):
|
||||
return res_mape
|
||||
|
||||
# SMAPE
|
||||
|
||||
|
||||
def smape(y_true, y_pred):
|
||||
|
||||
res_smape = 2.0 * np.mean(np.abs(y_pred - y_true) / (np.abs(y_pred) + np.abs(y_true))) * 100
|
||||
res_smape = 2.0 * np.mean(np.abs(y_pred - y_true) /
|
||||
(np.abs(y_pred) + np.abs(y_true))) * 100
|
||||
|
||||
return res_smape
|
||||
|
||||
# 相关系数绘制
|
||||
|
||||
|
||||
def plot_corr(data, size=11):
|
||||
# 去掉ds列
|
||||
data.drop(columns=['ds'], inplace=True)
|
||||
@ -234,10 +264,11 @@ def plot_corr(data, size=11):
|
||||
|
||||
# 计算各特征与目标列的皮尔逊相关系数,并保存到新的 DataFrame 中
|
||||
for col in data.columns:
|
||||
if col!= 'y':
|
||||
if col != 'y':
|
||||
pearson_correlation = np.corrcoef(data[col], data['y'])[0, 1]
|
||||
spearman_correlation, _ = spearmanr(data[col], data['y'])
|
||||
new_row = {'Feature': col, 'Pearson_Correlation': round(pearson_correlation,3), 'Spearman_Correlation': round(spearman_correlation,2)}
|
||||
new_row = {'Feature': col, 'Pearson_Correlation': round(
|
||||
pearson_correlation, 3), 'Spearman_Correlation': round(spearman_correlation, 2)}
|
||||
correlation_df = correlation_df._append(new_row, ignore_index=True)
|
||||
# 删除空列
|
||||
correlation_df.drop('Correlation', axis=1, inplace=True)
|
||||
@ -248,9 +279,10 @@ def plot_corr(data, size=11):
|
||||
# 生成 -1 到 1 的 20 个区间
|
||||
bins = np.linspace(-1, 1, 21)
|
||||
# 计算每个区间的统计数(这里是区间内数据的数量)
|
||||
hist_values = [np.sum((data >= bins[i]) & (data < bins[i + 1])) for i in range(len(bins) - 1)]
|
||||
hist_values = [np.sum((data >= bins[i]) & (data < bins[i + 1]))
|
||||
for i in range(len(bins) - 1)]
|
||||
|
||||
#设置画布大小
|
||||
# 设置画布大小
|
||||
plt.figure(figsize=(10, 6))
|
||||
# 绘制直方图
|
||||
plt.bar(bins[:-1], hist_values, width=(bins[1] - bins[0]))
|
||||
@ -262,12 +294,12 @@ def plot_corr(data, size=11):
|
||||
plt.savefig('皮尔逊相关性系数.png')
|
||||
plt.close()
|
||||
|
||||
|
||||
#设置画布大小
|
||||
# 设置画布大小
|
||||
plt.figure(figsize=(10, 6))
|
||||
data = correlation_df['Spearman_Correlation'].values.tolist()
|
||||
# 计算每个区间的统计数(这里是区间内数据的数量)
|
||||
hist_values = [np.sum((data >= bins[i]) & (data < bins[i + 1])) for i in range(len(bins) - 1)]
|
||||
hist_values = [np.sum((data >= bins[i]) & (data < bins[i + 1]))
|
||||
for i in range(len(bins) - 1)]
|
||||
|
||||
# 绘制直方图
|
||||
plt.bar(bins[:-1], hist_values, width=(bins[1] - bins[0]))
|
||||
@ -282,9 +314,9 @@ def plot_corr(data, size=11):
|
||||
|
||||
# 邮件封装
|
||||
class SendMail(object):
|
||||
def __init__(self,username,passwd,recv,title,content,
|
||||
file=None,ssl=False,
|
||||
email_host='smtp.qq.com',port=25,ssl_port=465):
|
||||
def __init__(self, username, passwd, recv, title, content,
|
||||
file=None, ssl=False,
|
||||
email_host='smtp.qq.com', port=25, ssl_port=465):
|
||||
'''
|
||||
:param username: 用户名
|
||||
:param passwd: 密码
|
||||
@ -297,62 +329,67 @@ class SendMail(object):
|
||||
:param port: 非安全链接端口,默认为25
|
||||
:param ssl_port: 安全链接端口,默认为465
|
||||
'''
|
||||
self.username = username #用户名
|
||||
self.passwd = passwd #密码
|
||||
self.recv = recv #收件人,多个要传list ['a@qq.com','b@qq.com]
|
||||
self.title = title #邮件标题
|
||||
self.content = content #邮件正文
|
||||
self.file = file #附件路径,如果不在当前目录下,要写绝对路径
|
||||
self.email_host = email_host #smtp服务器地址
|
||||
self.port = port #普通端口
|
||||
self.ssl = ssl #是否安全链接
|
||||
self.ssl_port = ssl_port #安全链接端口
|
||||
self.username = username # 用户名
|
||||
self.passwd = passwd # 密码
|
||||
self.recv = recv # 收件人,多个要传list ['a@qq.com','b@qq.com]
|
||||
self.title = title # 邮件标题
|
||||
self.content = content # 邮件正文
|
||||
self.file = file # 附件路径,如果不在当前目录下,要写绝对路径
|
||||
self.email_host = email_host # smtp服务器地址
|
||||
self.port = port # 普通端口
|
||||
self.ssl = ssl # 是否安全链接
|
||||
self.ssl_port = ssl_port # 安全链接端口
|
||||
|
||||
def send_mail(self):
|
||||
msg = MIMEMultipart()
|
||||
#发送内容的对象
|
||||
if self.file:#处理附件的
|
||||
file_name = os.path.split(self.file)[-1]#只取文件名,不取路径
|
||||
# 发送内容的对象
|
||||
if self.file: # 处理附件的
|
||||
file_name = os.path.split(self.file)[-1] # 只取文件名,不取路径
|
||||
try:
|
||||
f = open(self.file, 'rb').read()
|
||||
except Exception as e:
|
||||
raise Exception('附件打不开!!!!')
|
||||
else:
|
||||
att = MIMEText(f,"base64", "utf-8")
|
||||
att = MIMEText(f, "base64", "utf-8")
|
||||
att["Content-Type"] = 'application/octet-stream'
|
||||
#base64.b64encode(file_name.encode()).decode()
|
||||
new_file_name='=?utf-8?b?' + base64.b64encode(file_name.encode()).decode() + '?='
|
||||
#这里是处理文件名为中文名的,必须这么写
|
||||
att["Content-Disposition"] = 'attachment; filename="%s"'%(new_file_name)
|
||||
# base64.b64encode(file_name.encode()).decode()
|
||||
new_file_name = '=?utf-8?b?' + \
|
||||
base64.b64encode(file_name.encode()).decode() + '?='
|
||||
# 这里是处理文件名为中文名的,必须这么写
|
||||
att["Content-Disposition"] = 'attachment; filename="%s"' % (
|
||||
new_file_name)
|
||||
msg.attach(att)
|
||||
msg.attach(MIMEText(self.content))#邮件正文的内容
|
||||
msg.attach(MIMEText(self.content)) # 邮件正文的内容
|
||||
msg['Subject'] = self.title # 邮件主题
|
||||
msg['From'] = self.username # 发送者账号
|
||||
msg['To'] = ','.join(self.recv) # 接收者账号列表
|
||||
if self.ssl:
|
||||
self.smtp = smtplib.SMTP_SSL(self.email_host,port=self.ssl_port)
|
||||
self.smtp = smtplib.SMTP_SSL(self.email_host, port=self.ssl_port)
|
||||
else:
|
||||
self.smtp = smtplib.SMTP(self.email_host,port=self.port)
|
||||
#发送邮件服务器的对象
|
||||
self.smtp.login(self.username,self.passwd)
|
||||
self.smtp = smtplib.SMTP(self.email_host, port=self.port)
|
||||
# 发送邮件服务器的对象
|
||||
self.smtp.login(self.username, self.passwd)
|
||||
try:
|
||||
self.smtp.sendmail(self.username,self.recv,msg.as_string())
|
||||
self.smtp.sendmail(self.username, self.recv, msg.as_string())
|
||||
pass
|
||||
except Exception as e:
|
||||
print('出错了。。',e)
|
||||
logger.info('邮件服务出错了。。',e)
|
||||
print('出错了。。', e)
|
||||
logger.info('邮件服务出错了。。', e)
|
||||
else:
|
||||
print('发送成功!')
|
||||
self.smtp.quit()
|
||||
|
||||
|
||||
def dateConvert(df, datecol='ds'):
|
||||
# 将date列转换为datetime类型
|
||||
try:
|
||||
df[datecol] = pd.to_datetime(df[datecol],format=r'%Y-%m-%d')
|
||||
df[datecol] = pd.to_datetime(df[datecol], format=r'%Y-%m-%d')
|
||||
except:
|
||||
df[datecol] = pd.to_datetime(df[datecol],format=r'%Y/%m/%d')
|
||||
df[datecol] = pd.to_datetime(df[datecol], format=r'%Y/%m/%d')
|
||||
return df
|
||||
|
||||
def save_to_database(sqlitedb,df,dbname,end_time):
|
||||
|
||||
def save_to_database(sqlitedb, df, dbname, end_time):
|
||||
'''
|
||||
create_dt ,ds 判断数据是否存在,不存在则插入,存在则更新
|
||||
'''
|
||||
@ -361,20 +398,25 @@ def save_to_database(sqlitedb,df,dbname,end_time):
|
||||
df['ds'] = df['ds'].dt.strftime('%Y-%m-%d')
|
||||
|
||||
if not sqlitedb.check_table_exists(dbname):
|
||||
df.to_sql(dbname,sqlitedb.connection,index=False)
|
||||
df.to_sql(dbname, sqlitedb.connection, index=False)
|
||||
else:
|
||||
for col in df.columns:
|
||||
sqlitedb.add_column_if_not_exists(dbname,col,'TEXT')
|
||||
sqlitedb.add_column_if_not_exists(dbname, col, 'TEXT')
|
||||
for row in df.itertuples(index=False):
|
||||
row_dict = row._asdict()
|
||||
columns=row_dict.keys()
|
||||
check_query = sqlitedb.select_data(dbname,where_condition = f"ds = '{row.ds}' and created_dt = '{end_time}'")
|
||||
columns = row_dict.keys()
|
||||
check_query = sqlitedb.select_data(
|
||||
dbname, where_condition=f"ds = '{row.ds}' and created_dt = '{end_time}'")
|
||||
if len(check_query) > 0:
|
||||
set_clause = ", ".join([f"{key} = '{value}'" for key, value in row_dict.items()])
|
||||
sqlitedb.update_data(dbname,set_clause,where_condition = f"ds = '{row.ds} and created_dt = {end_time}'")
|
||||
set_clause = ", ".join(
|
||||
[f"{key} = '{value}'" for key, value in row_dict.items()])
|
||||
sqlitedb.update_data(
|
||||
dbname, set_clause, where_condition=f"ds = '{row.ds} and created_dt = {end_time}'")
|
||||
continue
|
||||
else:
|
||||
sqlitedb.insert_data(dbname,tuple(row_dict.values()),columns=columns)
|
||||
sqlitedb.insert_data(dbname, tuple(
|
||||
row_dict.values()), columns=columns)
|
||||
|
||||
|
||||
class SQLiteHandler:
|
||||
def __init__(self, db_name):
|
||||
@ -426,7 +468,8 @@ class SQLiteHandler:
|
||||
query += f" LIMIT {limit}"
|
||||
results = self.execute_query(query).fetchall()
|
||||
if results:
|
||||
headers = [description[0] for description in self.execute_query(query).description]
|
||||
headers = [description[0]
|
||||
for description in self.execute_query(query).description]
|
||||
return pd.DataFrame(results, columns=headers)
|
||||
else:
|
||||
return pd.DataFrame()
|
||||
@ -463,14 +506,13 @@ class SQLiteHandler:
|
||||
query = f"ALTER TABLE {table_name} ADD COLUMN {column_name} {column_type}"
|
||||
self.execute_query(query)
|
||||
self.commit()
|
||||
print(f"Column '{column_name}' added to table '{table_name}' successfully.")
|
||||
print(
|
||||
f"Column '{column_name}' added to table '{table_name}' successfully.")
|
||||
else:
|
||||
print(f"Column '{column_name}' already exists in table '{table_name}'.")
|
||||
print(
|
||||
f"Column '{column_name}' already exists in table '{table_name}'.")
|
||||
|
||||
|
||||
|
||||
|
||||
import logging
|
||||
class MySQLDB:
|
||||
def __init__(self, host, user, password, database):
|
||||
self.host = host
|
||||
@ -538,17 +580,20 @@ class MySQLDB:
|
||||
self.connection.close()
|
||||
logging.info("Database connection closed.")
|
||||
|
||||
|
||||
def exception_logger(func):
|
||||
def wrapper(*args, **kwargs):
|
||||
try:
|
||||
return func(*args, **kwargs)
|
||||
except Exception as e:
|
||||
# 记录异常日志
|
||||
logging.error(f"An error occurred in function {func.__name__}: {str(e)}")
|
||||
logging.error(
|
||||
f"An error occurred in function {func.__name__}: {str(e)}")
|
||||
# 可以选择重新抛出异常,或者在这里处理异常
|
||||
raise e # 重新抛出异常
|
||||
return wrapper
|
||||
|
||||
|
||||
def get_week_date(end_time):
|
||||
'''
|
||||
获取上上周五,上周周一周二周三周四周五的日期
|
||||
@ -560,7 +605,40 @@ def get_week_date(end_time):
|
||||
up_week_dates = [up_week + datetime.timedelta(days=i) for i in range(14)]
|
||||
create_dates = [date.strftime('%Y-%m-%d') for date in up_week_dates[4:-3]]
|
||||
ds_dates = [date.strftime('%Y-%m-%d') for date in up_week_dates[-7:-2]]
|
||||
return create_dates,ds_dates
|
||||
return create_dates, ds_dates
|
||||
|
||||
|
||||
class DeepSeek():
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def summary(self, text):
|
||||
prompt_template = '''请根据以下ARIMA预测结果分析未来的趋势:
|
||||
"{text}"
|
||||
|
||||
请用专业且结构清晰的中文撰写,重点数据用**加粗**显示
|
||||
'''
|
||||
chinese_prompt = PromptTemplate(
|
||||
template=prompt_template, input_variables=['text'])
|
||||
|
||||
docs = [Document(page_content=text, metadata={
|
||||
"source": "arima_forecast"})]
|
||||
|
||||
apikey = os.environ.get('OPENAI_API_KEY')
|
||||
llm = ChatOpenAI(
|
||||
model="deepseek-chat",
|
||||
temperature=0,
|
||||
base_url="https://api.deepseek.com/v1",
|
||||
api_key=os.environ.get('OPENAI_API_KEY')
|
||||
)
|
||||
chain = load_summarize_chain(llm, prompt=chinese_prompt)
|
||||
print('大语言模型分析预测结果')
|
||||
|
||||
summary = chain.invoke({"input_documents": docs})['output_text']
|
||||
print('大语言模型分析结果:')
|
||||
print(summary)
|
||||
return summary
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
print('This is a tool, not a script.')
|
@ -3297,8 +3297,9 @@ def pp_export_pdf(num_indicators=475, num_models=21, num_dayindicator=202, input
|
||||
# 特征、模型、参数配置
|
||||
content.append(Graphs.draw_little_title('模型选择:'))
|
||||
content.append(Graphs.draw_text(
|
||||
f'本次预测使用了一个专门收集时间序列的NeuralForecast库中的{num_models}个模型:'))
|
||||
content.append(Graphs.draw_text(f'使用40天的数据预测未来{inputsize}天的数据。'))
|
||||
f'本次预测调用专用于时间序列预测的NeuralForecast库中{num_models}个模型:'))
|
||||
content.append(Graphs.draw_text(
|
||||
f'基于40天历史数据构建多维时间窗口,采用注意力机制预测未来{inputsize}天趋势'))
|
||||
content.append(Graphs.draw_little_title('指标情况:'))
|
||||
with open(os.path.join(config.dataset, '特征频度统计.txt'), encoding='utf-8') as f:
|
||||
for line in f.readlines():
|
||||
|
Loading…
Reference in New Issue
Block a user