diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/PriceForecast-svn.iml b/.idea/PriceForecast-svn.iml
new file mode 100644
index 0000000..281df3b
--- /dev/null
+++ b/.idea/PriceForecast-svn.iml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..9b8af4c
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..13dea3c
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/aisenzhecode/沥青/定性模型数据项12-11.xlsx b/aisenzhecode/沥青/定性模型数据项12-11.xlsx
index f9dcf11..5fb9d60 100644
Binary files a/aisenzhecode/沥青/定性模型数据项12-11.xlsx and b/aisenzhecode/沥青/定性模型数据项12-11.xlsx differ
diff --git a/aisenzhecode/沥青/日度价格预测_最佳模型.pkl b/aisenzhecode/沥青/日度价格预测_最佳模型.pkl
index 4158966..3835903 100644
Binary files a/aisenzhecode/沥青/日度价格预测_最佳模型.pkl and b/aisenzhecode/沥青/日度价格预测_最佳模型.pkl differ
diff --git a/aisenzhecode/沥青/日度价格预测_最佳模型.pkl.r87044 b/aisenzhecode/沥青/日度价格预测_最佳模型.pkl.r87044
new file mode 100644
index 0000000..47e6ce9
Binary files /dev/null and b/aisenzhecode/沥青/日度价格预测_最佳模型.pkl.r87044 differ
diff --git a/aisenzhecode/沥青/日度价格预测_最佳模型.pkl.r87201 b/aisenzhecode/沥青/日度价格预测_最佳模型.pkl.r87201
new file mode 100644
index 0000000..cd43d5c
Binary files /dev/null and b/aisenzhecode/沥青/日度价格预测_最佳模型.pkl.r87201 differ
diff --git a/aisenzhecode/沥青/沥青定性模型每日推送-ytj.ipynb b/aisenzhecode/沥青/沥青定性模型每日推送-ytj.ipynb
index 15a3486..7e61e16 100644
--- a/aisenzhecode/沥青/沥青定性模型每日推送-ytj.ipynb
+++ b/aisenzhecode/沥青/沥青定性模型每日推送-ytj.ipynb
@@ -1169,7 +1169,474 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "运行中...\n"
+ "运行中...\n",
+ "更新前一天数据\n",
+ "更新数据前\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n",
+ "1428 2025-05-06 3650 29.7819 1 -798.61 6000 90799.9 3630 \n",
+ "\n",
+ " 即期成本 订单结构 计划产量 京博产量 \n",
+ "1428 2934.18 1 4876.81 7364.99 \n",
+ "日期存在,即将更新\n",
+ "新数据 [3650.0, '', '', -798.61, '', 90799.9179153, '', 2752.8654, '', '', 7364.987]\n",
+ "更新数据后\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n",
+ "1428 2025-05-06 3650 29.7819 1 -798.61 6000 90799.9 3630 \n",
+ "\n",
+ " 即期成本 订单结构 计划产量 京博产量 \n",
+ "1428 2934.18 1 4876.81 7364.99 \n",
+ "20250507\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n",
+ "1427 2025-05-05 3650 29.3624 1 -798.61 6000 90799.9 3630 \n",
+ "1428 2025-05-06 3650 32.2987 1 -454.19 6000 94300.4 3630 \n",
+ "\n",
+ " 即期成本 订单结构 计划产量 京博产量 \n",
+ "1427 2752.87 1 4876.81 7364.99 \n",
+ "1428 2934.18 1 4876.81 6793.88 \n",
+ "前一天的 2752.8654 \n",
+ "现在的 2934.179 \n",
+ " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n",
+ "0 1427 2025-05-05 3650 29.3624 1 -798.61 6000 90799.9 3630 \n",
+ "1 1428 2025-05-06 3650 32.2987 1 -454.19 6000 94300.4 3630 \n",
+ "\n",
+ " 即期成本 订单结构 计划产量 京博产量 \n",
+ "0 2752.87 1 4876.81 7364.99 \n",
+ "1 2934.18 1 4876.81 6793.88 \n",
+ "昨日计划提货偏差改之前 -454.19\n",
+ "昨日计划提货偏差改之后 1917.0684\n",
+ "**************************************************预测结果: 3689.0\n",
+ "更新前一天数据\n",
+ "更新数据前\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n",
+ "1429 2025-05-07 3650 32.2987 1 -454.19 6000 94300.4 3630 \n",
+ "\n",
+ " 即期成本 订单结构 计划产量 京博产量 \n",
+ "1429 3036.84 1 4876.81 6793.88 \n",
+ "日期存在,即将更新\n",
+ "新数据 [3650.0, 32.2987, '', -454.19, '', 94300.352488, '', 2934.179, '', '', 6793.8778]\n",
+ "更新数据后\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n",
+ "1429 2025-05-07 3650 32.2987 1 -454.19 6000 94300.4 3630 \n",
+ "\n",
+ " 即期成本 订单结构 计划产量 京博产量 \n",
+ "1429 3036.84 1 4876.81 6793.88 \n",
+ "20250508\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n",
+ "1428 2025-05-06 3650 32.2987 1 -454.19 6000 94300.4 3630 \n",
+ "1429 2025-05-07 3600 33.9765 1 693.59 6000 97550.8 3630 \n",
+ "\n",
+ " 即期成本 订单结构 计划产量 京博产量 \n",
+ "1428 2934.18 1 4876.81 6793.88 \n",
+ "1429 3036.84 1 4876.81 7237.56 \n",
+ "前一天的 2934.179 \n",
+ "现在的 3036.8392 \n",
+ " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n",
+ "0 1428 2025-05-06 3650 32.2987 1 -454.19 6000 94300.4 3630 \n",
+ "1 1429 2025-05-07 3600 33.9765 1 693.59 6000 97550.8 3630 \n",
+ "\n",
+ " 即期成本 订单结构 计划产量 京博产量 \n",
+ "0 2934.18 1 4876.81 6793.88 \n",
+ "1 3036.84 1 4876.81 7237.56 \n",
+ "昨日计划提货偏差改之前 693.59\n",
+ "昨日计划提货偏差改之后 2360.7506000000003\n",
+ "**************************************************预测结果: 3673.89\n",
+ "更新前一天数据\n",
+ "更新数据前\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n",
+ "1430 2025-05-08 3600 33.9765 1 693.59 6000 97550.8 3630 \n",
+ "\n",
+ " 即期成本 订单结构 计划产量 京博产量 \n",
+ "1430 2940.67 1 4876.81 7237.56 \n",
+ "日期存在,即将更新\n",
+ "新数据 [3600.0, 33.9765, '', 693.59, '', 97550.7881218, '', 3036.8392, '', '', 7237.56]\n",
+ "更新数据后\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n",
+ "1430 2025-05-08 3600 33.9765 1 693.59 6000 97550.8 3630 \n",
+ "\n",
+ " 即期成本 订单结构 计划产量 京博产量 \n",
+ "1430 2940.67 1 4876.81 7237.56 \n",
+ "20250509\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n",
+ "1429 2025-05-07 3600 33.9765 1 693.59 6000 97550.8 3630 \n",
+ "1430 2025-05-08 3600 34.396 1 329.51 6000 100086 3630 \n",
+ "\n",
+ " 即期成本 订单结构 计划产量 京博产量 \n",
+ "1429 3036.84 1 4876.81 7237.56 \n",
+ "1430 2940.67 1 4876.81 7711.98 \n",
+ "前一天的 3036.8392 \n",
+ "现在的 2940.6669 \n",
+ " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n",
+ "0 1429 2025-05-07 3600 33.9765 1 693.59 6000 97550.8 3630 \n",
+ "1 1430 2025-05-08 3600 34.396 1 329.51 6000 100086 3630 \n",
+ "\n",
+ " 即期成本 订单结构 计划产量 京博产量 \n",
+ "0 3036.84 1 4876.81 7237.56 \n",
+ "1 2940.67 1 4876.81 7711.98 \n",
+ "昨日计划提货偏差改之前 329.51\n",
+ "昨日计划提货偏差改之后 2835.1686\n",
+ "**************************************************预测结果: 3604.08\n",
+ "更新前一天数据\n",
+ "更新数据前\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1431 2025-05-09 3600 34.396 1 329.51 6000 100086 3630 3077.15 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1431 1 4876.81 7711.98 \n",
+ "日期存在,即将更新\n",
+ "新数据 [3600.0, 34.396, '', 329.51, '', 100085.7741619, '', 2940.6669, '', '', 7711.978]\n",
+ "更新数据后\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1431 2025-05-09 3600 34.396 1 329.51 6000 100086 3630 3077.15 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1431 1 4876.81 7711.98 \n",
+ "20250510\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1430 2025-05-08 3600 34.396 1 329.51 6000 100086 3630 2940.67 \n",
+ "1431 2025-05-09 3620 36.9128 1 -1636.9 6000 101412 3630 3077.15 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1430 1 4876.81 7711.98 \n",
+ "1431 1 4876.81 7246.98 \n",
+ "前一天的 2940.6669 \n",
+ "现在的 3077.1512 \n",
+ " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n",
+ "0 1430 2025-05-08 3600 34.396 1 329.51 6000 100086 3630 \n",
+ "1 1431 2025-05-09 3620 36.9128 1 -1636.9 6000 101412 3630 \n",
+ "\n",
+ " 即期成本 订单结构 计划产量 京博产量 \n",
+ "0 2940.67 1 4876.81 7711.98 \n",
+ "1 3077.15 1 4876.81 7246.98 \n",
+ "昨日计划提货偏差改之前 -1636.9\n",
+ "昨日计划提货偏差改之后 2370.1705999999995\n",
+ "**************************************************预测结果: 3630.55\n",
+ "更新前一天数据\n",
+ "更新数据前\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1432 2025-05-10 3620 36.9128 1 -1636.9 6000 101412 3630 3152.87 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1432 1 4876.81 7246.98 \n",
+ "日期存在,即将更新\n",
+ "新数据 [3620.0, 36.9128, '', -1636.9, '', 101412.2754832, '', 3077.1512, '', '', 7246.98]\n",
+ "更新数据后\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1432 2025-05-10 3620 36.9128 1 -1636.9 6000 101412 3630 3152.87 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1432 1 4876.81 7246.98 \n",
+ "20250511\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1431 2025-05-09 3620 36.9128 1 -1636.9 6000 101412 3630 3077.15 \n",
+ "1432 2025-05-10 3620 36.9128 1 3087.72 6000 104559 3630 3152.87 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1431 1 4876.81 7246.98 \n",
+ "1432 1 4876.81 7347.37 \n",
+ "前一天的 3077.1512 \n",
+ "现在的 3152.8717 \n",
+ " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n",
+ "0 1431 2025-05-09 3620 36.9128 1 -1636.9 6000 101412 3630 \n",
+ "1 1432 2025-05-10 3620 36.9128 1 3087.72 6000 104559 3630 \n",
+ "\n",
+ " 即期成本 订单结构 计划产量 京博产量 \n",
+ "0 3077.15 1 4876.81 7246.98 \n",
+ "1 3152.87 1 4876.81 7347.37 \n",
+ "昨日计划提货偏差改之前 3087.72\n",
+ "昨日计划提货偏差改之后 2470.5621\n",
+ "**************************************************预测结果: 3623.41\n",
+ "更新前一天数据\n",
+ "更新数据前\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1433 2025-05-11 3620 36.9128 1 3087.72 6000 104559 3630 3151.4 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1433 1 4876.81 7347.37 \n",
+ "日期存在,即将更新\n",
+ "新数据 [3620.0, '', '', 3087.72, '', 104559.3953618, '', 3152.8717, '', '', 7347.3715]\n",
+ "更新数据后\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1433 2025-05-11 3620 36.9128 1 3087.72 6000 104559 3630 3151.4 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1433 1 4876.81 7347.37 \n",
+ "20250512\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1432 2025-05-10 3620 36.9128 1 3087.72 6000 104559 3630 3152.87 \n",
+ "1433 2025-05-11 3620 36.9128 1 2942.36 6000 106829 3630 3151.4 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1432 1 4876.81 7347.37 \n",
+ "1433 1 4876.81 7365.94 \n",
+ "前一天的 3152.8717 \n",
+ "现在的 3151.3966 \n",
+ " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n",
+ "0 1432 2025-05-10 3620 36.9128 1 3087.72 6000 104559 3630 \n",
+ "1 1433 2025-05-11 3620 36.9128 1 2942.36 6000 106829 3630 \n",
+ "\n",
+ " 即期成本 订单结构 计划产量 京博产量 \n",
+ "0 3152.87 1 4876.81 7347.37 \n",
+ "1 3151.4 1 4876.81 7365.94 \n",
+ "昨日计划提货偏差改之前 2942.36\n",
+ "昨日计划提货偏差改之后 2489.1256000000003\n",
+ "**************************************************预测结果: 3623.44\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "更新前一天数据\n",
+ "更新数据前\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1434 2025-05-12 3620 36.9128 1 2942.36 6000 106829 3630 3171.09 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1434 1 4876.81 7365.94 \n",
+ "日期存在,即将更新\n",
+ "新数据 [3620.0, '', '', 2942.36, '', 106829.1756043, '', 3151.3966, '', '', 7365.935]\n",
+ "更新数据后\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1434 2025-05-12 3620 36.9128 1 2942.36 6000 106829 3630 3171.09 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1434 1 4876.81 7365.94 \n",
+ "20250513\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1433 2025-05-11 3620 36.9128 1 2942.36 6000 106829 3630 3151.4 \n",
+ "1434 2025-05-12 3620 34.396 1 251.7 6000 106775 3630 3171.09 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1433 1 4876.81 7365.94 \n",
+ "1434 1 4876.81 7267.18 \n",
+ "前一天的 3151.3966 \n",
+ "现在的 3171.0934 \n",
+ " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n",
+ "0 1433 2025-05-11 3620 36.9128 1 2942.36 6000 106829 3630 \n",
+ "1 1434 2025-05-12 3620 34.396 1 251.7 6000 106775 3630 \n",
+ "\n",
+ " 即期成本 订单结构 计划产量 京博产量 \n",
+ "0 3151.4 1 4876.81 7365.94 \n",
+ "1 3171.09 1 4876.81 7267.18 \n",
+ "昨日计划提货偏差改之前 251.7\n",
+ "昨日计划提货偏差改之后 2390.3720999999996\n",
+ "**************************************************预测结果: 3623.39\n",
+ "更新前一天数据\n",
+ "更新数据前\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1435 2025-05-13 3620 34.396 1 251.7 6000 106775 3630 3191.69 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1435 1 4876.81 7267.18 \n",
+ "日期存在,即将更新\n",
+ "新数据 [3620.0, 34.396, '', 251.7, '', 106775.4686698, '', 3171.0934, '', '', 7267.1815]\n",
+ "更新数据后\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1435 2025-05-13 3620 34.396 1 251.7 6000 106775 3630 3191.69 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1435 1 4876.81 7267.18 \n",
+ "20250514\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1434 2025-05-12 3620 34.396 1 251.7 6000 106775 3630 3171.09 \n",
+ "1435 2025-05-13 3600 34.396 1 -256.33 6000 106220 3630 3191.69 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1434 1 4876.81 7267.18 \n",
+ "1435 1 4876.81 7248.69 \n",
+ "前一天的 3171.0934 \n",
+ "现在的 3191.6925 \n",
+ " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n",
+ "0 1434 2025-05-12 3620 34.396 1 251.7 6000 106775 3630 \n",
+ "1 1435 2025-05-13 3600 34.396 1 -256.33 6000 106220 3630 \n",
+ "\n",
+ " 即期成本 订单结构 计划产量 京博产量 \n",
+ "0 3171.09 1 4876.81 7267.18 \n",
+ "1 3191.69 1 4876.81 7248.69 \n",
+ "昨日计划提货偏差改之前 -256.33\n",
+ "昨日计划提货偏差改之后 2371.8786\n",
+ "**************************************************预测结果: 3623.36\n",
+ "更新前一天数据\n",
+ "更新数据前\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1436 2025-05-14 3600 34.396 1 -256.33 6000 106220 3630 3318.29 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1436 1 4876.81 7248.69 \n",
+ "日期存在,即将更新\n",
+ "新数据 [3600.0, 34.396, '', -256.33, '', 106220.4490483, '', 3191.6925, '', '', 7248.688]\n",
+ "更新数据后\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1436 2025-05-14 3600 34.396 1 -256.33 6000 106220 3630 3318.29 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1436 1 4876.81 7248.69 \n",
+ "20250515\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1435 2025-05-13 3600 34.396 1 -256.33 6000 106220 3630 3191.69 \n",
+ "1436 2025-05-14 3600 33.557 1 -1015.89 6000 106321 3630 3318.29 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1435 1 4876.81 7248.69 \n",
+ "1436 1 4876.81 7200.57 \n",
+ "前一天的 3191.6925 \n",
+ "现在的 3318.2936 \n",
+ " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n",
+ "0 1435 2025-05-13 3600 34.396 1 -256.33 6000 106220 3630 \n",
+ "1 1436 2025-05-14 3600 33.557 1 -1015.89 6000 106321 3630 \n",
+ "\n",
+ " 即期成本 订单结构 计划产量 京博产量 \n",
+ "0 3191.69 1 4876.81 7248.69 \n",
+ "1 3318.29 1 4876.81 7200.57 \n",
+ "昨日计划提货偏差改之前 -1015.89\n",
+ "昨日计划提货偏差改之后 2323.7650999999996\n",
+ "**************************************************预测结果: 3628.64\n",
+ "更新前一天数据\n",
+ "更新数据前\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1437 2025-05-15 3600 33.557 1 -1015.89 6000 106321 3630 3170.14 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1437 1 4876.81 7200.57 \n",
+ "日期存在,即将更新\n",
+ "新数据 [3600.0, 33.557, '', -1015.89, '', 106321.4042938, '', 3318.2936, '', '', 7200.5745]\n",
+ "更新数据后\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1437 2025-05-15 3600 33.557 1 -1015.89 6000 106321 3630 3170.14 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1437 1 4876.81 7200.57 \n",
+ "20250516\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1436 2025-05-14 3600 33.557 1 -1015.89 6000 106321 3630 3318.29 \n",
+ "1437 2025-05-15 3600 33.9765 1 1408.4 6000 105595 3630 3170.14 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1436 1 4876.81 7200.57 \n",
+ "1437 1 4876.81 7298.53 \n",
+ "前一天的 3318.2936 \n",
+ "现在的 3170.1365 \n",
+ " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n",
+ "0 1436 2025-05-14 3600 33.557 1 -1015.89 6000 106321 3630 \n",
+ "1 1437 2025-05-15 3600 33.9765 1 1408.4 6000 105595 3630 \n",
+ "\n",
+ " 即期成本 订单结构 计划产量 京博产量 \n",
+ "0 3318.29 1 4876.81 7200.57 \n",
+ "1 3170.14 1 4876.81 7298.53 \n",
+ "昨日计划提货偏差改之前 1408.4\n",
+ "昨日计划提货偏差改之后 2421.7255999999998\n",
+ "**************************************************预测结果: 3573.82\n",
+ "更新前一天数据\n",
+ "更新数据前\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1438 2025-05-16 3620 33.9765 1 1408.4 6000 105595 3630 3164.18 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1438 1 4876.81 7298.53 \n",
+ "日期存在,即将更新\n",
+ "新数据 [3600.0, 33.9765, '', 1408.4, '', 105595.49, '', 3170.1365, '', '', 7298.535]\n",
+ "更新数据后\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1438 2025-05-16 3620 33.9765 1 1408.4 6000 105595 3630 3164.18 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1438 1 4876.81 7298.53 \n",
+ "20250517\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1437 2025-05-15 3600 33.9765 1 1408.4 6000 105595 3630 3170.14 \n",
+ "1438 2025-05-16 3620 34.8154 1 -1308.63 6000 107985 3630 3164.18 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1437 1 4876.81 7298.53 \n",
+ "1438 1 4876.81 7407.17 \n",
+ "前一天的 3170.1365 \n",
+ "现在的 3164.177 \n",
+ " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n",
+ "0 1437 2025-05-15 3600 33.9765 1 1408.4 6000 105595 3630 \n",
+ "1 1438 2025-05-16 3620 34.8154 1 -1308.63 6000 107985 3630 \n",
+ "\n",
+ " 即期成本 订单结构 计划产量 京博产量 \n",
+ "0 3170.14 1 4876.81 7298.53 \n",
+ "1 3164.18 1 4876.81 7407.17 \n",
+ "昨日计划提货偏差改之前 -1308.63\n",
+ "昨日计划提货偏差改之后 2530.3635999999997\n",
+ "**************************************************预测结果: 3603.59\n",
+ "更新前一天数据\n",
+ "更新数据前\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1439 2025-05-17 3620 34.8154 1 -1308.63 6000 107985 3630 3246.69 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1439 1 4876.81 7407.17 \n",
+ "日期存在,即将更新\n",
+ "新数据 [3620.0, 34.8154, '', -1308.63, '', 107984.71, '', 3164.177, '', '', 7407.173]\n",
+ "更新数据后\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1439 2025-05-17 3620 34.8154 1 -1308.63 6000 107985 3630 3246.69 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1439 1 4876.81 7407.17 \n",
+ "20250518\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1438 2025-05-16 3620 34.8154 1 -1308.63 6000 107985 3630 3164.18 \n",
+ "1439 2025-05-17 3620 34.8154 1 3334.9 6000 109984 3630 3246.69 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1438 1 4876.81 7407.17 \n",
+ "1439 1 4876.81 7029.96 \n",
+ "前一天的 3164.177 \n",
+ "现在的 3246.6882 \n",
+ " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n",
+ "0 1438 2025-05-16 3620 34.8154 1 -1308.63 6000 107985 3630 \n",
+ "1 1439 2025-05-17 3620 34.8154 1 3334.9 6000 109984 3630 \n",
+ "\n",
+ " 即期成本 订单结构 计划产量 京博产量 \n",
+ "0 3164.18 1 4876.81 7407.17 \n",
+ "1 3246.69 1 4876.81 7029.96 \n",
+ "昨日计划提货偏差改之前 3334.9\n",
+ "昨日计划提货偏差改之后 2153.1485999999995\n",
+ "**************************************************预测结果: 3623.0\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "更新前一天数据\n",
+ "更新数据前\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1440 2025-05-18 3620 34.8154 1 3334.9 6000 109984 3630 3249.93 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1440 1 4876.81 7029.96 \n",
+ "日期存在,即将更新\n",
+ "新数据 [3620.0, '', '', 3334.9, '', 109984.19, '', 3246.6882, '', '', 7029.958]\n",
+ "更新数据后\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1440 2025-05-18 3620 34.8154 1 3334.9 6000 109984 3630 3249.93 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1440 1 4876.81 7029.96 \n",
+ "20250519\n",
+ " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n",
+ "1439 2025-05-17 3620 34.8154 1 3334.9 6000 109984 3630 3246.69 \n",
+ "1440 2025-05-18 3620 34.8154 1 793.84 6000 110418 3630 3249.93 \n",
+ "\n",
+ " 订单结构 计划产量 京博产量 \n",
+ "1439 1 4876.81 7029.96 \n",
+ "1440 1 4876.81 6533.09 \n",
+ "前一天的 3246.6882 \n",
+ "现在的 3249.9268 \n",
+ " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n",
+ "0 1439 2025-05-17 3620 34.8154 1 3334.9 6000 109984 3630 \n",
+ "1 1440 2025-05-18 3620 34.8154 1 793.84 6000 110418 3630 \n",
+ "\n",
+ " 即期成本 订单结构 计划产量 京博产量 \n",
+ "0 3246.69 1 4876.81 7029.96 \n",
+ "1 3249.93 1 4876.81 6533.09 \n",
+ "昨日计划提货偏差改之前 793.84\n",
+ "昨日计划提货偏差改之后 1656.2806\n",
+ "**************************************************预测结果: 3622.23\n"
]
}
],
diff --git a/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb b/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb
index eb32307..bcc2226 100644
--- a/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb
+++ b/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb
@@ -2,14 +2,56 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" \n",
+ " \n",
" "
]
},
@@ -33,8 +79,6 @@
"source": [
"import requests\n",
"import json\n",
- "import xlrd\n",
- "import xlwt\n",
"from datetime import datetime, timedelta\n",
"import time\n",
"import pandas as pd\n",
@@ -151,7 +195,7 @@
"\n",
" search_data = {\n",
" \"data\": {\n",
- " \"date\": get_cur_time(date)[0],\n",
+ " \"date\": getNow(date)[0],\n",
" \"dataItemNoList\": dataItemNoList\n",
" },\n",
" \"funcModule\": \"数据项\",\n",
@@ -169,24 +213,64 @@
"\n",
"# xls文件处理\n",
"\n",
- "def get_cur_time(date = ''):\n",
- " if date == '':\n",
- " now = datetime.now()\n",
- " else:\n",
- " now = date\n",
- " year = now.year\n",
- " month = now.month\n",
- " day = now.day\n",
"\n",
- " if month < 10:\n",
- " month = \"0\" + str(month)\n",
- " if day < 10:\n",
- " day = \"0\" + str(day)\n",
- " cur_time = str(year) + str(month) + str(day)\n",
- " cur_time2 = str(year) + \"-\" + str(month) + \"-\" + str(day)\n",
- "# cur_time = '20231007'\n",
- "# cur_time2 = '2023-10-07'\n",
- " return cur_time, cur_time2\n",
+ "\n",
+ "def getNow(date='', offset=0):\n",
+ " \"\"\"生成指定日期的两种格式字符串\n",
+ " Args:\n",
+ " date: 支持多种输入类型:\n",
+ " - datetime对象\n",
+ " - 字符串格式(支持'%Y-%m-%d'、'%Y%m%d'和'%Y/%m/%d')\n",
+ " - 空字符串表示当前日期\n",
+ " offset: 向前(更早方向)偏移的天数,结果日期 = 输入日期 - offset 天\n",
+ " Returns:\n",
+ " tuple: (紧凑日期字符串, 标准日期字符串)\n",
+ " \"\"\"\n",
+ " # 日期解析逻辑\n",
+ " from datetime import datetime,timedelta\n",
+ " if isinstance(date, datetime):\n",
+ " now = date\n",
+ " else:\n",
+ " now = datetime.now()\n",
+ " if date:\n",
+ " # 尝试多种日期格式解析\n",
+ " for fmt in ('%Y-%m-%d', '%Y%m%d', '%Y/%m/%d'):\n",
+ " try:\n",
+ " now = datetime.strptime(str(date), fmt)\n",
+ " break\n",
+ " except ValueError:\n",
+ " continue\n",
+ " else:\n",
+ " raise ValueError(f\"无法解析的日期格式: {date}\")\n",
+ "\n",
+ " # 应用日期偏移\n",
+ " now = now - timedelta(days=offset)\n",
+ " \n",
+ " # 统一格式化输出\n",
+ " date_str = now.strftime(\"%Y-%m-%d\")\n",
+ " compact_date = date_str.replace(\"-\", \"\")\n",
+ " return compact_date, date_str\n",
+ "\n",
+ "\n",
+ "\n",
+ "# def get_cur_time(date = ''):\n",
+ "# if date == '':\n",
+ "# now = datetime.now()\n",
+ "# else:\n",
+ "# now = date\n",
+ "# year = now.year\n",
+ "# month = now.month\n",
+ "# day = now.day\n",
+ "\n",
+ "# if month < 10:\n",
+ "# month = \"0\" + str(month)\n",
+ "# if day < 10:\n",
+ "# day = \"0\" + str(day)\n",
+ "# cur_time = str(year) + str(month) + str(day)\n",
+ "# cur_time2 = str(year) + \"-\" + str(month) + \"-\" + str(day)\n",
+ "# # cur_time = '20231007'\n",
+ "# # cur_time2 = '2023-10-07'\n",
+ "# return cur_time, cur_time2\n",
"\n",
"\n",
"def get_head_push_auth():\n",
@@ -205,7 +289,7 @@
" \"funcOperation\": \"新增\",\n",
" \"data\": [\n",
" {\"dataItemNo\": \"C01100036|Forecast_Price|ACN\",\n",
- " \"dataDate\": get_cur_time(date)[0],\n",
+ " \"dataDate\": getNow(date)[0],\n",
" \"dataStatus\": \"add\",\n",
" \"dataValue\": forecast_price()\n",
" }\n",
@@ -217,7 +301,7 @@
" print(res.text)\n",
"\n",
"def forecast_price():\n",
- " df_test = pd.read_excel('沥青数据项.xlsx',sheet_name='数据项历史数据')\n",
+ " df_test = pd.read_excel('沥青数据项.xlsx')\n",
" df_test.drop([0],inplace=True)\n",
" df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n",
" '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n",
@@ -277,7 +361,7 @@
"\n",
" pd.set_option('display.max_rows',40) \n",
" pd.set_option('display.max_columns',40) \n",
- " df_test = pd.read_excel('沥青数据项.xlsx',sheet_name='数据项历史数据')\n",
+ " df_test = pd.read_excel('沥青数据项.xlsx')\n",
" df_test.drop([0],inplace=True)\n",
" df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n",
" '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n",
@@ -434,31 +518,40 @@
" pickle.dump(grid_search_XGB, file)\n",
"\n",
"def read_xls_data():\n",
+ " \"\"\"获取特征项ID\"\"\"\n",
" global one_cols, two_cols\n",
- " # 打开 XLS 文件\n",
- " workbook = xlrd.open_workbook(read_file_path_name)\n",
+ " # 使用pandas读取Excel文件\n",
+ " df = pd.read_excel(read_file_path_name, header=None) # 不自动识别列名\n",
+ " # 获取第二行数据(索引为1)\n",
+ " one_cols = df.iloc[1].tolist()[1:]\n",
+ " print(f'获取到的数据项ID{one_cols}')\n",
"\n",
- " # 获取所有表格名称\n",
- " # sheet_names = workbook.sheet_names()\n",
+ "# def read_xls_data():\n",
+ "# global one_cols, two_cols\n",
+ "# # 打开 XLS 文件\n",
+ "# workbook = xlrd.open_workbook(read_file_path_name)\n",
"\n",
- " # 选择第一个表格\n",
- " sheet = workbook.sheet_by_index(0)\n",
+ "# # 获取所有表格名称\n",
+ "# # sheet_names = workbook.sheet_names()\n",
"\n",
- " # 获取行数和列数\n",
- " num_rows = sheet.nrows\n",
- " # num_cols = sheet.ncols\n",
+ "# # 选择第一个表格\n",
+ "# sheet = workbook.sheet_by_index(0)\n",
"\n",
- " # 遍历每一行,获取单元格数据\n",
- " # for i in range(num_rows):\n",
- " # row_data = sheet.row_values(i)\n",
- " # one_cols.append(row_data)\n",
- " # two_cols.append(row_data[1])\n",
+ "# # 获取行数和列数\n",
+ "# num_rows = sheet.nrows\n",
+ "# # num_cols = sheet.ncols\n",
"\n",
- " row_data = sheet.row_values(1)\n",
- " one_cols = row_data\n",
+ "# # 遍历每一行,获取单元格数据\n",
+ "# # for i in range(num_rows):\n",
+ "# # row_data = sheet.row_values(i)\n",
+ "# # one_cols.append(row_data)\n",
+ "# # two_cols.append(row_data[1])\n",
"\n",
- " # 关闭 XLS 文件\n",
- " # workbook.close()\n",
+ "# row_data = sheet.row_values(1)\n",
+ "# one_cols = row_data\n",
+ "\n",
+ "# # 关闭 XLS 文件\n",
+ "# # workbook.close()\n",
"\n",
"def start_3(date=None,token=None,token_push=None):\n",
" '''预测上传数据'''\n",
@@ -570,74 +663,111 @@
" else:\n",
" return None\n",
"\n",
- "def save_queryDataListItemNos_xls(data_df,dataItemNoList):\n",
"\n",
+ "def save_queryDataListItemNos_xls(data_df,dataItemNoList):\n",
" current_year_month = datetime.now().strftime('%Y-%m')\n",
" grouped = data_df.groupby(\"dataDate\")\n",
"\n",
- " # 打开xls文件\n",
- " workbook = xlrd.open_workbook('沥青数据项.xls')\n",
- "\n",
- " # 获取所有sheet的个数\n",
- " sheet_count = len(workbook.sheet_names())\n",
- "\n",
- " # 获取所有sheet的名称\n",
- " sheet_names = workbook.sheet_names()\n",
- "\n",
- " new_workbook = xlwt.Workbook()\n",
- " for i in range(sheet_count):\n",
- " # 获取当前sheet\n",
- " sheet = workbook.sheet_by_index(i)\n",
- "\n",
- " # 获取sheet的行数和列数\n",
- " row_count = sheet.nrows\n",
- " col_count = sheet.ncols\n",
- " # 获取原有数据\n",
- " data = []\n",
- " for row in range(row_count):\n",
- " row_data = []\n",
- " for col in range(col_count):\n",
- " row_data.append(sheet.cell_value(row, col))\n",
- " data.append(row_data)\n",
- "\n",
- " # 创建xlwt的Workbook对象\n",
- " # 创建sheet\n",
- " new_sheet = new_workbook.add_sheet(sheet_names[i])\n",
- "\n",
+ " # 使用openpyxl打开xlsx文件\n",
+ " from openpyxl import load_workbook\n",
+ " workbook = load_workbook('沥青数据项.xlsx')\n",
"\n",
+ " # 再次加载同一文件,作为写入并最终保存的工作簿\n",
+ " new_workbook = load_workbook('沥青数据项.xlsx')\n",
+ " \n",
+ " for sheetname in workbook.sheetnames:\n",
+ " sheet = workbook[sheetname]\n",
+ " new_sheet = new_workbook[sheetname]\n",
+ " \n",
" current_year_month_row = 0\n",
- " # 将原有的数据写入新的sheet\n",
- " for row in range(row_count):\n",
- " for col in range(col_count):\n",
- " col0 = data[row][0]\n",
- " # print(\"col0\",col0[:7])\n",
- " if col0[:7] == current_year_month:\n",
- " current_year_month_row += 1\n",
- " break\n",
- " new_sheet.write(row, col, data[row][col])\n",
+ " # 统计首列以当前年月开头的行数\n",
+ " for row_idx, row in enumerate(sheet.iter_rows(values_only=True), 1):\n",
+ " if str(row[0]).startswith(current_year_month):\n",
+ " current_year_month_row += 1\n",
"\n",
- "\n",
- " # print(\"current_year_month_row\",current_year_month_row)\n",
- " if i == 0:\n",
- " rowFlag = 0\n",
- " # 查看每组数据\n",
- " for date, group in grouped:\n",
- " new_sheet.write(row_count + rowFlag - current_year_month_row, 0, date)\n",
- " for j in range(len(dataItemNoList)):\n",
- " dataItemNo = dataItemNoList[j]\n",
- "\n",
- " # for dataItemNo in dataItemNoList:\n",
+ " # 仅第一个工作表:从 (max_row - 当月行数 + 1) 行起按日期逐行写入新数据(会覆盖该位置已有单元格)\n",
+ " if sheetname == workbook.sheetnames[0]:\n",
+ " start_row = sheet.max_row - current_year_month_row + 1\n",
+ " for row_idx, (date, group) in enumerate(grouped, start=start_row):\n",
+ " new_sheet.cell(row=row_idx, column=1, value=date)\n",
+ " for j, dataItemNo in enumerate(dataItemNoList, start=2):\n",
" if group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values:\n",
+ " new_sheet.cell(row=row_idx, column=j, \n",
+ " value=group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values[0])\n",
"\n",
- " new_sheet.write(row_count + rowFlag - current_year_month_row, j + 1, group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values[0])\n",
- "\n",
- " rowFlag += 1\n",
- "\n",
- "\n",
- " # 保存新的xls文件\n",
+ " # 保存修改后的xlsx文件\n",
" new_workbook.save(\"沥青数据项.xlsx\")\n",
"\n",
+ "\n",
+ "# def save_queryDataListItemNos_xls(data_df,dataItemNoList):\n",
+ "# from datetime import datetime, timedelta\n",
+ "# current_year_month = datetime.now().strftime('%Y-%m')\n",
+ "# grouped = data_df.groupby(\"dataDate\")\n",
+ "\n",
+ "# # 打开xls文件\n",
+ "# workbook = xlrd.open_workbook('沥青数据项.xlsx')\n",
+ "\n",
+ "# # 获取所有sheet的个数\n",
+ "# sheet_count = len(workbook.sheet_names())\n",
+ "\n",
+ "# # 获取所有sheet的名称\n",
+ "# sheet_names = workbook.sheet_names()\n",
+ "\n",
+ "# new_workbook = xlwt.Workbook()\n",
+ "# for i in range(sheet_count):\n",
+ "# # 获取当前sheet\n",
+ "# sheet = workbook.sheet_by_index(i)\n",
+ "\n",
+ "# # 获取sheet的行数和列数\n",
+ "# row_count = sheet.nrows\n",
+ "# col_count = sheet.ncols\n",
+ "# # 获取原有数据\n",
+ "# data = []\n",
+ "# for row in range(row_count):\n",
+ "# row_data = []\n",
+ "# for col in range(col_count):\n",
+ "# row_data.append(sheet.cell_value(row, col))\n",
+ "# data.append(row_data)\n",
+ "\n",
+ "# # 创建xlwt的Workbook对象\n",
+ "# # 创建sheet\n",
+ "# new_sheet = new_workbook.add_sheet(sheet_names[i])\n",
+ "\n",
+ "\n",
+ "# current_year_month_row = 0\n",
+ "# # 将原有的数据写入新的sheet\n",
+ "# for row in range(row_count):\n",
+ "# for col in range(col_count):\n",
+ "# col0 = data[row][0]\n",
+ "# # print(\"col0\",col0[:7])\n",
+ "# if col0[:7] == current_year_month:\n",
+ "# current_year_month_row += 1\n",
+ "# break\n",
+ "# new_sheet.write(row, col, data[row][col])\n",
+ "\n",
+ "\n",
+ "# # print(\"current_year_month_row\",current_year_month_row)\n",
+ "# if i == 0:\n",
+ "# rowFlag = 0\n",
+ "# # 查看每组数据\n",
+ "# for date, group in grouped:\n",
+ "# new_sheet.write(row_count + rowFlag - current_year_month_row, 0, date)\n",
+ "# for j in range(len(dataItemNoList)):\n",
+ "# dataItemNo = dataItemNoList[j]\n",
+ "\n",
+ "# # for dataItemNo in dataItemNoList:\n",
+ "# if group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values:\n",
+ "\n",
+ "# new_sheet.write(row_count + rowFlag - current_year_month_row, j + 1, group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values[0])\n",
+ "\n",
+ "# rowFlag += 1\n",
+ "\n",
+ "\n",
+ "# # 保存新的xls文件\n",
+ "# new_workbook.save(\"沥青数据项.xlsx\")\n",
+ "\n",
"def queryDataListItemNos(token=None):\n",
+ " from datetime import datetime, timedelta\n",
" df = pd.read_excel('沥青数据项.xlsx')\n",
" dataItemNoList = df.iloc[0].tolist()[1:]\n",
" \n",
@@ -668,7 +798,130 @@
" save_queryDataListItemNos_xls(data_df,dataItemNoList)\n",
" print('当月数据更新完成')\n",
"\n",
+ "\n",
+ "def save_xls_1(append_rows):\n",
+ "\n",
+ " # 打开xls文件\n",
+ " workbook = xlrd.open_workbook('沥青数据项.xlsx')\n",
+ "\n",
+ " # 获取所有sheet的个数\n",
+ " sheet_count = len(workbook.sheet_names())\n",
+ "\n",
+ " # 获取所有sheet的名称\n",
+ " sheet_names = workbook.sheet_names()\n",
+ "\n",
+ " new_workbook = xlwt.Workbook()\n",
+ " for i in range(sheet_count):\n",
+ " # 获取当前sheet\n",
+ " sheet = workbook.sheet_by_index(i)\n",
+ "\n",
+ " # 获取sheet的行数和列数\n",
+ " row_count = sheet.nrows - 1\n",
+ " col_count = sheet.ncols\n",
+ " # 获取原有数据\n",
+ " data = []\n",
+ " for row in range(row_count):\n",
+ " row_data = []\n",
+ " for col in range(col_count):\n",
+ " row_data.append(sheet.cell_value(row, col))\n",
+ " data.append(row_data)\n",
+ " # 创建xlwt的Workbook对象\n",
+ " # 创建sheet\n",
+ " new_sheet = new_workbook.add_sheet(sheet_names[i])\n",
+ "\n",
+ " # 将原有的数据写入新的sheet\n",
+ " for row in range(row_count):\n",
+ " for col in range(col_count):\n",
+ " new_sheet.write(row, col, data[row][col])\n",
+ "\n",
+ " if i == 0:\n",
+ " # 在新的sheet中添加数据\n",
+ " for col in range(col_count):\n",
+ " new_sheet.write(row_count, col, append_rows[col])\n",
+ "\n",
+ " # 保存新的xls文件\n",
+ " new_workbook.save(\"沥青数据项.xlsx\") \n",
+ "\n",
+ "\n",
+ "\n",
+ "def start(date=''):\n",
+ " \"\"\"获取当日数据\"\"\"\n",
+ " read_xls_data()\n",
+ " token = get_head_auth()\n",
+ " if not token:\n",
+ " return\n",
+ " \n",
+ " cur_time,cur_time2 = getNow(date)\n",
+ " print(f\"获取{cur_time}数据\")\n",
+ " datas = get_data_value(token, one_cols,date=cur_time)\n",
+ " print(len(datas))\n",
+ " print(datas)\n",
+ " if not datas:\n",
+ " return\n",
+ "\n",
+ " append_rows = [cur_time2]\n",
+ " dataItemNo_dataValue = {}\n",
+ " for data_value in datas:\n",
+ " if \"dataValue\" not in data_value:\n",
+ " print(data_value)\n",
+ " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n",
+ " else:\n",
+ " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n",
+ " \n",
+ " for value in one_cols:\n",
+ " if value in dataItemNo_dataValue:\n",
+ " append_rows.append(dataItemNo_dataValue[value])\n",
+ " else:\n",
+ " append_rows.append(\"\")\n",
+ " \n",
+ " print('添加的行:',len(append_rows),append_rows)\n",
+ " save_xls_2(append_rows)\n",
+ "\n",
+ "\n",
+ "def save_xls_2(append_rows):\n",
+ " \"\"\"保存或更新数据到Excel文件\n",
+ " 参数:\n",
+ " append_rows (list): 需要追加/更新的数据行,格式为[日期, 数据项1, 数据项2,...]\n",
+ " \"\"\"\n",
+ " # try:\n",
+ " # 读取现有数据(假设第一行为列名)\n",
+ " df = pd.read_excel('沥青数据项.xlsx', sheet_name=0)\n",
+ " print('文件中的数据列数:',len(df.columns),df.columns)\n",
+ " # 转换append_rows为DataFrame\n",
+ " if len(append_rows) != len(df.columns):\n",
+ " # 去除第二个元素 ,不知道什么原因多一个空数据\n",
+ " append_rows.pop(1)\n",
+ " append_rows = pd.DataFrame([append_rows],columns=df.columns)\n",
+ " # 创建新数据行\n",
+ " new_date = append_rows['日期'].values[0]\n",
+ " \n",
+ " dates = df['日期'].to_list()\n",
+ " # 判断日期是否存在\n",
+ " if new_date in dates:\n",
+ " # 找到日期所在行的索引\n",
+ " date_mask = df['日期'] == new_date\n",
+ " # 存在则更新数据\n",
+ " df.loc[date_mask] = append_rows.values\n",
+ " print(f\"更新 {new_date} 数据\")\n",
+ " else:\n",
+ " # 不存在则追加数据\n",
+ " df = pd.concat([df, append_rows], ignore_index=True)\n",
+ " print(df.head())\n",
+ " print(df.tail())\n",
+ " print(f\"插入 {new_date} 新数据\")\n",
+ " \n",
+ " # 保存更新后的数据\n",
+ " df.to_excel('沥青数据项.xlsx', index=False, engine='openpyxl')\n",
+ " \n",
+ " # except FileNotFoundError:\n",
+ " # # 如果文件不存在则创建新文件\n",
+ " # pd.DataFrame([append_rows]).to_excel('沥青数据项.xlsx', index=False, engine='openpyxl')\n",
+ " # except Exception as e:\n",
+ " # print(f\"保存数据时发生错误: {str(e)}\")\n",
+ "\n",
+ "\n",
"def main(start_date=None,token=None,token_push=None):\n",
+ " from datetime import datetime\n",
" if start_date is None:\n",
" start_date = datetime.now()\n",
" if token is None:\n",
@@ -677,63 +930,192 @@
" token_push = get_head_push_auth()\n",
" date = start_date.strftime('%Y%m%d')\n",
" print(date)\n",
+ "# start(date)\n",
" # 更新当月数据\n",
" queryDataListItemNos(token)\n",
" # 训练模型\n",
" optimize_Model()\n",
- " # 预测&上传预测结果\n",
+ " # # 预测&上传预测结果\n",
" upload_data_to_system(token_push,start_date)"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
+<<<<<<< .mine
+ "ename": "AttributeError",
+ "evalue": "module 'datetime' has no attribute 'now'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[1;32mIn[5], line 20\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# if __name__ == \"__main__\":\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m# print(\"运行中ing...\")\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;66;03m# # 每天定时12点运行\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 18\u001b[0m \u001b[38;5;66;03m# except :\u001b[39;00m\n\u001b[0;32m 19\u001b[0m \u001b[38;5;66;03m# print(f\"{current_time}任务失败\")\u001b[39;00m\n\u001b[1;32m---> 20\u001b[0m main()\n",
+ "Cell \u001b[1;32mIn[1], line 848\u001b[0m, in \u001b[0;36mmain\u001b[1;34m(start_date, token, token_push)\u001b[0m\n\u001b[0;32m 846\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mmain\u001b[39m(start_date\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,token\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,token_push\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m 847\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m start_date \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m--> 848\u001b[0m start_date \u001b[38;5;241m=\u001b[39m datetime\u001b[38;5;241m.\u001b[39mnow()\n\u001b[0;32m 849\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m token \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 850\u001b[0m token \u001b[38;5;241m=\u001b[39m get_head_auth()\n",
+ "\u001b[1;31mAttributeError\u001b[0m: module 'datetime' has no attribute 'now'"
+||||||| .r87044
"name": "stdout",
"output_type": "stream",
"text": [
"运行中ing...\n"
+=======
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "20250522\n"
+>>>>>>> .r87201
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20588\\2014406737.py:615: DeprecationWarning:\n",
+ "\n",
+ "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "当月数据更新完成\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20588\\2014406737.py:298: UserWarning:\n",
+ "\n",
+ "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Using matplotlib backend: inline\n",
+ "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n",
+ "Populating the interactive namespace from numpy and matplotlib\n",
+ "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "c:\\Users\\EDY\\.conda\\envs\\jiageyuce\\Lib\\site-packages\\IPython\\core\\magics\\pylab.py:166: UserWarning:\n",
+ "\n",
+ "pylab import has clobbered these variables: ['random', 'datetime', 'plot', '__version__']\n",
+ "`%matplotlib` prevents importing * from pylab and numpy\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Best score: 0.997\n",
+ "Best parameters set:\n",
+ "\tlearning_rate: 0.1\n",
+ "\tmax_depth: 8\n",
+ "\tn_estimators: 90\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20588\\2014406737.py:238: UserWarning:\n",
+ "\n",
+ "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+ "\n",
+ "C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20588\\2014406737.py:272: FutureWarning:\n",
+ "\n",
+ "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "日期\n",
+ "2025-05-22 3600.097412\n",
+ "Name: 日度预测价格, dtype: float32\n",
+ "{\"confirmFlg\":false,\"status\":true}\n"
]
}
],
"source": [
"\n",
- "if __name__ == \"__main__\":\n",
- " print(\"运行中ing...\")\n",
- " # 每天定时12点运行\n",
- " while True:\n",
- " # 获取当前时间\n",
- " current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n",
- " # print(current_time)\n",
+ "# if __name__ == \"__main__\":\n",
+ "# print(\"运行中ing...\")\n",
+ "# # 每天定时12点运行\n",
+ "# while True:\n",
+ "# # 获取当前时间\n",
+ "# current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n",
+ "# # print(current_time)\n",
"\n",
+<<<<<<< .mine
+ "# # 判断当前时间是否为执行任务的时间点\n",
+ "# try:\n",
+ "# if current_time == \"09:13:00\":\n",
+ "# print(\"执行定时任务\")\n",
+ "# main()\n",
+ "# elif current_time == \"09:18:00\":\n",
+ "# print(\"执行定时任务\")\n",
+ "# main()\n",
+ "# time.sleep(1)\n",
+ "# except :\n",
+ "# print(f\"{current_time}任务失败\")\n",
+ "main()\n",
+ "main()\n",
+||||||| .r87044
" # 判断当前时间是否为执行任务的时间点\n",
" try:\n",
- " if current_time == \"09:15:00\":\n",
+ " if current_time == \"09:13:00\":\n",
+ " print(\"执行定时任务\")\n",
+ " main()\n",
+ " elif current_time == \"09:18:00\":\n",
" print(\"执行定时任务\")\n",
" main()\n",
- " elif current_time == \"20:00:00\":\n",
- " print('更新数据')\n",
- " start_3()\n",
" time.sleep(1)\n",
" except :\n",
" print(f\"{current_time}任务失败\")\n",
- "\n",
- "\n",
+ "# main()\n",
+=======
+ "# # 判断当前时间是否为执行任务的时间点\n",
+ "# try:\n",
+ "# if current_time == \"09:13:00\":\n",
+ "# print(\"执行定时任务\")\n",
+ "# main()\n",
+ "# elif current_time == \"09:18:00\":\n",
+ "# print(\"执行定时任务\")\n",
+ "# main()\n",
+ "# time.sleep(1)\n",
+ "# except :\n",
+ "# print(f\"{current_time}任务失败\")\n",
+ "main()\n",
+>>>>>>> .r87201
" # 检测数据准确性, 需要检测放开\n",
" # check_data(\"100028098|LISTING_PRICE\")\n",
- " # check_data(\"9137070016544622XB|DAY_Yield\")\n"
+ " # check_data(\"9137070016544622XB|DAY_Yield\")"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
- "# start_date = datetime(2025, 4, 1)\n",
- "# end_date = datetime(2025, 5, 7)\n",
+ "# start_date = datetime(2025, 5, 16)\n",
+ "# end_date = datetime(2025, 5, 19)\n",
"# token = get_head_auth()\n",
"\n",
"# token_push = get_head_push_auth()\n",
@@ -745,7 +1127,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
@@ -753,86 +1135,9 @@
"\n",
"\n",
"class codeBackup:\n",
- " def save_xls_1(append_rows):\n",
- "\n",
- " # 打开xls文件\n",
- " workbook = xlrd.open_workbook('沥青数据项.xlsx')\n",
- "\n",
- " # 获取所有sheet的个数\n",
- " sheet_count = len(workbook.sheet_names())\n",
- "\n",
- " # 获取所有sheet的名称\n",
- " sheet_names = workbook.sheet_names()\n",
- "\n",
- " new_workbook = xlwt.Workbook()\n",
- " for i in range(sheet_count):\n",
- " # 获取当前sheet\n",
- " sheet = workbook.sheet_by_index(i)\n",
- "\n",
- " # 获取sheet的行数和列数\n",
- " row_count = sheet.nrows - 1\n",
- " col_count = sheet.ncols\n",
- " # 获取原有数据\n",
- " data = []\n",
- " for row in range(row_count):\n",
- " row_data = []\n",
- " for col in range(col_count):\n",
- " row_data.append(sheet.cell_value(row, col))\n",
- " data.append(row_data)\n",
- " # 创建xlwt的Workbook对象\n",
- " # 创建sheet\n",
- " new_sheet = new_workbook.add_sheet(sheet_names[i])\n",
- "\n",
- " # 将原有的数据写入新的sheet\n",
- " for row in range(row_count):\n",
- " for col in range(col_count):\n",
- " new_sheet.write(row, col, data[row][col])\n",
- "\n",
- " if i == 0:\n",
- " # 在新的sheet中添加数据\n",
- " for col in range(col_count):\n",
- " new_sheet.write(row_count, col, append_rows[col])\n",
- "\n",
- " # 保存新的xls文件\n",
- " new_workbook.save(\"沥青数据项.xlsx\") \n",
"\n",
" \n",
"\n",
- " def start_2(date,token):\n",
- " '''更新数据'''\n",
- " read_xls_data()\n",
- "\n",
- " # token = get_head_auth()\n",
- " # if not token:\n",
- " # return\n",
- " \n",
- "\n",
- " datas = get_data_value(token, one_cols[1:],date)\n",
- " if not datas:\n",
- " return\n",
- "\n",
- " # data_list = [two_cols, one_cols]\n",
- " append_rows = [get_cur_time(date=date)[1]]\n",
- " dataItemNo_dataValue = {}\n",
- " for data_value in datas:\n",
- " if \"dataValue\" not in data_value:\n",
- " print(data_value)\n",
- " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n",
- " else:\n",
- " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n",
- " \n",
- " for value in one_cols[1:]:\n",
- " if value in dataItemNo_dataValue:\n",
- " append_rows.append(dataItemNo_dataValue[value])\n",
- " else:\n",
- " append_rows.append(\"\")\n",
- " print('新增数据:',append_rows)\n",
- " save_xls_1(append_rows)\n",
- "\n",
- " \n",
- " # data_list.append(three_cols)\n",
- " # write_xls(data_list)\n",
- " \n",
"\n",
" def write_xls(data,date):\n",
" # 创建一个Workbook对象\n",
@@ -932,7 +1237,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "base",
"language": "python",
"name": "python3"
},
@@ -946,7 +1251,13 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
+<<<<<<< .mine
+ "version": "3.11.7"
+||||||| .r87044
"version": "3.7.0"
+=======
+ "version": "3.11.11"
+>>>>>>> .r87201
}
},
"nbformat": 4,
diff --git a/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb.mine b/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb.mine
new file mode 100644
index 0000000..5e7fb95
--- /dev/null
+++ b/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb.mine
@@ -0,0 +1,1100 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "WARNING:tensorflow:From C:\\Users\\EDY\\AppData\\Roaming\\Python\\Python311\\site-packages\\keras\\src\\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.\n",
+ "\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ " \n",
+ " "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import requests\n",
+ "import json\n",
+ "import xlrd\n",
+ "import xlwt\n",
+ "from datetime import datetime, timedelta\n",
+ "import time\n",
+ "import pandas as pd\n",
+ "\n",
+ "# 变量定义\n",
+ "login_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n",
+ "search_url = \"http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos\"\n",
+ "queryDataListItemNos_url = \"http://10.200.32.39/jingbo-api//api/warehouse/dwDataItem/queryDataListItemNos\"\n",
+ "\n",
+ "login_push_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n",
+ "upload_url = \"http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList\"\n",
+ "\n",
+ "login_data = {\n",
+ " \"data\": {\n",
+ " \"account\": \"api_dev\",\n",
+ " \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n",
+ " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n",
+ " \"terminal\": \"API\"\n",
+ " },\n",
+ " \"funcModule\": \"API\",\n",
+ " \"funcOperation\": \"获取token\"\n",
+ "}\n",
+ "\n",
+ "login_push_data = {\n",
+ " \"data\": {\n",
+ " \"account\": \"api_dev\",\n",
+ " \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n",
+ " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n",
+ " \"terminal\": \"API\"\n",
+ " },\n",
+ " \"funcModule\": \"API\",\n",
+ " \"funcOperation\": \"获取token\"\n",
+ "}\n",
+ "\n",
+ "read_file_path_name = \"沥青数据项.xlsx\"\n",
+ "one_cols = []\n",
+ "two_cols = []\n",
+ "\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "import seaborn as sn\n",
+ "import random\n",
+ "import time\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "from plotly import __version__\n",
+ "from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot\n",
+ "\n",
+ "from sklearn import preprocessing\n",
+ "\n",
+ "from pandas import Series,DataFrame\n",
+ "\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "import sklearn.datasets as datasets\n",
+ "\n",
+ "#导入机器学习算法模型\n",
+ "from sklearn.linear_model import Lasso\n",
+ "from xgboost import XGBRegressor\n",
+ "\n",
+ "import statsmodels.api as sm\n",
+ "# from keras.preprocessing.sequence import TimeseriesGenerator\n",
+ "from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator\n",
+ "\n",
+ "import plotly.express as px\n",
+ "import plotly.graph_objects as go\n",
+ "\n",
+ "import xgboost as xgb\n",
+ "from xgboost import plot_importance, plot_tree\n",
+ "from sklearn.metrics import mean_absolute_error\n",
+ "from statsmodels.tools.eval_measures import mse,rmse\n",
+ "from sklearn.model_selection import GridSearchCV\n",
+ "from xgboost import XGBRegressor\n",
+ "import warnings\n",
+ "import pickle\n",
+ "\n",
+ "from sklearn.metrics import mean_squared_error\n",
+ "\n",
+ "#切割训练数据和样本数据\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "\n",
+ "#用于模型评分\n",
+ "from sklearn.metrics import r2_score\n",
+ "\n",
+ "le = preprocessing.LabelEncoder()\n",
+ "\n",
+ "# print(__version__) # requires version >= 1.9.0\n",
+ "\n",
+ "\n",
+ "import cufflinks as cf\n",
+ "cf.go_offline()\n",
+ "\n",
+ "random.seed(100)\n",
+ "\n",
+ "%matplotlib inline\n",
+ "\n",
+ "# 数据获取\n",
+ "\n",
+ "def get_head_auth():\n",
+ " login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5))\n",
+ " text = json.loads(login_res.text)\n",
+ " if text[\"status\"]:\n",
+ " token = text[\"data\"][\"accessToken\"]\n",
+ " return token\n",
+ " else:\n",
+ " print(\"获取认证失败\")\n",
+ " return None\n",
+ "\n",
+ "\n",
+ "def get_data_value(token, dataItemNoList,date=''):\n",
+ "\n",
+ " search_data = {\n",
+ " \"data\": {\n",
+ " \"date\": getNow(date)[0],\n",
+ " \"dataItemNoList\": dataItemNoList\n",
+ " },\n",
+ " \"funcModule\": \"数据项\",\n",
+ " \"funcOperation\": \"查询\"\n",
+ " }\n",
+ " headers = {\"Authorization\": token}\n",
+ " search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5))\n",
+ " search_value = json.loads(search_res.text)[\"data\"]\n",
+ " if search_value:\n",
+ " return search_value\n",
+ " else:\n",
+ " print(\"今天没有新数据\")\n",
+ " return None\n",
+ "\n",
+ "\n",
+ "# xls文件处理\n",
+ "\n",
+ "\n",
+ "\n",
+ "def getNow(date='', offset=0):\n",
+ " \"\"\"生成指定日期的两种格式字符串\n",
+ " Args:\n",
+ " date: 支持多种输入类型:\n",
+ " - datetime对象\n",
+ " - 字符串格式(支持'%Y-%m-%d'和'%Y%m%d')\n",
+ " - 空字符串表示当前日期\n",
+ " offset: 日期偏移天数\n",
+ " Returns:\n",
+ " tuple: (紧凑日期字符串, 标准日期字符串)\n",
+ " \"\"\"\n",
+ " # 日期解析逻辑\n",
+ " from datetime import datetime,timedelta\n",
+ " if isinstance(date, datetime):\n",
+ " now = date\n",
+ " else:\n",
+ " now = datetime.now()\n",
+ " if date:\n",
+ " # 尝试多种日期格式解析\n",
+ " for fmt in ('%Y-%m-%d', '%Y%m%d', '%Y/%m/%d'):\n",
+ " try:\n",
+ " now = datetime.strptime(str(date), fmt)\n",
+ " break\n",
+ " except ValueError:\n",
+ " continue\n",
+ " else:\n",
+ " raise ValueError(f\"无法解析的日期格式: {date}\")\n",
+ "\n",
+ " # 应用日期偏移\n",
+ " now = now - timedelta(days=offset)\n",
+ " \n",
+ " # 统一格式化输出\n",
+ " date_str = now.strftime(\"%Y-%m-%d\")\n",
+ " compact_date = date_str.replace(\"-\", \"\")\n",
+ " return compact_date, date_str\n",
+ "\n",
+ "\n",
+ "\n",
+ "# def get_cur_time(date = ''):\n",
+ "# if date == '':\n",
+ "# now = datetime.now()\n",
+ "# else:\n",
+ "# now = date\n",
+ "# year = now.year\n",
+ "# month = now.month\n",
+ "# day = now.day\n",
+ "\n",
+ "# if month < 10:\n",
+ "# month = \"0\" + str(month)\n",
+ "# if day < 10:\n",
+ "# day = \"0\" + str(day)\n",
+ "# cur_time = str(year) + str(month) + str(day)\n",
+ "# cur_time2 = str(year) + \"-\" + str(month) + \"-\" + str(day)\n",
+ "# # cur_time = '20231007'\n",
+ "# # cur_time2 = '2023-10-07'\n",
+ "# return cur_time, cur_time2\n",
+ "\n",
+ "\n",
+ "def get_head_push_auth():\n",
+ " login_res = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5))\n",
+ " text = json.loads(login_res.text)\n",
+ " if text[\"status\"]:\n",
+ " token = text[\"data\"][\"accessToken\"]\n",
+ " return token\n",
+ " else:\n",
+ " print(\"获取认证失败\")\n",
+ " return None\n",
+ "\n",
+ "def upload_data_to_system(token_push,date):\n",
+ " data = {\n",
+ " \"funcModule\": \"数据表信息列表\",\n",
+ " \"funcOperation\": \"新增\",\n",
+ " \"data\": [\n",
+ " {\"dataItemNo\": \"C01100036|Forecast_Price|ACN\",\n",
+ " \"dataDate\": getNow(date)[0],\n",
+ " \"dataStatus\": \"add\",\n",
+ " \"dataValue\": forecast_price()\n",
+ " }\n",
+ "\n",
+ " ]\n",
+ " }\n",
+ " headers = {\"Authorization\": token_push}\n",
+ " res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))\n",
+ " print(res.text)\n",
+ "\n",
+ "def forecast_price():\n",
+ " df_test = pd.read_excel('沥青数据项.xlsx')\n",
+ " df_test.drop([0],inplace=True)\n",
+ " df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n",
+ " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n",
+ " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n",
+ " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n",
+ " '京博签单量','京博库存量','京博产量','加权平均成交价']] = df_test[['汽油执行价','柴油执行价','齐鲁石化销量',\n",
+ " '齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n",
+ " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n",
+ " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n",
+ " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n",
+ " '京博签单量','京博库存量','京博产量','加权平均成交价']].astype('float')\n",
+ " # df_test['日期']=pd.to_datetime(df_test['日期'], format='%d/%m/%Y',infer_datetime_format=True)\n",
+ " df_test['日期']=pd.to_datetime(df_test['日期'], format='%Y-%m-%d',infer_datetime_format=True)\n",
+ "\n",
+ " #查看每个特征缺失值数量\n",
+ " MisVal_Check=df_test.isnull().sum().sort_values(ascending=False)\n",
+ " #去掉缺失值百分比>0.4的特征,去掉这些特征后的新表格命名为df_test_1\n",
+ " df_MisVal_Check = pd.DataFrame(MisVal_Check,)#\n",
+ " df_MisVal_Check_1=df_MisVal_Check.reset_index()\n",
+ " df_MisVal_Check_1.columns=['Variable_Name','Missing_Number'] \n",
+ " df_MisVal_Check_1['Missing_Number']=df_MisVal_Check_1['Missing_Number']/len(df_test)\n",
+ " df_test_1=df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number']>0.4].Variable_Name,axis = 1)\n",
+ "\n",
+ " #将缺失值补为前一个或者后一个数值\n",
+ " df_test_1=df_test_1.fillna(df_test.ffill())\n",
+ " df_test_1=df_test_1.fillna(df_test_1.bfill())\n",
+ "\n",
+ " # 选择用于模型训练的列名称\n",
+ " col_for_training = df_test_1.columns\n",
+ " import joblib\n",
+ " Best_model_DalyLGPrice = joblib.load(\"日度价格预测_最佳模型.pkl\")\n",
+ " # 最新的一天为最后一行的数据\n",
+ " df_test_1_Day = df_test_1.tail(1)\n",
+ " # 移除不需要的列\n",
+ " df_test_1_Day.index = df_test_1_Day[\"日期\"]\n",
+ " df_test_1_Day = df_test_1_Day.drop([\"日期\"], axis= 1)\n",
+ " df_test_1_Day=df_test_1_Day.drop('京博指导价',axis=1)\n",
+ " df_test_1_Day=df_test_1_Day.dropna()\n",
+ "\n",
+ " # df_test_1_Day\n",
+ " #预测今日价格,显示至小数点后两位\n",
+ " Ypredict_Today=Best_model_DalyLGPrice.predict(df_test_1_Day)\n",
+ "\n",
+ " df_test_1_Day['日度预测价格']=Ypredict_Today\n",
+ " print(df_test_1_Day['日度预测价格'])\n",
+ " a = df_test_1_Day['日度预测价格']\n",
+ " a = a[0]\n",
+ " a = float(a)\n",
+ " a = round(a,2)\n",
+ " return a\n",
+ "def optimize_Model():\n",
+ " from sklearn.model_selection import train_test_split\n",
+ " from sklearn.impute import SimpleImputer\n",
+ " from sklearn.preprocessing import OrdinalEncoder\n",
+ " from sklearn.feature_selection import SelectFromModel\n",
+ " from sklearn.metrics import mean_squared_error, r2_score\n",
+ "\n",
+ " pd.set_option('display.max_rows',40) \n",
+ " pd.set_option('display.max_columns',40) \n",
+ " df_test = pd.read_excel('沥青数据项.xlsx')\n",
+ " df_test.drop([0],inplace=True)\n",
+ " df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n",
+ " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n",
+ " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n",
+ " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n",
+ " '京博签单量','京博库存量','京博产量','加权平均成交价']] = df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n",
+ " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n",
+ " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n",
+ " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n",
+ " '京博签单量','京博库存量','京博产量','加权平均成交价']].astype('float')\n",
+ " # df_test = pd.read_csv('定价模型数据收集20190901-20230615.csv',encoding = 'gbk',engine = 'python')\n",
+ " # df_test['日期']=pd.to_datetime(df_test['日期'], format='%m/%d/%Y',infer_datetime_format=True)\n",
+ " df_test['日期']=pd.to_datetime(df_test['日期'], format='%Y-%m-%d',infer_datetime_format=True)\n",
+ " # df_test.tail(3)\n",
+ " MisVal_Check=df_test.isnull().sum().sort_values(ascending=False)\n",
+ " #去掉缺失值百分比>0.4的特征,去掉这些特征后的新表格命名为df_test_1\n",
+ " df_MisVal_Check = pd.DataFrame(MisVal_Check,)#\n",
+ " df_MisVal_Check_1=df_MisVal_Check.reset_index()\n",
+ " df_MisVal_Check_1.columns=['Variable_Name','Missing_Number'] \n",
+ " df_MisVal_Check_1['Missing_Number']=df_MisVal_Check_1['Missing_Number']/len(df_test)\n",
+ " df_test_1=df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number']>0.4].Variable_Name,axis = 1)\n",
+ " #将缺失值补为前一个或者后一个数值\n",
+ " df_test_1=df_test_1.fillna(df_test.ffill())\n",
+ " df_test_1=df_test_1.fillna(df_test_1.bfill())\n",
+ " df_test_1[\"日期\"] = pd.to_datetime(df_test_1[\"日期\"])\n",
+ " df_test_1.index = df_test_1[\"日期\"]\n",
+ " df_test_1 = df_test_1.drop([\"日期\"], axis= 1)\n",
+ " dataset1=df_test_1.drop('京博指导价',axis=1)#.astype(float)\n",
+ "\n",
+ " y=df_test_1['京博指导价']\n",
+ "\n",
+ " x=dataset1 \n",
+ "\n",
+ " train = x\n",
+ " target = y\n",
+ "\n",
+ " #切割数据样本集合测试集\n",
+ " X_train,x_test,y_train,y_true = train_test_split(train,target,test_size=0.2,random_state=0)\n",
+ " \n",
+ " \n",
+ " from sklearn.linear_model import Lasso\n",
+ " from xgboost import XGBRegressor\n",
+ "\n",
+ " import statsmodels.api as sm\n",
+ " # from keras.preprocessing.sequence import TimeseriesGenerator\n",
+ " from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator\n",
+ "\n",
+ " import plotly.express as px\n",
+ " import plotly.graph_objects as go\n",
+ "\n",
+ " import xgboost as xgb\n",
+ " from xgboost import plot_importance, plot_tree\n",
+ " from sklearn.metrics import mean_absolute_error\n",
+ " from statsmodels.tools.eval_measures import mse,rmse\n",
+ " from sklearn.model_selection import GridSearchCV\n",
+ " from xgboost import XGBRegressor\n",
+ " import warnings\n",
+ " import pickle\n",
+ "\n",
+ " from sklearn.metrics import mean_squared_error\n",
+ "\n",
+ " #切割训练数据和样本数据\n",
+ " from sklearn.model_selection import train_test_split\n",
+ "\n",
+ " #用于模型评分\n",
+ " from sklearn.metrics import r2_score\n",
+ "\n",
+ " #模型缩写\n",
+ " Lasso = Lasso(random_state=0)\n",
+ " XGBR = XGBRegressor(random_state=0)\n",
+ " Lasso.fit(X_train,y_train)\n",
+ " XGBR.fit(X_train,y_train)\n",
+ " y_pre_Lasso = Lasso.predict(x_test)\n",
+ " y_pre_XGBR = XGBR.predict(x_test)\n",
+ "\n",
+ " #计算Lasso、XGBR各模型的R²\n",
+ " Lasso_score = r2_score(y_true,y_pre_Lasso)\n",
+ " XGBR_score=r2_score(y_true,y_pre_XGBR)\n",
+ "\n",
+ " #计算Lasso、XGBR的MSE和RMSE\n",
+ " Lasso_MSE=mean_squared_error(y_true, y_pre_Lasso)\n",
+ " XGBR_MSE=mean_squared_error(y_true, y_pre_XGBR)\n",
+ "\n",
+ " Lasso_RMSE=np.sqrt(Lasso_MSE)\n",
+ " XGBR_RMSE=np.sqrt(XGBR_MSE)\n",
+ " model_results = pd.DataFrame([['Lasso', Lasso_RMSE, Lasso_score],\n",
+ " ['XgBoost', XGBR_RMSE, XGBR_score]],\n",
+ " columns = ['模型(Model)','均方根误差(RMSE)', 'R^2 score'])\n",
+ " model_results1=model_results.set_index('模型(Model)')\n",
+ "\n",
+ " def plot_feature_importance(importance,names,model_type):\n",
+ " feature_importance = np.array(importance)\n",
+ " feature_names = np.array(names)\n",
+ "\n",
+ " data={'feature_names':feature_names,'feature_importance':feature_importance}\n",
+ " fi_df = pd.DataFrame(data)\n",
+ "\n",
+ " fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True)\n",
+ "\n",
+ " plt.figure(figsize=(10,8))\n",
+ " sn.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names'])\n",
+ "\n",
+ " plt.title(model_type + \" \"+'FEATURE IMPORTANCE')\n",
+ " plt.xlabel('FEATURE IMPORTANCE')\n",
+ " plt.ylabel('FEATURE NAMES')\n",
+ " from pylab import mpl\n",
+ " %pylab\n",
+ " mpl.rcParams['font.sans-serif'] = ['SimHei']\n",
+ " from xgboost import XGBRegressor\n",
+ " from sklearn.model_selection import GridSearchCV\n",
+ "\n",
+ " estimator = XGBRegressor(random_state=0,\n",
+ " nthread=4,\n",
+ " seed=0\n",
+ " )\n",
+ " parameters = {\n",
+ " 'max_depth': range (2, 11, 2), # 树的最大深度\n",
+ " 'n_estimators': range (50, 101, 10), # 迭代次数\n",
+ " 'learning_rate': [0.01, 0.03, 0.1, 0.3, 0.5, 1]\n",
+ " }\n",
+ "\n",
+ " grid_search_XGB = GridSearchCV(\n",
+ " estimator=estimator,\n",
+ " param_grid=parameters,\n",
+ " # n_jobs = 10,\n",
+ " cv = 3,\n",
+ " verbose=True\n",
+ " )\n",
+ "\n",
+ " grid_search_XGB.fit(X_train, y_train)\n",
+ " print(\"Best score: %0.3f\" % grid_search_XGB.best_score_)\n",
+ " print(\"Best parameters set:\")\n",
+ " best_parameters = grid_search_XGB.best_estimator_.get_params()\n",
+ " for param_name in sorted(parameters.keys()):\n",
+ " print(\"\\t%s: %r\" % (param_name, best_parameters[param_name]))\n",
+ " y_pred = grid_search_XGB.predict(x_test)\n",
+ "\n",
+ " op_XGBR_score = r2_score(y_true,y_pred)\n",
+ " op_XGBR_MSE= mean_squared_error(y_true, y_pred)\n",
+ " op_XGBR_RMSE= np.sqrt(op_XGBR_MSE)\n",
+ "\n",
+ " model_results2 = pd.DataFrame([['Optimized_Xgboost', op_XGBR_RMSE, op_XGBR_score]],\n",
+ " columns = ['模型(Model)', '均方根误差(RMSE)', 'R^2 score'])\n",
+ " model_results2=model_results2.set_index('模型(Model)')\n",
+ "\n",
+ " # results = model_results1.append(model_results2, ignore_index = False)\n",
+ " results = pd.concat([model_results1,model_results2],ignore_index=True)\n",
+ " results\n",
+ " import pickle\n",
+ "\n",
+ " Pkl_Filename = \"日度价格预测_最佳模型.pkl\" \n",
+ "\n",
+ " with open(Pkl_Filename, 'wb') as file: \n",
+ " pickle.dump(grid_search_XGB, file)\n",
+ "\n",
+ "def read_xls_data():\n",
+ " \"\"\"获取特征项ID\"\"\"\n",
+ " global one_cols, two_cols\n",
+ " # 使用pandas读取Excel文件\n",
+ " df = pd.read_excel(read_file_path_name, header=None) # 不自动识别列名\n",
+ " # 获取第二行数据(索引为1)\n",
+ " one_cols = df.iloc[1].tolist()[1:]\n",
+ " print(f'获取到的数据项ID{one_cols}')\n",
+ "\n",
+ "# def read_xls_data():\n",
+ "# global one_cols, two_cols\n",
+ "# # 打开 XLS 文件\n",
+ "# workbook = xlrd.open_workbook(read_file_path_name)\n",
+ "\n",
+ "# # 获取所有表格名称\n",
+ "# # sheet_names = workbook.sheet_names()\n",
+ "\n",
+ "# # 选择第一个表格\n",
+ "# sheet = workbook.sheet_by_index(0)\n",
+ "\n",
+ "# # 获取行数和列数\n",
+ "# num_rows = sheet.nrows\n",
+ "# # num_cols = sheet.ncols\n",
+ "\n",
+ "# # 遍历每一行,获取单元格数据\n",
+ "# # for i in range(num_rows):\n",
+ "# # row_data = sheet.row_values(i)\n",
+ "# # one_cols.append(row_data)\n",
+ "# # two_cols.append(row_data[1])\n",
+ "\n",
+ "# row_data = sheet.row_values(1)\n",
+ "# one_cols = row_data\n",
+ "\n",
+ "# # 关闭 XLS 文件\n",
+ "# # workbook.close()\n",
+ "\n",
+ "def start_3(date=None,token=None,token_push=None):\n",
+ " '''预测上传数据'''\n",
+ " read_xls_data()\n",
+ "\n",
+ " if date==None:\n",
+ " date = datetime.now()\n",
+ " if token==None:\n",
+ " token = get_head_auth()\n",
+ " if token_push==None:\n",
+ " token_push = get_head_push_auth()\n",
+ "\n",
+ " datas = get_data_value(token, one_cols[1:],date)\n",
+ " if not datas:\n",
+ " return\n",
+ "\n",
+ " # data_list = [two_cols, one_cols]\n",
+ " append_rows = [get_cur_time(date)[1]]\n",
+ " dataItemNo_dataValue = {}\n",
+ " for data_value in datas:\n",
+ " if \"dataValue\" not in data_value:\n",
+ " print(data_value)\n",
+ " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n",
+ " else:\n",
+ " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n",
+ " \n",
+ " for value in one_cols[1:]:\n",
+ " if value in dataItemNo_dataValue:\n",
+ " append_rows.append(dataItemNo_dataValue[value])\n",
+ " else:\n",
+ " append_rows.append(\"\")\n",
+ " save_xls(append_rows)\n",
+ "\n",
+ " # 获取当月的数据写入到指定文件\n",
+ " # optimize_Model()\n",
+ " # upload_data_to_system(token_push,date)\n",
+ " # data_list.append(three_cols)\n",
+ " # write_xls(data_list)\n",
+ "\n",
+ "def check_data(dataItemNo):\n",
+ " token = get_head_auth()\n",
+ " if not token:\n",
+ " return\n",
+ "\n",
+ " datas = get_data_value(token, dataItemNo)\n",
+ " if not datas:\n",
+ " return\n",
+ "\n",
+ "def save_xls(append_rows):\n",
+ "\n",
+ " # 打开xls文件\n",
+ " workbook = xlrd.open_workbook('沥青数据项.xls')\n",
+ "\n",
+ " # 获取所有sheet的个数\n",
+ " sheet_count = len(workbook.sheet_names())\n",
+ "\n",
+ " # 获取所有sheet的名称\n",
+ " sheet_names = workbook.sheet_names()\n",
+ "\n",
+ " new_workbook = xlwt.Workbook()\n",
+ " for i in range(sheet_count):\n",
+ " # 获取当前sheet\n",
+ " sheet = workbook.sheet_by_index(i)\n",
+ "\n",
+ " # 获取sheet的行数和列数\n",
+ " row_count = sheet.nrows\n",
+ " col_count = sheet.ncols\n",
+ " # 获取原有数据\n",
+ " data = []\n",
+ " for row in range(row_count):\n",
+ " row_data = []\n",
+ " for col in range(col_count):\n",
+ " row_data.append(sheet.cell_value(row, col))\n",
+ " data.append(row_data)\n",
+ " # 创建xlwt的Workbook对象\n",
+ " # 创建sheet\n",
+ " new_sheet = new_workbook.add_sheet(sheet_names[i])\n",
+ "\n",
+ " # 将原有的数据写入新的sheet\n",
+ " for row in range(row_count):\n",
+ " for col in range(col_count):\n",
+ " new_sheet.write(row, col, data[row][col])\n",
+ "\n",
+ " if i == 0:\n",
+ " # 在新的sheet中添加数据\n",
+ " for col in range(col_count):\n",
+ " new_sheet.write(row_count, col, append_rows[col])\n",
+ "\n",
+ " # 保存新的xls文件\n",
+ " new_workbook.save(\"沥青数据项.xlsx\")\n",
+ "\n",
+ "def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEnd):\n",
+ "\n",
+ " search_data = {\n",
+ " \"funcModule\": \"数据项\",\n",
+ " \"funcOperation\": \"查询\",\n",
+ " \"data\": {\n",
+ " \"dateStart\": dateStart,\n",
+ " \"dateEnd\": dateEnd,\n",
+ " \"dataItemNoList\": dataItemNoList # 数据项编码,代表 brent最低价和最高价\n",
+ " }\n",
+ " }\n",
+ "\n",
+ " headers = {\"Authorization\": token}\n",
+ " search_res = requests.post(url=url, headers=headers, json=search_data, timeout=(3, 5))\n",
+ " search_value = json.loads(search_res.text)[\"data\"]\n",
+ " if search_value:\n",
+ " return search_value\n",
+ " else:\n",
+ " return None\n",
+ "\n",
+ "\n",
+ "def save_queryDataListItemNos_xls(data_df,dataItemNoList):\n",
+ " current_year_month = datetime.now().strftime('%Y-%m')\n",
+ " grouped = data_df.groupby(\"dataDate\")\n",
+ "\n",
+ " # 使用openpyxl打开xlsx文件\n",
+ " from openpyxl import load_workbook\n",
+ " workbook = load_workbook('沥青数据项.xlsx')\n",
+ "\n",
+ " # 创建新工作簿\n",
+ " new_workbook = load_workbook('沥青数据项.xlsx')\n",
+ " \n",
+ " for sheetname in workbook.sheetnames:\n",
+ " sheet = workbook[sheetname]\n",
+ " new_sheet = new_workbook[sheetname]\n",
+ " \n",
+ " current_year_month_row = 0\n",
+ " # 查找当前月份数据起始行\n",
+ " for row_idx, row in enumerate(sheet.iter_rows(values_only=True), 1):\n",
+ " if str(row[0]).startswith(current_year_month):\n",
+ " current_year_month_row += 1\n",
+ "\n",
+ " # 追加新数据\n",
+ " if sheetname == workbook.sheetnames[0]:\n",
+ " start_row = sheet.max_row - current_year_month_row + 1\n",
+ " for row_idx, (date, group) in enumerate(grouped, start=start_row):\n",
+ " new_sheet.cell(row=row_idx, column=1, value=date)\n",
+ " for j, dataItemNo in enumerate(dataItemNoList, start=2):\n",
+ " if group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values.size:\n",
+ " new_sheet.cell(row=row_idx, column=j, \n",
+ " value=group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values[0])\n",
+ "\n",
+ " # 保存修改后的xlsx文件\n",
+ " new_workbook.save(\"沥青数据项.xlsx\")\n",
+ "\n",
+ "\n",
+ "# def save_queryDataListItemNos_xls(data_df,dataItemNoList):\n",
+ "# from datetime import datetime, timedelta\n",
+ "# current_year_month = datetime.now().strftime('%Y-%m')\n",
+ "# grouped = data_df.groupby(\"dataDate\")\n",
+ "\n",
+ "# # 打开xls文件\n",
+ "# workbook = xlrd.open_workbook('沥青数据项.xlsx')\n",
+ "\n",
+ "# # 获取所有sheet的个数\n",
+ "# sheet_count = len(workbook.sheet_names())\n",
+ "\n",
+ "# # 获取所有sheet的名称\n",
+ "# sheet_names = workbook.sheet_names()\n",
+ "\n",
+ "# new_workbook = xlwt.Workbook()\n",
+ "# for i in range(sheet_count):\n",
+ "# # 获取当前sheet\n",
+ "# sheet = workbook.sheet_by_index(i)\n",
+ "\n",
+ "# # 获取sheet的行数和列数\n",
+ "# row_count = sheet.nrows\n",
+ "# col_count = sheet.ncols\n",
+ "# # 获取原有数据\n",
+ "# data = []\n",
+ "# for row in range(row_count):\n",
+ "# row_data = []\n",
+ "# for col in range(col_count):\n",
+ "# row_data.append(sheet.cell_value(row, col))\n",
+ "# data.append(row_data)\n",
+ "\n",
+ "# # 创建xlwt的Workbook对象\n",
+ "# # 创建sheet\n",
+ "# new_sheet = new_workbook.add_sheet(sheet_names[i])\n",
+ "\n",
+ "\n",
+ "# current_year_month_row = 0\n",
+ "# # 将原有的数据写入新的sheet\n",
+ "# for row in range(row_count):\n",
+ "# for col in range(col_count):\n",
+ "# col0 = data[row][0]\n",
+ "# # print(\"col0\",col0[:7])\n",
+ "# if col0[:7] == current_year_month:\n",
+ "# current_year_month_row += 1\n",
+ "# break\n",
+ "# new_sheet.write(row, col, data[row][col])\n",
+ "\n",
+ "\n",
+ "# # print(\"current_year_month_row\",current_year_month_row)\n",
+ "# if i == 0:\n",
+ "# rowFlag = 0\n",
+ "# # 查看每组数据\n",
+ "# for date, group in grouped:\n",
+ "# new_sheet.write(row_count + rowFlag - current_year_month_row, 0, date)\n",
+ "# for j in range(len(dataItemNoList)):\n",
+ "# dataItemNo = dataItemNoList[j]\n",
+ "\n",
+ "# # for dataItemNo in dataItemNoList:\n",
+ "# if group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values:\n",
+ "\n",
+ "# new_sheet.write(row_count + rowFlag - current_year_month_row, j + 1, group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values[0])\n",
+ "\n",
+ "# rowFlag += 1\n",
+ "\n",
+ "\n",
+ "# # 保存新的xls文件\n",
+ "# new_workbook.save(\"沥青数据项.xlsx\")\n",
+ "\n",
+ "def queryDataListItemNos(token=None):\n",
+ " from datetime import datetime, timedelta\n",
+ " df = pd.read_excel('沥青数据项.xlsx')\n",
+ " dataItemNoList = df.iloc[0].tolist()[1:]\n",
+ " \n",
+ " if token is None:\n",
+ " token = get_head_auth()\n",
+ "\n",
+ " if not token:\n",
+ " print('token获取失败')\n",
+ " return\n",
+ "\n",
+ " # 获取当前日期\n",
+ " current_date = datetime.now()\n",
+ "\n",
+ " # 获取当月1日\n",
+ " first_day_of_month = current_date.replace(day=1)\n",
+ "\n",
+ " # 格式化为 YYYYMMDD 格式\n",
+ " dateEnd = current_date.strftime('%Y%m%d')\n",
+ " dateStart = first_day_of_month.strftime('%Y%m%d')\n",
+ "\n",
+ " search_value = get_queryDataListItemNos_value(token, queryDataListItemNos_url, dataItemNoList, dateStart, dateEnd)\n",
+ "\n",
+ "\n",
+ " data_df = pd.DataFrame(search_value)\n",
+ "\n",
+ " data_df[\"dataDate\"] = pd.to_datetime(data_df[\"dataDate\"])\n",
+ " data_df[\"dataDate\"] = data_df[\"dataDate\"].dt.strftime('%Y-%m-%d')\n",
+ " save_queryDataListItemNos_xls(data_df,dataItemNoList)\n",
+ " print('当月数据更新完成')\n",
+ "\n",
+ "\n",
+ "def save_xls_1(append_rows):\n",
+ "\n",
+ " # 打开xls文件\n",
+ " workbook = xlrd.open_workbook('沥青数据项.xlsx')\n",
+ "\n",
+ " # 获取所有sheet的个数\n",
+ " sheet_count = len(workbook.sheet_names())\n",
+ "\n",
+ " # 获取所有sheet的名称\n",
+ " sheet_names = workbook.sheet_names()\n",
+ "\n",
+ " new_workbook = xlwt.Workbook()\n",
+ " for i in range(sheet_count):\n",
+ " # 获取当前sheet\n",
+ " sheet = workbook.sheet_by_index(i)\n",
+ "\n",
+ " # 获取sheet的行数和列数\n",
+ " row_count = sheet.nrows - 1\n",
+ " col_count = sheet.ncols\n",
+ " # 获取原有数据\n",
+ " data = []\n",
+ " for row in range(row_count):\n",
+ " row_data = []\n",
+ " for col in range(col_count):\n",
+ " row_data.append(sheet.cell_value(row, col))\n",
+ " data.append(row_data)\n",
+ " # 创建xlwt的Workbook对象\n",
+ " # 创建sheet\n",
+ " new_sheet = new_workbook.add_sheet(sheet_names[i])\n",
+ "\n",
+ " # 将原有的数据写入新的sheet\n",
+ " for row in range(row_count):\n",
+ " for col in range(col_count):\n",
+ " new_sheet.write(row, col, data[row][col])\n",
+ "\n",
+ " if i == 0:\n",
+ " # 在新的sheet中添加数据\n",
+ " for col in range(col_count):\n",
+ " new_sheet.write(row_count, col, append_rows[col])\n",
+ "\n",
+ " # 保存新的xls文件\n",
+ " new_workbook.save(\"沥青数据项.xlsx\") \n",
+ "\n",
+ "\n",
+ "\n",
+ "def start(date=''):\n",
+ " \"\"\"获取当日数据\"\"\"\n",
+ " read_xls_data()\n",
+ " token = get_head_auth()\n",
+ " if not token:\n",
+ " return\n",
+ " \n",
+ " cur_time,cur_time2 = getNow(date)\n",
+ " print(f\"获取{cur_time}数据\")\n",
+ " datas = get_data_value(token, one_cols,date=cur_time)\n",
+ " print(len(datas))\n",
+ " print(datas)\n",
+ " if not datas:\n",
+ " return\n",
+ "\n",
+ " append_rows = [cur_time2]\n",
+ " dataItemNo_dataValue = {}\n",
+ " for data_value in datas:\n",
+ " if \"dataValue\" not in data_value:\n",
+ " print(data_value)\n",
+ " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n",
+ " else:\n",
+ " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n",
+ " \n",
+ " for value in one_cols:\n",
+ " if value in dataItemNo_dataValue:\n",
+ " append_rows.append(dataItemNo_dataValue[value])\n",
+ " else:\n",
+ " append_rows.append(\"\")\n",
+ " \n",
+ " print('添加的行:',len(append_rows),append_rows)\n",
+ " save_xls_2(append_rows)\n",
+ "\n",
+ "\n",
+ "def save_xls_2(append_rows):\n",
+ " \"\"\"保存或更新数据到Excel文件\n",
+ " 参数:\n",
+ " append_rows (list): 需要追加/更新的数据行,格式为[日期, 数据项1, 数据项2,...]\n",
+ " \"\"\"\n",
+ " # try:\n",
+ " # 读取现有数据(假设第一行为列名)\n",
+ " df = pd.read_excel('沥青数据项.xlsx', sheet_name=0)\n",
+ " print('文件中的数据列数:',len(df.columns),df.columns)\n",
+ " # 转换append_rows为DataFrame\n",
+ " if len(append_rows) != len(df.columns):\n",
+ " # 去除第二个元素 ,不知道什么原因多一个空数据\n",
+ " append_rows.pop(1)\n",
+ " append_rows = pd.DataFrame([append_rows],columns=df.columns)\n",
+ " # 创建新数据行\n",
+ " new_date = append_rows['日期'].values[0]\n",
+ " \n",
+ " dates = df['日期'].to_list()\n",
+ " # 判断日期是否存在\n",
+ " if new_date in dates:\n",
+ " # 找到日期所在行的索引\n",
+ " date_mask = df['日期'] == new_date\n",
+ " # 存在则更新数据\n",
+ " df.loc[date_mask] = append_rows.values\n",
+ " print(f\"更新 {new_date} 数据\")\n",
+ " else:\n",
+ " # 不存在则追加数据\n",
+ " df = pd.concat([df, append_rows], ignore_index=True)\n",
+ " print(df.head())\n",
+ " print(df.tail())\n",
+ " print(f\"插入 {new_date} 新数据\")\n",
+ " \n",
+ " # 保存更新后的数据\n",
+ " df.to_excel('沥青数据项.xlsx', index=False, engine='openpyxl')\n",
+ " \n",
+ " # except FileNotFoundError:\n",
+ " # # 如果文件不存在则创建新文件\n",
+ " # pd.DataFrame([append_rows]).to_excel('沥青数据项.xlsx', index=False, engine='openpyxl')\n",
+ " # except Exception as e:\n",
+ " # print(f\"保存数据时发生错误: {str(e)}\")\n",
+ "\n",
+ "\n",
+ "def main(start_date=None,token=None,token_push=None):\n",
+ " from datetime import datetime\n",
+ " if start_date is None:\n",
+ " start_date = datetime.now()\n",
+ " if token is None:\n",
+ " token = get_head_auth()\n",
+ " if token_push is None:\n",
+ " token_push = get_head_push_auth()\n",
+ " date = start_date.strftime('%Y%m%d')\n",
+ " print(date)\n",
+ "# start(date)\n",
+ " # 更新当月数据\n",
+ " queryDataListItemNos(token)\n",
+ " # 训练模型\n",
+ " optimize_Model()\n",
+ " # # 预测&上传预测结果\n",
+ " upload_data_to_system(token_push,start_date)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "AttributeError",
+ "evalue": "module 'datetime' has no attribute 'now'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[1;32mIn[5], line 20\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# if __name__ == \"__main__\":\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m# print(\"运行中ing...\")\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;66;03m# # 每天定时12点运行\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 18\u001b[0m \u001b[38;5;66;03m# except :\u001b[39;00m\n\u001b[0;32m 19\u001b[0m \u001b[38;5;66;03m# print(f\"{current_time}任务失败\")\u001b[39;00m\n\u001b[1;32m---> 20\u001b[0m main()\n",
+ "Cell \u001b[1;32mIn[1], line 848\u001b[0m, in \u001b[0;36mmain\u001b[1;34m(start_date, token, token_push)\u001b[0m\n\u001b[0;32m 846\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mmain\u001b[39m(start_date\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,token\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,token_push\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m 847\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m start_date \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m--> 848\u001b[0m start_date \u001b[38;5;241m=\u001b[39m datetime\u001b[38;5;241m.\u001b[39mnow()\n\u001b[0;32m 849\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m token \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 850\u001b[0m token \u001b[38;5;241m=\u001b[39m get_head_auth()\n",
+ "\u001b[1;31mAttributeError\u001b[0m: module 'datetime' has no attribute 'now'"
+ ]
+ }
+ ],
+ "source": [
+ "\n",
+ "# if __name__ == \"__main__\":\n",
+ "# print(\"运行中ing...\")\n",
+ "# # 每天定时12点运行\n",
+ "# while True:\n",
+ "# # 获取当前时间\n",
+ "# current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n",
+ "# # print(current_time)\n",
+ "\n",
+ "# # 判断当前时间是否为执行任务的时间点\n",
+ "# try:\n",
+ "# if current_time == \"09:13:00\":\n",
+ "# print(\"执行定时任务\")\n",
+ "# main()\n",
+ "# elif current_time == \"09:18:00\":\n",
+ "# print(\"执行定时任务\")\n",
+ "# main()\n",
+ "# time.sleep(1)\n",
+ "# except :\n",
+ "# print(f\"{current_time}任务失败\")\n",
+ "main()\n",
+ "main()\n",
+ " # 检测数据准确性, 需要检测放开\n",
+ " # check_data(\"100028098|LISTING_PRICE\")\n",
+ " # check_data(\"9137070016544622XB|DAY_Yield\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# start_date = datetime(2025, 5, 16)\n",
+ "# end_date = datetime(2025, 5, 19)\n",
+ "# token = get_head_auth()\n",
+ "\n",
+ "# token_push = get_head_push_auth()\n",
+ "\n",
+ "# while start_date < end_date:\n",
+ "# main(start_date,token,token_push)\n",
+ "# start_date += timedelta(days=1)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "### 代码备份:\n",
+ "\n",
+ "\n",
+ "class codeBackup:\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ " def write_xls(data,date):\n",
+ " # 创建一个Workbook对象\n",
+ " workbook = xlwt.Workbook()\n",
+ "\n",
+ " # 创建一个Sheet对象,可指定名称\n",
+ " sheet = workbook.add_sheet('Sheet1')\n",
+ "\n",
+ " # 写入数据行\n",
+ " for row_index, row_data in enumerate(data):\n",
+ " for col_index, cell_data in enumerate(row_data):\n",
+ " sheet.write(row_index, col_index, cell_data)\n",
+ "\n",
+ " # 保存Workbook到文件\n",
+ " workbook.save(get_cur_time(date)[0] + '.xls')\n",
+ "\n",
+ "\n",
+ " def start():\n",
+ " '''预测上传数据'''\n",
+ " read_xls_data()\n",
+ "\n",
+ " token = get_head_auth()\n",
+ " if not token:\n",
+ " return\n",
+ " token_push = get_head_push_auth()\n",
+ " if not token_push:\n",
+ " return\n",
+ "\n",
+ " datas = get_data_value(token, one_cols[1:])\n",
+ " if not datas:\n",
+ " return\n",
+ "\n",
+ " # data_list = [two_cols, one_cols]\n",
+ " append_rows = [get_cur_time()[1]]\n",
+ " dataItemNo_dataValue = {}\n",
+ " for data_value in datas:\n",
+ " if \"dataValue\" not in data_value:\n",
+ " print(data_value)\n",
+ " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n",
+ " else:\n",
+ " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n",
+ " \n",
+ " for value in one_cols[1:]:\n",
+ " if value in dataItemNo_dataValue:\n",
+ " append_rows.append(dataItemNo_dataValue[value])\n",
+ " else:\n",
+ " append_rows.append(\"\")\n",
+ " save_xls(append_rows)\n",
+ " \n",
+ " # 获取当月的数据写入到指定文件\n",
+ " queryDataListItemNos(token)\n",
+ " optimize_Model()\n",
+ " upload_data_to_system(token_push)\n",
+ " # data_list.append(three_cols)\n",
+ " # write_xls(data_list)\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ " def start_1():\n",
+ " '''更新数据'''\n",
+ " print(\"更新当天数据\")\n",
+ " read_xls_data()\n",
+ "\n",
+ " token = get_head_auth()\n",
+ " if not token:\n",
+ " return\n",
+ " \n",
+ "\n",
+ " datas = get_data_value(token, one_cols[1:])\n",
+ " if not datas:\n",
+ " return\n",
+ "\n",
+ " # data_list = [two_cols, one_cols]\n",
+ " append_rows = [get_cur_time()[1]]\n",
+ " dataItemNo_dataValue = {}\n",
+ " for data_value in datas:\n",
+ " if \"dataValue\" not in data_value:\n",
+ " print(data_value)\n",
+ " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n",
+ " else:\n",
+ " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n",
+ " \n",
+ " for value in one_cols[1:]:\n",
+ " if value in dataItemNo_dataValue:\n",
+ " append_rows.append(dataItemNo_dataValue[value])\n",
+ " else:\n",
+ " append_rows.append(\"\")\n",
+ " print(\"当天数据为:\",append_rows)\n",
+ " save_xls_1(append_rows)\n",
+ "\n",
+ " \n",
+ " # data_list.append(three_cols)\n",
+ " # write_xls(data_list)\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "base",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb.r87044 b/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb.r87044
new file mode 100644
index 0000000..2f999bd
--- /dev/null
+++ b/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb.r87044
@@ -0,0 +1,1095 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\statsmodels\\compat\\pandas.py:49: FutureWarning:\n",
+ "\n",
+ "The Panel class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next version\n",
+ "\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ " \n",
+ " "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import requests\n",
+ "import json\n",
+ "import xlrd\n",
+ "import xlwt\n",
+ "from datetime import datetime, timedelta\n",
+ "import time\n",
+ "import pandas as pd\n",
+ "\n",
+ "# 变量定义\n",
+ "login_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n",
+ "search_url = \"http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos\"\n",
+ "queryDataListItemNos_url = \"http://10.200.32.39/jingbo-api//api/warehouse/dwDataItem/queryDataListItemNos\"\n",
+ "\n",
+ "login_push_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n",
+ "upload_url = \"http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList\"\n",
+ "\n",
+ "login_data = {\n",
+ " \"data\": {\n",
+ " \"account\": \"api_dev\",\n",
+ " \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n",
+ " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n",
+ " \"terminal\": \"API\"\n",
+ " },\n",
+ " \"funcModule\": \"API\",\n",
+ " \"funcOperation\": \"获取token\"\n",
+ "}\n",
+ "\n",
+ "login_push_data = {\n",
+ " \"data\": {\n",
+ " \"account\": \"api_dev\",\n",
+ " \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n",
+ " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n",
+ " \"terminal\": \"API\"\n",
+ " },\n",
+ " \"funcModule\": \"API\",\n",
+ " \"funcOperation\": \"获取token\"\n",
+ "}\n",
+ "\n",
+ "read_file_path_name = \"沥青数据项.xlsx\"\n",
+ "one_cols = []\n",
+ "two_cols = []\n",
+ "\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "import seaborn as sn\n",
+ "import random\n",
+ "import time\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "from plotly import __version__\n",
+ "from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot\n",
+ "\n",
+ "from sklearn import preprocessing\n",
+ "\n",
+ "from pandas import Series,DataFrame\n",
+ "\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "import sklearn.datasets as datasets\n",
+ "\n",
+ "#导入机器学习算法模型\n",
+ "from sklearn.linear_model import Lasso\n",
+ "from xgboost import XGBRegressor\n",
+ "\n",
+ "import statsmodels.api as sm\n",
+ "# from keras.preprocessing.sequence import TimeseriesGenerator\n",
+ "from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator\n",
+ "\n",
+ "import plotly.express as px\n",
+ "import plotly.graph_objects as go\n",
+ "\n",
+ "import xgboost as xgb\n",
+ "from xgboost import plot_importance, plot_tree\n",
+ "from sklearn.metrics import mean_absolute_error\n",
+ "from statsmodels.tools.eval_measures import mse,rmse\n",
+ "from sklearn.model_selection import GridSearchCV\n",
+ "from xgboost import XGBRegressor\n",
+ "import warnings\n",
+ "import pickle\n",
+ "\n",
+ "from sklearn.metrics import mean_squared_error\n",
+ "\n",
+ "#切割训练数据和样本数据\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "\n",
+ "#用于模型评分\n",
+ "from sklearn.metrics import r2_score\n",
+ "\n",
+ "le = preprocessing.LabelEncoder()\n",
+ "\n",
+ "# print(__version__) # requires version >= 1.9.0\n",
+ "\n",
+ "\n",
+ "import cufflinks as cf\n",
+ "cf.go_offline()\n",
+ "\n",
+ "random.seed(100)\n",
+ "\n",
+ "%matplotlib inline\n",
+ "\n",
+ "# 数据获取\n",
+ "\n",
+ "def get_head_auth():\n",
+ " login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5))\n",
+ " text = json.loads(login_res.text)\n",
+ " if text[\"status\"]:\n",
+ " token = text[\"data\"][\"accessToken\"]\n",
+ " return token\n",
+ " else:\n",
+ " print(\"获取认证失败\")\n",
+ " return None\n",
+ "\n",
+ "\n",
+ "def get_data_value(token, dataItemNoList,date=''):\n",
+ "\n",
+ " search_data = {\n",
+ " \"data\": {\n",
+ " \"date\": getNow(date)[0],\n",
+ " \"dataItemNoList\": dataItemNoList\n",
+ " },\n",
+ " \"funcModule\": \"数据项\",\n",
+ " \"funcOperation\": \"查询\"\n",
+ " }\n",
+ " headers = {\"Authorization\": token}\n",
+ " search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5))\n",
+ " search_value = json.loads(search_res.text)[\"data\"]\n",
+ " if search_value:\n",
+ " return search_value\n",
+ " else:\n",
+ " print(\"今天没有新数据\")\n",
+ " return None\n",
+ "\n",
+ "\n",
+ "# xls文件处理\n",
+ "\n",
+ "\n",
+ "\n",
+ "def getNow(date='', offset=0):\n",
+ " \"\"\"生成指定日期的两种格式字符串\n",
+ " Args:\n",
+ " date: 支持多种输入类型:\n",
+ " - datetime对象\n",
+ " - 字符串格式(支持'%Y-%m-%d'和'%Y%m%d')\n",
+ " - 空字符串表示当前日期\n",
+ " offset: 日期偏移天数\n",
+ " Returns:\n",
+ " tuple: (紧凑日期字符串, 标准日期字符串)\n",
+ " \"\"\"\n",
+ " # 日期解析逻辑\n",
+ " from datetime import datetime,timedelta\n",
+ " if isinstance(date, datetime):\n",
+ " now = date\n",
+ " else:\n",
+ " now = datetime.now()\n",
+ " if date:\n",
+ " # 尝试多种日期格式解析\n",
+ " for fmt in ('%Y-%m-%d', '%Y%m%d', '%Y/%m/%d'):\n",
+ " try:\n",
+ " now = datetime.strptime(str(date), fmt)\n",
+ " break\n",
+ " except ValueError:\n",
+ " continue\n",
+ " else:\n",
+ " raise ValueError(f\"无法解析的日期格式: {date}\")\n",
+ "\n",
+ " # 应用日期偏移\n",
+ " now = now - timedelta(days=offset)\n",
+ " \n",
+ " # 统一格式化输出\n",
+ " date_str = now.strftime(\"%Y-%m-%d\")\n",
+ " compact_date = date_str.replace(\"-\", \"\")\n",
+ " return compact_date, date_str\n",
+ "\n",
+ "\n",
+ "\n",
+ "# def get_cur_time(date = ''):\n",
+ "# if date == '':\n",
+ "# now = datetime.now()\n",
+ "# else:\n",
+ "# now = date\n",
+ "# year = now.year\n",
+ "# month = now.month\n",
+ "# day = now.day\n",
+ "\n",
+ "# if month < 10:\n",
+ "# month = \"0\" + str(month)\n",
+ "# if day < 10:\n",
+ "# day = \"0\" + str(day)\n",
+ "# cur_time = str(year) + str(month) + str(day)\n",
+ "# cur_time2 = str(year) + \"-\" + str(month) + \"-\" + str(day)\n",
+ "# # cur_time = '20231007'\n",
+ "# # cur_time2 = '2023-10-07'\n",
+ "# return cur_time, cur_time2\n",
+ "\n",
+ "\n",
+ "def get_head_push_auth():\n",
+ " login_res = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5))\n",
+ " text = json.loads(login_res.text)\n",
+ " if text[\"status\"]:\n",
+ " token = text[\"data\"][\"accessToken\"]\n",
+ " return token\n",
+ " else:\n",
+ " print(\"获取认证失败\")\n",
+ " return None\n",
+ "\n",
+ "def upload_data_to_system(token_push,date):\n",
+ " data = {\n",
+ " \"funcModule\": \"数据表信息列表\",\n",
+ " \"funcOperation\": \"新增\",\n",
+ " \"data\": [\n",
+ " {\"dataItemNo\": \"C01100036|Forecast_Price|ACN\",\n",
+ " \"dataDate\": getNow(date)[0],\n",
+ " \"dataStatus\": \"add\",\n",
+ " \"dataValue\": forecast_price()\n",
+ " }\n",
+ "\n",
+ " ]\n",
+ " }\n",
+ " headers = {\"Authorization\": token_push}\n",
+ " res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))\n",
+ " print(res.text)\n",
+ "\n",
+ "def forecast_price():\n",
+ " df_test = pd.read_excel('沥青数据项.xlsx')\n",
+ " df_test.drop([0],inplace=True)\n",
+ " df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n",
+ " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n",
+ " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n",
+ " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n",
+ " '京博签单量','京博库存量','京博产量','加权平均成交价']] = df_test[['汽油执行价','柴油执行价','齐鲁石化销量',\n",
+ " '齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n",
+ " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n",
+ " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n",
+ " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n",
+ " '京博签单量','京博库存量','京博产量','加权平均成交价']].astype('float')\n",
+ " # df_test['日期']=pd.to_datetime(df_test['日期'], format='%d/%m/%Y',infer_datetime_format=True)\n",
+ " df_test['日期']=pd.to_datetime(df_test['日期'], format='%Y-%m-%d',infer_datetime_format=True)\n",
+ "\n",
+ " #查看每个特征缺失值数量\n",
+ " MisVal_Check=df_test.isnull().sum().sort_values(ascending=False)\n",
+ " #去掉缺失值百分比>0.4的特征,去掉这些特征后的新表格命名为df_test_1\n",
+ " df_MisVal_Check = pd.DataFrame(MisVal_Check,)#\n",
+ " df_MisVal_Check_1=df_MisVal_Check.reset_index()\n",
+ " df_MisVal_Check_1.columns=['Variable_Name','Missing_Number'] \n",
+ " df_MisVal_Check_1['Missing_Number']=df_MisVal_Check_1['Missing_Number']/len(df_test)\n",
+ " df_test_1=df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number']>0.4].Variable_Name,axis = 1)\n",
+ "\n",
+ " #将缺失值补为前一个或者后一个数值\n",
+ " df_test_1=df_test_1.fillna(df_test.ffill())\n",
+ " df_test_1=df_test_1.fillna(df_test_1.bfill())\n",
+ "\n",
+ " # 选择用于模型训练的列名称\n",
+ " col_for_training = df_test_1.columns\n",
+ " import joblib\n",
+ " Best_model_DalyLGPrice = joblib.load(\"日度价格预测_最佳模型.pkl\")\n",
+ " # 最新的一天为最后一行的数据\n",
+ " df_test_1_Day = df_test_1.tail(1)\n",
+ " # 移除不需要的列\n",
+ " df_test_1_Day.index = df_test_1_Day[\"日期\"]\n",
+ " df_test_1_Day = df_test_1_Day.drop([\"日期\"], axis= 1)\n",
+ " df_test_1_Day=df_test_1_Day.drop('京博指导价',axis=1)\n",
+ " df_test_1_Day=df_test_1_Day.dropna()\n",
+ "\n",
+ " # df_test_1_Day\n",
+ " #预测今日价格,显示至小数点后两位\n",
+ " Ypredict_Today=Best_model_DalyLGPrice.predict(df_test_1_Day)\n",
+ "\n",
+ " df_test_1_Day['日度预测价格']=Ypredict_Today\n",
+ " print(df_test_1_Day['日度预测价格'])\n",
+ " a = df_test_1_Day['日度预测价格']\n",
+ " a = a[0]\n",
+ " a = float(a)\n",
+ " a = round(a,2)\n",
+ " return a\n",
+ "def optimize_Model():\n",
+ " from sklearn.model_selection import train_test_split\n",
+ " from sklearn.impute import SimpleImputer\n",
+ " from sklearn.preprocessing import OrdinalEncoder\n",
+ " from sklearn.feature_selection import SelectFromModel\n",
+ " from sklearn.metrics import mean_squared_error, r2_score\n",
+ "\n",
+ " pd.set_option('display.max_rows',40) \n",
+ " pd.set_option('display.max_columns',40) \n",
+ " df_test = pd.read_excel('沥青数据项.xlsx')\n",
+ " df_test.drop([0],inplace=True)\n",
+ " df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n",
+ " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n",
+ " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n",
+ " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n",
+ " '京博签单量','京博库存量','京博产量','加权平均成交价']] = df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n",
+ " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n",
+ " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n",
+ " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n",
+ " '京博签单量','京博库存量','京博产量','加权平均成交价']].astype('float')\n",
+ " # df_test = pd.read_csv('定价模型数据收集20190901-20230615.csv',encoding = 'gbk',engine = 'python')\n",
+ " # df_test['日期']=pd.to_datetime(df_test['日期'], format='%m/%d/%Y',infer_datetime_format=True)\n",
+ " df_test['日期']=pd.to_datetime(df_test['日期'], format='%Y-%m-%d',infer_datetime_format=True)\n",
+ " # df_test.tail(3)\n",
+ " MisVal_Check=df_test.isnull().sum().sort_values(ascending=False)\n",
+ " #去掉缺失值百分比>0.4的特征,去掉这些特征后的新表格命名为df_test_1\n",
+ " df_MisVal_Check = pd.DataFrame(MisVal_Check,)#\n",
+ " df_MisVal_Check_1=df_MisVal_Check.reset_index()\n",
+ " df_MisVal_Check_1.columns=['Variable_Name','Missing_Number'] \n",
+ " df_MisVal_Check_1['Missing_Number']=df_MisVal_Check_1['Missing_Number']/len(df_test)\n",
+ " df_test_1=df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number']>0.4].Variable_Name,axis = 1)\n",
+ " #将缺失值补为前一个或者后一个数值\n",
+ " df_test_1=df_test_1.fillna(df_test.ffill())\n",
+ " df_test_1=df_test_1.fillna(df_test_1.bfill())\n",
+ " df_test_1[\"日期\"] = pd.to_datetime(df_test_1[\"日期\"])\n",
+ " df_test_1.index = df_test_1[\"日期\"]\n",
+ " df_test_1 = df_test_1.drop([\"日期\"], axis= 1)\n",
+ " dataset1=df_test_1.drop('京博指导价',axis=1)#.astype(float)\n",
+ "\n",
+ " y=df_test_1['京博指导价']\n",
+ "\n",
+ " x=dataset1 \n",
+ "\n",
+ " train = x\n",
+ " target = y\n",
+ "\n",
+ " #切割数据样本集合测试集\n",
+ " X_train,x_test,y_train,y_true = train_test_split(train,target,test_size=0.2,random_state=0)\n",
+ " \n",
+ " \n",
+ " from sklearn.linear_model import Lasso\n",
+ " from xgboost import XGBRegressor\n",
+ "\n",
+ " import statsmodels.api as sm\n",
+ " # from keras.preprocessing.sequence import TimeseriesGenerator\n",
+ " from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator\n",
+ "\n",
+ " import plotly.express as px\n",
+ " import plotly.graph_objects as go\n",
+ "\n",
+ " import xgboost as xgb\n",
+ " from xgboost import plot_importance, plot_tree\n",
+ " from sklearn.metrics import mean_absolute_error\n",
+ " from statsmodels.tools.eval_measures import mse,rmse\n",
+ " from sklearn.model_selection import GridSearchCV\n",
+ " from xgboost import XGBRegressor\n",
+ " import warnings\n",
+ " import pickle\n",
+ "\n",
+ " from sklearn.metrics import mean_squared_error\n",
+ "\n",
+ " #切割训练数据和样本数据\n",
+ " from sklearn.model_selection import train_test_split\n",
+ "\n",
+ " #用于模型评分\n",
+ " from sklearn.metrics import r2_score\n",
+ "\n",
+ " #模型缩写\n",
+ " Lasso = Lasso(random_state=0)\n",
+ " XGBR = XGBRegressor(random_state=0)\n",
+ " Lasso.fit(X_train,y_train)\n",
+ " XGBR.fit(X_train,y_train)\n",
+ " y_pre_Lasso = Lasso.predict(x_test)\n",
+ " y_pre_XGBR = XGBR.predict(x_test)\n",
+ "\n",
+ " #计算Lasso、XGBR、RandomForestR、AdaBoostR、GradientBoostingR、BaggingRegressor各模型的R²\n",
+ " Lasso_score = r2_score(y_true,y_pre_Lasso)\n",
+ " XGBR_score=r2_score(y_true,y_pre_XGBR)\n",
+ "\n",
+ " #计算Lasso、XGBR的MSE和RMSE\n",
+ " Lasso_MSE=mean_squared_error(y_true, y_pre_Lasso)\n",
+ " XGBR_MSE=mean_squared_error(y_true, y_pre_XGBR)\n",
+ "\n",
+ " Lasso_RMSE=np.sqrt(Lasso_MSE)\n",
+ " XGBR_RMSE=np.sqrt(XGBR_MSE)\n",
+ " model_results = pd.DataFrame([['Lasso', Lasso_RMSE, Lasso_score],\n",
+ " ['XgBoost', XGBR_RMSE, XGBR_score]],\n",
+ " columns = ['模型(Model)','均方根误差(RMSE)', 'R^2 score'])\n",
+ " model_results1=model_results.set_index('模型(Model)')\n",
+ "\n",
+ " def plot_feature_importance(importance,names,model_type):\n",
+ " feature_importance = np.array(importance)\n",
+ " feature_names = np.array(names)\n",
+ "\n",
+ " data={'feature_names':feature_names,'feature_importance':feature_importance}\n",
+ " fi_df = pd.DataFrame(data)\n",
+ "\n",
+ " fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True)\n",
+ "\n",
+ " plt.figure(figsize=(10,8))\n",
+ " sn.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names'])\n",
+ "\n",
+ " plt.title(model_type + \" \"+'FEATURE IMPORTANCE')\n",
+ " plt.xlabel('FEATURE IMPORTANCE')\n",
+ " plt.ylabel('FEATURE NAMES')\n",
+ " from pylab import mpl\n",
+ " %pylab\n",
+ " mpl.rcParams['font.sans-serif'] = ['SimHei']\n",
+ " from xgboost import XGBRegressor\n",
+ " from sklearn.model_selection import GridSearchCV\n",
+ "\n",
+ " estimator = XGBRegressor(random_state=0,\n",
+ " nthread=4,\n",
+ " seed=0\n",
+ " )\n",
+ " parameters = {\n",
+ " 'max_depth': range (2, 11, 2), # 树的最大深度\n",
+ " 'n_estimators': range (50, 101, 10), # 迭代次数\n",
+ " 'learning_rate': [0.01, 0.03, 0.1, 0.3, 0.5, 1]\n",
+ " }\n",
+ "\n",
+ " grid_search_XGB = GridSearchCV(\n",
+ " estimator=estimator,\n",
+ " param_grid=parameters,\n",
+ " # n_jobs = 10,\n",
+ " cv = 3,\n",
+ " verbose=True\n",
+ " )\n",
+ "\n",
+ " grid_search_XGB.fit(X_train, y_train)\n",
+ " print(\"Best score: %0.3f\" % grid_search_XGB.best_score_)\n",
+ " print(\"Best parameters set:\")\n",
+ " best_parameters = grid_search_XGB.best_estimator_.get_params()\n",
+ " for param_name in sorted(parameters.keys()):\n",
+ " print(\"\\t%s: %r\" % (param_name, best_parameters[param_name]))\n",
+ " y_pred = grid_search_XGB.predict(x_test)\n",
+ "\n",
+ " op_XGBR_score = r2_score(y_true,y_pred)\n",
+ " op_XGBR_MSE= mean_squared_error(y_true, y_pred)\n",
+ " op_XGBR_RMSE= np.sqrt(op_XGBR_MSE)\n",
+ "\n",
+ " model_results2 = pd.DataFrame([['Optimized_Xgboost', op_XGBR_RMSE, op_XGBR_score]],\n",
+ " columns = ['模型(Model)', '均方根误差(RMSE)', 'R^2 score'])\n",
+ " model_results2=model_results2.set_index('模型(Model)')\n",
+ "\n",
+ " # results = model_results1.append(model_results2, ignore_index = False)\n",
+ " results = pd.concat([model_results1,model_results2],ignore_index=True)\n",
+ " results\n",
+ " import pickle\n",
+ "\n",
+ " Pkl_Filename = \"日度价格预测_最佳模型.pkl\" \n",
+ "\n",
+ " with open(Pkl_Filename, 'wb') as file: \n",
+ " pickle.dump(grid_search_XGB, file)\n",
+ "\n",
+ "def read_xls_data():\n",
+ "    \"\"\"Load the data-item IDs from the workbook into the global one_cols.\"\"\"\n",
+ "    global one_cols, two_cols\n",
+ "    # header=None stops pandas from consuming the first sheet row as column names;\n",
+ "    # sheet row index 1 then carries the IDs, and its first cell is skipped.\n",
+ "    raw_sheet = pd.read_excel(read_file_path_name, header=None)\n",
+ "    id_row = raw_sheet.iloc[1].tolist()\n",
+ "    one_cols = id_row[1:]; print(f'获取到的数据项ID{one_cols}')\n",
+ "\n",
+ "# def read_xls_data():\n",
+ "# global one_cols, two_cols\n",
+ "# # 打开 XLS 文件\n",
+ "# workbook = xlrd.open_workbook(read_file_path_name)\n",
+ "\n",
+ "# # 获取所有表格名称\n",
+ "# # sheet_names = workbook.sheet_names()\n",
+ "\n",
+ "# # 选择第一个表格\n",
+ "# sheet = workbook.sheet_by_index(0)\n",
+ "\n",
+ "# # 获取行数和列数\n",
+ "# num_rows = sheet.nrows\n",
+ "# # num_cols = sheet.ncols\n",
+ "\n",
+ "# # 遍历每一行,获取单元格数据\n",
+ "# # for i in range(num_rows):\n",
+ "# # row_data = sheet.row_values(i)\n",
+ "# # one_cols.append(row_data)\n",
+ "# # two_cols.append(row_data[1])\n",
+ "\n",
+ "# row_data = sheet.row_values(1)\n",
+ "# one_cols = row_data\n",
+ "\n",
+ "# # 关闭 XLS 文件\n",
+ "# # workbook.close()\n",
+ "\n",
+ "def start_3(date=None,token=None,token_push=None):\n",
+ "    '''Fetch the item values for one date and append them as a row via save_xls().\n",
+ "    Arguments left as None default to the current time / freshly fetched tokens.'''\n",
+ "    read_xls_data()\n",
+ "\n",
+ "    if date is None:\n",
+ "        date = datetime.now()\n",
+ "    if token is None:\n",
+ "        token = get_head_auth()\n",
+ "    if token_push is None:\n",
+ "        # Fixed: this branch used to overwrite token and leave token_push as None.\n",
+ "        token_push = get_head_push_auth()\n",
+ "\n",
+ "    # NOTE(review): read_xls_data() already drops the first column, so this slice\n",
+ "    # skips one more ID than start() does - confirm that this is intended.\n",
+ "    item_ids = one_cols[1:]\n",
+ "    datas = get_data_value(token, item_ids, date)\n",
+ "    if not datas:\n",
+ "        return\n",
+ "\n",
+ "    # Item number -> value; entries without \"dataValue\" are printed and stored as \"\".\n",
+ "    dataItemNo_dataValue = {}\n",
+ "    for data_value in datas:\n",
+ "        if \"dataValue\" not in data_value:\n",
+ "            print(data_value)\n",
+ "        dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value.get(\"dataValue\", \"\")\n",
+ "\n",
+ "    # Row layout: date string first, then one cell per requested ID (\"\" when absent).\n",
+ "    # getNow() is used as in start(); no live get_cur_time() definition is visible.\n",
+ "    append_rows = [getNow(date)[1]]\n",
+ "    append_rows.extend(dataItemNo_dataValue.get(value, \"\") for value in item_ids)\n",
+ "    save_xls(append_rows)\n",
+ "\n",
+ "    # Follow-up steps the original left disabled:\n",
+ "    # optimize_Model()\n",
+ "    # upload_data_to_system(token_push,date)\n",
+ "    # write_xls(data_list)\n",
+ "\n",
+ "def check_data(dataItemNo):\n",
+ "    \"\"\"Query dataItemNo once as a spot check; the result is discarded (returns None).\"\"\"\n",
+ "    token = get_head_auth()\n",
+ "    # Without a token there is nothing to query.\n",
+ "    if token:\n",
+ "        # Only the call itself matters here: whatever get_data_value() returns,\n",
+ "        # this function ends right after it.\n",
+ "        get_data_value(token, dataItemNo)\n",
+ "\n",
+ "def save_xls(append_rows):\n",
+ "    \"\"\"Copy '沥青数据项.xls' into a new workbook and append one row to its first sheet.\n",
+ "\n",
+ "    append_rows: cell values for the new row, in column order. A row shorter\n",
+ "    than the sheet is padded with \"\" (it used to raise IndexError part-way\n",
+ "    through); values beyond the sheet's column count are ignored, as before.\n",
+ "    Returns None.\n",
+ "\n",
+ "    The copy is saved with xlwt as \"沥青数据项.xlsx\".\n",
+ "    NOTE(review): xlwt writes the legacy .xls format whatever the file name\n",
+ "    says - confirm that readers of the .xlsx file cope with that.\n",
+ "    \"\"\"\n",
+ "\n",
+ "    # Source workbook (only read) and the workbook that will be saved.\n",
+ "    workbook = xlrd.open_workbook('沥青数据项.xls')\n",
+ "    new_workbook = xlwt.Workbook()\n",
+ "\n",
+ "    # Sheets keep their names and order; only cell values are carried over.\n",
+ "    for i, sheet_name in enumerate(workbook.sheet_names()):\n",
+ "        sheet = workbook.sheet_by_index(i)\n",
+ "        row_count = sheet.nrows\n",
+ "        col_count = sheet.ncols\n",
+ "        new_sheet = new_workbook.add_sheet(sheet_name)\n",
+ "\n",
+ "        # Replay every existing cell into the new sheet.\n",
+ "        for row in range(row_count):\n",
+ "            for col in range(col_count):\n",
+ "                new_sheet.write(row, col, sheet.cell_value(row, col))\n",
+ "\n",
+ "        # Only the first sheet receives the new row.\n",
+ "        if i != 0:\n",
+ "            continue\n",
+ "\n",
+ "        # Pad the row up to the sheet width so that a short row cannot raise\n",
+ "        # IndexError in the middle of the write.\n",
+ "        padded = list(append_rows) + [\"\"] * (col_count - len(append_rows))\n",
+ "        for col in range(col_count):\n",
+ "            # Index row_count is the first row below the existing data.\n",
+ "            new_sheet.write(row_count, col, padded[col])\n",
+ "\n",
+ "    # The source .xls is not modified; the result goes to a separate file.\n",
+ "    new_workbook.save(\"沥青数据项.xlsx\")\n",
+ "\n",
+ "def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEnd):\n",
+ "    \"\"\"POST a date-range query for the given item numbers and return its \"data\".\n",
+ "\n",
+ "    dateStart/dateEnd are forwarded unchanged. Returns the reply's \"data\"\n",
+ "    payload, or None when it is missing or empty.\n",
+ "    \"\"\"\n",
+ "    search_data = {\n",
+ "        \"funcModule\": \"数据项\",\n",
+ "        \"funcOperation\": \"查询\",\n",
+ "        \"data\": {\n",
+ "            \"dateStart\": dateStart,\n",
+ "            \"dateEnd\": dateEnd,\n",
+ "            \"dataItemNoList\": dataItemNoList\n",
+ "        }\n",
+ "    }\n",
+ "    headers = {\"Authorization\": token}\n",
+ "    search_res = requests.post(url=url, headers=headers, json=search_data, timeout=(3, 5))\n",
+ "    # .get(): a reply without \"data\" now yields None instead of raising KeyError.\n",
+ "    return json.loads(search_res.text).get(\"data\") or None\n",
+ "\n",
+ "\n",
+ "def save_queryDataListItemNos_xls(data_df,dataItemNoList):\n",
+ "    \"\"\"Rewrite the current month's rows on the first sheet of '沥青数据项.xlsx'.\n",
+ "\n",
+ "    data_df must provide \"dataDate\", \"dataItemNo\" and \"dataValue\" columns. One\n",
+ "    sheet row is written per distinct dataDate: the date goes to column 1 and\n",
+ "    the items of dataItemNoList to columns 2.. in list order. Cells with no\n",
+ "    matching value are left as they are.\n",
+ "    \"\"\"\n",
+ "    from openpyxl import load_workbook\n",
+ "    current_year_month = datetime.now().strftime('%Y-%m')\n",
+ "    # A single workbook suffices: every read below happens before the first write.\n",
+ "    workbook = load_workbook('沥青数据项.xlsx')\n",
+ "    sheet = workbook[workbook.sheetnames[0]]\n",
+ "\n",
+ "    # Rows whose first cell starts with YYYY-MM count as current-month rows.\n",
+ "    month_rows = sum(\n",
+ "        1 for row in sheet.iter_rows(values_only=True)\n",
+ "        if str(row[0]).startswith(current_year_month)\n",
+ "    )\n",
+ "    # Assumes those rows are the last ones on the sheet, so writing restarts there.\n",
+ "    start_row = sheet.max_row - month_rows + 1\n",
+ "\n",
+ "    for row_idx, (date, group) in enumerate(data_df.groupby(\"dataDate\"), start=start_row):\n",
+ "        sheet.cell(row=row_idx, column=1, value=date)\n",
+ "        for j, dataItemNo in enumerate(dataItemNoList, start=2):\n",
+ "            values = group.loc[group[\"dataItemNo\"] == dataItemNo, \"dataValue\"].values\n",
+ "            # Check the length, not the array's truth value: `if values:` skipped\n",
+ "            # a legitimate 0 and raised ValueError when an item appeared twice.\n",
+ "            if len(values) > 0:\n",
+ "                sheet.cell(row=row_idx, column=j, value=values[0])\n",
+ "\n",
+ "    # Save in place over the same file.\n",
+ "    workbook.save(\"沥青数据项.xlsx\")\n",
+ "\n",
+ "\n",
+ "# def save_queryDataListItemNos_xls(data_df,dataItemNoList):\n",
+ "# from datetime import datetime, timedelta\n",
+ "# current_year_month = datetime.now().strftime('%Y-%m')\n",
+ "# grouped = data_df.groupby(\"dataDate\")\n",
+ "\n",
+ "# # 打开xls文件\n",
+ "# workbook = xlrd.open_workbook('沥青数据项.xlsx')\n",
+ "\n",
+ "# # 获取所有sheet的个数\n",
+ "# sheet_count = len(workbook.sheet_names())\n",
+ "\n",
+ "# # 获取所有sheet的名称\n",
+ "# sheet_names = workbook.sheet_names()\n",
+ "\n",
+ "# new_workbook = xlwt.Workbook()\n",
+ "# for i in range(sheet_count):\n",
+ "# # 获取当前sheet\n",
+ "# sheet = workbook.sheet_by_index(i)\n",
+ "\n",
+ "# # 获取sheet的行数和列数\n",
+ "# row_count = sheet.nrows\n",
+ "# col_count = sheet.ncols\n",
+ "# # 获取原有数据\n",
+ "# data = []\n",
+ "# for row in range(row_count):\n",
+ "# row_data = []\n",
+ "# for col in range(col_count):\n",
+ "# row_data.append(sheet.cell_value(row, col))\n",
+ "# data.append(row_data)\n",
+ "\n",
+ "# # 创建xlwt的Workbook对象\n",
+ "# # 创建sheet\n",
+ "# new_sheet = new_workbook.add_sheet(sheet_names[i])\n",
+ "\n",
+ "\n",
+ "# current_year_month_row = 0\n",
+ "# # 将原有的数据写入新的sheet\n",
+ "# for row in range(row_count):\n",
+ "# for col in range(col_count):\n",
+ "# col0 = data[row][0]\n",
+ "# # print(\"col0\",col0[:7])\n",
+ "# if col0[:7] == current_year_month:\n",
+ "# current_year_month_row += 1\n",
+ "# break\n",
+ "# new_sheet.write(row, col, data[row][col])\n",
+ "\n",
+ "\n",
+ "# # print(\"current_year_month_row\",current_year_month_row)\n",
+ "# if i == 0:\n",
+ "# rowFlag = 0\n",
+ "# # 查看每组数据\n",
+ "# for date, group in grouped:\n",
+ "# new_sheet.write(row_count + rowFlag - current_year_month_row, 0, date)\n",
+ "# for j in range(len(dataItemNoList)):\n",
+ "# dataItemNo = dataItemNoList[j]\n",
+ "\n",
+ "# # for dataItemNo in dataItemNoList:\n",
+ "# if group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values:\n",
+ "\n",
+ "# new_sheet.write(row_count + rowFlag - current_year_month_row, j + 1, group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values[0])\n",
+ "\n",
+ "# rowFlag += 1\n",
+ "\n",
+ "\n",
+ "# # 保存新的xls文件\n",
+ "# new_workbook.save(\"沥青数据项.xlsx\")\n",
+ "\n",
+ "def queryDataListItemNos(token=None):\n",
+ "    \"\"\"Refresh this month's rows in '沥青数据项.xlsx' from the query API.\n",
+ "\n",
+ "    token: API token; fetched with get_head_auth() when None. Prints a message\n",
+ "    and returns early when no token or no data is available.\n",
+ "    \"\"\"\n",
+ "    from datetime import datetime, timedelta\n",
+ "    df = pd.read_excel('沥青数据项.xlsx')\n",
+ "    # With the header row consumed by pandas, data row 0 carries the item IDs.\n",
+ "    dataItemNoList = df.iloc[0].tolist()[1:]\n",
+ "\n",
+ "    if token is None:\n",
+ "        token = get_head_auth()\n",
+ "    if not token:\n",
+ "        print('token获取失败')\n",
+ "        return\n",
+ "\n",
+ "    # Query window: first day of the current month through today (YYYYMMDD).\n",
+ "    current_date = datetime.now()\n",
+ "    dateStart = current_date.replace(day=1).strftime('%Y%m%d')\n",
+ "    dateEnd = current_date.strftime('%Y%m%d')\n",
+ "    search_value = get_queryDataListItemNos_value(token, queryDataListItemNos_url, dataItemNoList, dateStart, dateEnd)\n",
+ "    if not search_value:\n",
+ "        # An empty result has no \"dataDate\" column; indexing it raised KeyError.\n",
+ "        print('当月没有可更新的数据')\n",
+ "        return\n",
+ "\n",
+ "    data_df = pd.DataFrame(search_value)\n",
+ "    data_df[\"dataDate\"] = pd.to_datetime(data_df[\"dataDate\"]).dt.strftime('%Y-%m-%d')\n",
+ "    save_queryDataListItemNos_xls(data_df,dataItemNoList)\n",
+ "    print('当月数据更新完成')\n",
+ "\n",
+ "\n",
+ "def save_xls_1(append_rows):\n",
+ "    \"\"\"Rewrite '沥青数据项.xlsx', replacing the last row of its first sheet.\n",
+ "\n",
+ "    Every sheet is copied without its final row; on the first sheet\n",
+ "    append_rows is then written at that row's position. A row shorter than\n",
+ "    the sheet is padded with \"\" (it used to raise IndexError); values beyond\n",
+ "    the sheet's column count are ignored, as before. Returns None.\n",
+ "\n",
+ "    NOTE(review): sheets other than the first lose their last row on every\n",
+ "    call, and the file is read with xlrd / written with xlwt although its\n",
+ "    name ends in .xlsx - confirm both are intended and supported.\n",
+ "    \"\"\"\n",
+ "\n",
+ "    # Source workbook (only read) and the workbook that will be saved.\n",
+ "    workbook = xlrd.open_workbook('沥青数据项.xlsx')\n",
+ "    new_workbook = xlwt.Workbook()\n",
+ "\n",
+ "    for i, sheet_name in enumerate(workbook.sheet_names()):\n",
+ "        sheet = workbook.sheet_by_index(i)\n",
+ "        # One less than nrows: the final row is not carried over.\n",
+ "        row_count = sheet.nrows - 1\n",
+ "        col_count = sheet.ncols\n",
+ "        new_sheet = new_workbook.add_sheet(sheet_name)\n",
+ "\n",
+ "        # Replay the kept cells into the new sheet.\n",
+ "        for row in range(row_count):\n",
+ "            for col in range(col_count):\n",
+ "                new_sheet.write(row, col, sheet.cell_value(row, col))\n",
+ "\n",
+ "        # Only the first sheet receives the new row.\n",
+ "        if i != 0:\n",
+ "            continue\n",
+ "\n",
+ "        # Pad the row up to the sheet width so that a short row cannot raise\n",
+ "        # IndexError in the middle of the write.\n",
+ "        padded = list(append_rows) + [\"\"] * (col_count - len(append_rows))\n",
+ "        for col in range(col_count):\n",
+ "            # Index row_count is where the dropped final row used to be.\n",
+ "            new_sheet.write(row_count, col, padded[col])\n",
+ "\n",
+ "    # Overwrites the workbook that was just read.\n",
+ "    new_workbook.save(\"沥青数据项.xlsx\")\n",
+ "\n",
+ "\n",
+ "\n",
+ "def start(date=''):\n",
+ "    \"\"\"Fetch one day's values for every ID in one_cols and upsert the row (save_xls_2).\n",
+ "\n",
+ "    date: anything getNow() accepts; '' means today.\n",
+ "    \"\"\"\n",
+ "    read_xls_data()\n",
+ "    token = get_head_auth()\n",
+ "    if not token:\n",
+ "        return\n",
+ "\n",
+ "    cur_time,cur_time2 = getNow(date)\n",
+ "    print(f\"获取{cur_time}数据\")\n",
+ "    datas = get_data_value(token, one_cols,date=cur_time)\n",
+ "    # Guard before len(): when nothing comes back the result may be None, and\n",
+ "    # len(None) used to raise TypeError here.\n",
+ "    if not datas:\n",
+ "        return\n",
+ "    print(len(datas))\n",
+ "    print(datas)\n",
+ "\n",
+ "    dataItemNo_dataValue = {}\n",
+ "    for data_value in datas:\n",
+ "        if \"dataValue\" not in data_value:\n",
+ "            print(data_value)\n",
+ "        dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value.get(\"dataValue\", \"\")\n",
+ "\n",
+ "    # Row layout: date string first, then one cell per ID in one_cols (\"\" when absent).\n",
+ "    append_rows = [cur_time2]\n",
+ "    append_rows.extend(dataItemNo_dataValue.get(value, \"\") for value in one_cols)\n",
+ "\n",
+ "    print('添加的行:',len(append_rows),append_rows)\n",
+ "    save_xls_2(append_rows)\n",
+ "\n",
+ "\n",
+ "def save_xls_2(append_rows):\n",
+ "    \"\"\"Insert or update one dated row on the first sheet of '沥青数据项.xlsx'.\n",
+ "\n",
+ "    Args:\n",
+ "        append_rows (list): [date, item value 1, item value 2, ...], matching\n",
+ "            the sheet's columns. The caller's list is no longer modified.\n",
+ "    Raises:\n",
+ "        ValueError: (from pandas) when the row cannot be aligned with the columns.\n",
+ "    \"\"\"\n",
+ "\n",
+ "    # NOTE: a missing workbook still raises FileNotFoundError; the create-on-demand\n",
+ "    # fallback that the original kept commented out has not been enabled.\n",
+ "    # The first sheet row is taken as the column names.\n",
+ "    df = pd.read_excel('沥青数据项.xlsx', sheet_name=0)\n",
+ "    print('文件中的数据列数:',len(df.columns),df.columns)\n",
+ "\n",
+ "    # Work on a copy so that the caller's list stays untouched.\n",
+ "    row_values = list(append_rows)\n",
+ "    if len(row_values) == len(df.columns) + 1:\n",
+ "        # The original noted an unexplained surplus blank cell right after the\n",
+ "        # date; drop it only when the row is exactly one cell too long.\n",
+ "        row_values.pop(1)\n",
+ "    new_row = pd.DataFrame([row_values],columns=df.columns)\n",
+ "    new_date = new_row['日期'].values[0]\n",
+ "\n",
+ "    if new_date in df['日期'].to_list():\n",
+ "        # Date already present: overwrite every row carrying it.\n",
+ "        date_mask = df['日期'] == new_date\n",
+ "        df.loc[date_mask] = new_row.values\n",
+ "        print(f\"更新 {new_date} 数据\")\n",
+ "    else:\n",
+ "        # New date: append at the end.\n",
+ "        df = pd.concat([df, new_row], ignore_index=True)\n",
+ "        print(df.head())\n",
+ "        print(df.tail())\n",
+ "        print(f\"插入 {new_date} 新数据\")\n",
+ "\n",
+ "    # Write the whole sheet back without the index column.\n",
+ "    df.to_excel('沥青数据项.xlsx', index=False, engine='openpyxl')\n",
+ "\n",
+ "\n",
+ "\n",
+ "def main(start_date=None,token=None,token_push=None):\n",
+ "    \"\"\"Daily pipeline: refresh this month's data, retrain, then upload.\n",
+ "\n",
+ "    Any argument left as None is filled in here (current time / fresh tokens).\n",
+ "    \"\"\"\n",
+ "    start_date = datetime.now() if start_date is None else start_date\n",
+ "    token = get_head_auth() if token is None else token\n",
+ "    token_push = get_head_push_auth() if token_push is None else token_push\n",
+ "    print(start_date.strftime('%Y%m%d'))\n",
+ "    # start(date) stays disabled, as in the original.\n",
+ "    # 1) refresh the current month's rows in the workbook\n",
+ "    queryDataListItemNos(token)\n",
+ "    # 2) retrain the model\n",
+ "    optimize_Model()\n",
+ "    # 3) predict and upload the result\n",
+ "    upload_data_to_system(token_push,start_date)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "运行中ing...\n"
+ ]
+ }
+ ],
+ "source": [
+ "\n",
+ "if __name__ == \"__main__\":\n",
+ "    print(\"运行中ing...\")\n",
+ "    # Daily run slots (HH:MM). Matching on the minute instead of one exact second\n",
+ "    # means a slightly late poll can no longer skip the run.\n",
+ "    run_slots = (\"09:13\", \"09:18\")\n",
+ "    finished = set()  # (date, slot) pairs that have already been started\n",
+ "    while True:\n",
+ "        now = time.localtime()\n",
+ "        current_time = time.strftime(\"%H:%M:%S\", now)\n",
+ "        slot_key = (time.strftime(\"%Y-%m-%d\", now), current_time[:5])\n",
+ "        try:\n",
+ "            if slot_key[1] in run_slots and slot_key not in finished:\n",
+ "                finished.add(slot_key)\n",
+ "                print(\"执行定时任务\")\n",
+ "                main()\n",
+ "        except Exception as exc:\n",
+ "            # Exception rather than a bare except, so a kernel interrupt still stops the loop.\n",
+ "            print(f\"{current_time}任务失败: {exc!r}\")\n",
+ "        # Sleep on every pass, also after a failure, so a failing run is not retried in a tight loop.\n",
+ "        time.sleep(1)\n",
+ "    # Data spot checks; uncomment when needed:\n",
+ "    # check_data(\"100028098|LISTING_PRICE\")\n",
+ "    # check_data(\"9137070016544622XB|DAY_Yield\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# start_date = datetime(2025, 5, 16)\n",
+ "# end_date = datetime(2025, 5, 19)\n",
+ "# token = get_head_auth()\n",
+ "\n",
+ "# token_push = get_head_push_auth()\n",
+ "\n",
+ "# while start_date < end_date:\n",
+ "# main(start_date,token,token_push)\n",
+ "# start_date += timedelta(days=1)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "### 代码备份:\n",
+ "\n",
+ "\n",
+ "class codeBackup:\n",
+ "    \"\"\"Archive of superseded entry points, kept for reference only.\n",
+ "\n",
+ "    The functions take no self, so call them through the class, e.g. codeBackup.start().\n",
+ "    \"\"\"\n",
+ "    def write_xls(data,date):\n",
+ "        \"\"\"Write data (rows of cell values) to sheet 'Sheet1' of a new .xls file.\"\"\"\n",
+ "        workbook = xlwt.Workbook()\n",
+ "\n",
+ "        # Fixed: xlwt creates sheets with add_sheet(); Workbook has no load() method.\n",
+ "        sheet = workbook.add_sheet('Sheet1')\n",
+ "\n",
+ "        # One cell per (row, column) position of data.\n",
+ "        for row_index, row_data in enumerate(data):\n",
+ "            for col_index, cell_data in enumerate(row_data):\n",
+ "                sheet.write(row_index, col_index, cell_data)\n",
+ "\n",
+ "        # File name: first element of getNow(date) plus '.xls' (getNow replaces get_cur_time).\n",
+ "        workbook.save(getNow(date)[0] + '.xls')\n",
+ "\n",
+ "\n",
+ "    def start():\n",
+ "        '''Append today's row, refresh the month, retrain and upload the forecast.'''\n",
+ "        read_xls_data()\n",
+ "\n",
+ "        token = get_head_auth()\n",
+ "        if not token:\n",
+ "            return\n",
+ "        token_push = get_head_push_auth()\n",
+ "        if not token_push:\n",
+ "            return\n",
+ "\n",
+ "        datas = get_data_value(token, one_cols[1:])\n",
+ "        if not datas:\n",
+ "            return\n",
+ "\n",
+ "        # The row starts with getNow()[1] (was get_cur_time()); the other cells follow one_cols[1:].\n",
+ "        append_rows = [getNow()[1]]\n",
+ "        dataItemNo_dataValue = {}\n",
+ "        for data_value in datas:\n",
+ "            if \"dataValue\" not in data_value:\n",
+ "                print(data_value)\n",
+ "                dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n",
+ "            else:\n",
+ "                dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n",
+ "\n",
+ "        for value in one_cols[1:]:\n",
+ "            if value in dataItemNo_dataValue:\n",
+ "                append_rows.append(dataItemNo_dataValue[value])\n",
+ "            else:\n",
+ "                append_rows.append(\"\")\n",
+ "        save_xls(append_rows)\n",
+ "\n",
+ "        # Refresh the current month's rows in the workbook, then retrain.\n",
+ "        queryDataListItemNos(token)\n",
+ "        optimize_Model()\n",
+ "        # Fixed: the date argument was missing here; main() passes it as well.\n",
+ "        upload_data_to_system(token_push, datetime.now())\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "    def start_1():\n",
+ "        '''Fetch today's values and write them over the sheet's last row (save_xls_1).'''\n",
+ "        print(\"更新当天数据\")\n",
+ "        read_xls_data()\n",
+ "\n",
+ "        token = get_head_auth()\n",
+ "        if not token:\n",
+ "            return\n",
+ "\n",
+ "\n",
+ "        datas = get_data_value(token, one_cols[1:])\n",
+ "        if not datas:\n",
+ "            return\n",
+ "\n",
+ "        # The row starts with getNow()[1] (was get_cur_time()); the other cells follow one_cols[1:].\n",
+ "        append_rows = [getNow()[1]]\n",
+ "        dataItemNo_dataValue = {}\n",
+ "        for data_value in datas:\n",
+ "            if \"dataValue\" not in data_value:\n",
+ "                print(data_value)\n",
+ "                dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n",
+ "            else:\n",
+ "                dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n",
+ "\n",
+ "        for value in one_cols[1:]:\n",
+ "            if value in dataItemNo_dataValue:\n",
+ "                append_rows.append(dataItemNo_dataValue[value])\n",
+ "            else:\n",
+ "                append_rows.append(\"\")\n",
+ "        print(\"当天数据为:\",append_rows)\n",
+ "        save_xls_1(append_rows)\n",
+ "\n",
+ "\n",
+ "        # Disabled in the original:\n",
+ "        # write_xls(data_list)\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb.r87201 b/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb.r87201
new file mode 100644
index 0000000..44fa65f
--- /dev/null
+++ b/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb.r87201
@@ -0,0 +1,1156 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " \n",
+ " \n",
+ " "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import requests\n",
+ "import json\n",
+ "from datetime import datetime, timedelta\n",
+ "import time\n",
+ "import pandas as pd\n",
+ "\n",
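+ "# Configuration. NOTE(review): the API account and password below are hard-coded in a committed notebook; move them to environment variables or a local config file.\n",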
+ "login_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n",
+ "search_url = \"http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos\"\n",
+ "queryDataListItemNos_url = \"http://10.200.32.39/jingbo-api//api/warehouse/dwDataItem/queryDataListItemNos\"\n",
+ "\n",
+ "login_push_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n",
+ "upload_url = \"http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList\"\n",
+ "\n",
+ "login_data = {\n",
+ " \"data\": {\n",
+ " \"account\": \"api_dev\",\n",
+ " \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n",
+ " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n",
+ " \"terminal\": \"API\"\n",
+ " },\n",
+ " \"funcModule\": \"API\",\n",
+ " \"funcOperation\": \"获取token\"\n",
+ "}\n",
+ "\n",
+ "login_push_data = {\n",
+ " \"data\": {\n",
+ " \"account\": \"api_dev\",\n",
+ " \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n",
+ " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n",
+ " \"terminal\": \"API\"\n",
+ " },\n",
+ " \"funcModule\": \"API\",\n",
+ " \"funcOperation\": \"获取token\"\n",
+ "}\n",
+ "\n",
+ "read_file_path_name = \"沥青数据项.xlsx\"\n",
+ "one_cols = []\n",
+ "two_cols = []\n",
+ "\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "import seaborn as sn\n",
+ "import random\n",
+ "import time\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "from plotly import __version__\n",
+ "from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot\n",
+ "\n",
+ "from sklearn import preprocessing\n",
+ "\n",
+ "from pandas import Series,DataFrame\n",
+ "\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "import sklearn.datasets as datasets\n",
+ "\n",
+ "#导入机器学习算法模型\n",
+ "from sklearn.linear_model import Lasso\n",
+ "from xgboost import XGBRegressor\n",
+ "\n",
+ "import statsmodels.api as sm\n",
+ "# from keras.preprocessing.sequence import TimeseriesGenerator\n",
+ "from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator\n",
+ "\n",
+ "import plotly.express as px\n",
+ "import plotly.graph_objects as go\n",
+ "\n",
+ "import xgboost as xgb\n",
+ "from xgboost import plot_importance, plot_tree\n",
+ "from sklearn.metrics import mean_absolute_error\n",
+ "from statsmodels.tools.eval_measures import mse,rmse\n",
+ "from sklearn.model_selection import GridSearchCV\n",
+ "from xgboost import XGBRegressor\n",
+ "import warnings\n",
+ "import pickle\n",
+ "\n",
+ "from sklearn.metrics import mean_squared_error\n",
+ "\n",
+ "#切割训练数据和样本数据\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "\n",
+ "#用于模型评分\n",
+ "from sklearn.metrics import r2_score\n",
+ "\n",
+ "le = preprocessing.LabelEncoder()\n",
+ "\n",
+ "# print(__version__) # requires version >= 1.9.0\n",
+ "\n",
+ "\n",
+ "import cufflinks as cf\n",
+ "cf.go_offline()\n",
+ "\n",
+ "random.seed(100)\n",
+ "\n",
+ "%matplotlib inline\n",
+ "\n",
+ "# 数据获取\n",
+ "\n",
+ "def get_head_auth():\n",
+ "    \"\"\"Log in with login_data and return the access token, or None on failure.\"\"\"\n",
+ "    login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5))\n",
+ "    reply = json.loads(login_res.text)\n",
+ "    # A falsy \"status\" is reported as an authentication failure.\n",
+ "    if not reply[\"status\"]:\n",
+ "        print(\"获取认证失败\")\n",
+ "        return None\n",
+ "    return reply[\"data\"][\"accessToken\"]\n",
+ "\n",
+ "\n",
+ "def get_data_value(token, dataItemNoList,date=''):\n",
+ "    \"\"\"Query the values of dataItemNoList for one date.\n",
+ "\n",
+ "    date is normalised by getNow(); '' means today. Returns the reply's\n",
+ "    \"data\" payload, or None (after printing a notice) when it is empty.\n",
+ "    \"\"\"\n",
+ "    query_date = getNow(date)[0]\n",
+ "    payload = {\n",
+ "        \"data\": {\"date\": query_date, \"dataItemNoList\": dataItemNoList},\n",
+ "        \"funcModule\": \"数据项\",\n",
+ "        \"funcOperation\": \"查询\",\n",
+ "    }\n",
+ "    reply = requests.post(url=search_url, headers={\"Authorization\": token}, json=payload, timeout=(3, 5))\n",
+ "    search_value = json.loads(reply.text)[\"data\"]\n",
+ "    if not search_value:\n",
+ "        print(\"今天没有新数据\")\n",
+ "        return None\n",
+ "    return search_value\n",
+ "\n",
+ "\n",
+ "# xls文件处理\n",
+ "\n",
+ "\n",
+ "\n",
+ "def getNow(date='', offset=0):\n",
+ "    \"\"\"Return one date as two strings: ('YYYYMMDD', 'YYYY-MM-DD').\n",
+ "\n",
+ "    Args:\n",
+ "        date: a datetime object, a date string in '%Y-%m-%d', '%Y%m%d' or\n",
+ "            '%Y/%m/%d' form, or a falsy value such as '' for the current time.\n",
+ "        offset: number of days to subtract from the date (positive = earlier).\n",
+ "    Returns:\n",
+ "        tuple: (compact date string, dashed date string)\n",
+ "    Raises:\n",
+ "        ValueError: if a non-empty date matches none of the formats.\n",
+ "    \"\"\"\n",
+ "    from datetime import datetime,timedelta\n",
+ "\n",
+ "    def _parse(text):\n",
+ "        # Try each accepted layout in turn; the first one that parses wins.\n",
+ "        for fmt in ('%Y-%m-%d', '%Y%m%d', '%Y/%m/%d'):\n",
+ "            try:\n",
+ "                return datetime.strptime(text, fmt)\n",
+ "            except ValueError:\n",
+ "                pass\n",
+ "        raise ValueError(f\"无法解析的日期格式: {date}\")\n",
+ "\n",
+ "    if isinstance(date, datetime):\n",
+ "        moment = date\n",
+ "    elif date:\n",
+ "        moment = _parse(str(date))\n",
+ "    else:\n",
+ "        moment = datetime.now()\n",
+ "\n",
+ "    # The offset moves backwards in time.\n",
+ "    moment = moment - timedelta(days=offset)\n",
+ "\n",
+ "    date_str = moment.strftime(\"%Y-%m-%d\")\n",
+ "    return date_str.replace(\"-\", \"\"), date_str\n",
+ "\n",
+ "\n",
+ "\n",
+ "# def get_cur_time(date = ''):\n",
+ "# if date == '':\n",
+ "# now = datetime.now()\n",
+ "# else:\n",
+ "# now = date\n",
+ "# year = now.year\n",
+ "# month = now.month\n",
+ "# day = now.day\n",
+ "\n",
+ "# if month < 10:\n",
+ "# month = \"0\" + str(month)\n",
+ "# if day < 10:\n",
+ "# day = \"0\" + str(day)\n",
+ "# cur_time = str(year) + str(month) + str(day)\n",
+ "# cur_time2 = str(year) + \"-\" + str(month) + \"-\" + str(day)\n",
+ "# # cur_time = '20231007'\n",
+ "# # cur_time2 = '2023-10-07'\n",
+ "# return cur_time, cur_time2\n",
+ "\n",
+ "\n",
+ "def get_head_push_auth():\n",
+ "    \"\"\"Log in with login_push_data and return the access token, or None on failure.\"\"\"\n",
+ "    login_res = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5))\n",
+ "    reply = json.loads(login_res.text)\n",
+ "    # A falsy \"status\" is reported as an authentication failure.\n",
+ "    if not reply[\"status\"]:\n",
+ "        print(\"获取认证失败\")\n",
+ "        return None\n",
+ "    return reply[\"data\"][\"accessToken\"]\n",
+ "\n",
+ "def upload_data_to_system(token_push,date):\n",
+ "    \"\"\"Upload the forecast_price() result for item C01100036|Forecast_Price|ACN.\n",
+ "\n",
+ "    date is normalised by getNow(); the server's reply text is printed.\n",
+ "    \"\"\"\n",
+ "    data_date = getNow(date)[0]\n",
+ "    record = {\n",
+ "        \"dataItemNo\": \"C01100036|Forecast_Price|ACN\",\n",
+ "        \"dataDate\": data_date,\n",
+ "        \"dataStatus\": \"add\",\n",
+ "        \"dataValue\": forecast_price(),\n",
+ "    }\n",
+ "    body = {\"funcModule\": \"数据表信息列表\", \"funcOperation\": \"新增\", \"data\": [record]}\n",
+ "    # token_push is sent as the Authorization header.\n",
+ "    res = requests.post(url=upload_url, headers={\"Authorization\": token_push}, json=body, timeout=(3, 5))\n",
+ "    print(res.text)\n",
+ "\n",
+ "def forecast_price():\n",
+ "    \"\"\"Predict the price for the newest row of '沥青数据项.xlsx'.\n",
+ "\n",
+ "    The sheet gets the same cleaning as in optimize_Model() (numeric cast,\n",
+ "    dropping columns with more than 40% missing values, forward/backward fill),\n",
+ "    the model stored in \"日度价格预测_最佳模型.pkl\" predicts from the last row, and\n",
+ "    the result is returned as a float rounded to two decimals.\n",
+ "    \"\"\"\n",
+ "\n",
+ "    # Columns that are cast to float before modelling.\n",
+ "    numeric_cols = [\n",
+ "        '汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n",
+ "        '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n",
+ "        '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n",
+ "        '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n",
+ "        '京博签单量','京博库存量','京博产量','加权平均成交价',\n",
+ "    ]\n",
+ "    df_test = pd.read_excel('沥青数据项.xlsx')\n",
+ "    # The first row after the header is dropped (presumably the item-ID row - confirm).\n",
+ "    df_test.drop([0],inplace=True)\n",
+ "    df_test[numeric_cols] = df_test[numeric_cols].astype('float')\n",
+ "    # infer_datetime_format is omitted: it has no effect next to an explicit format\n",
+ "    # and is deprecated in pandas 2.\n",
+ "    df_test['日期']=pd.to_datetime(df_test['日期'], format='%Y-%m-%d')\n",
+ "\n",
+ "    # Share of missing values per column; columns above 40% are discarded.\n",
+ "    missing_ratio = df_test.isnull().sum() / len(df_test)\n",
+ "    df_test_1 = df_test.drop(missing_ratio[missing_ratio > 0.4].index, axis=1)\n",
+ "\n",
+ "    # Fill remaining gaps from the previous row first, then from the next one.\n",
+ "    df_test_1=df_test_1.fillna(df_test.ffill())\n",
+ "    df_test_1=df_test_1.fillna(df_test_1.bfill())\n",
+ "\n",
+ "    import joblib\n",
+ "    Best_model_DalyLGPrice = joblib.load(\"日度价格预测_最佳模型.pkl\")\n",
+ "\n",
+ "    # Newest day = last row. copy() keeps the edits below off df_test_1 and\n",
+ "    # avoids pandas' SettingWithCopyWarning.\n",
+ "    df_test_1_Day = df_test_1.tail(1).copy()\n",
+ "    df_test_1_Day.index = df_test_1_Day[\"日期\"]\n",
+ "    # The date and the target column are not model inputs.\n",
+ "    df_test_1_Day = df_test_1_Day.drop([\"日期\", '京博指导价'], axis=1)\n",
+ "    df_test_1_Day=df_test_1_Day.dropna()\n",
+ "\n",
+ "    Ypredict_Today=Best_model_DalyLGPrice.predict(df_test_1_Day)\n",
+ "    df_test_1_Day['日度预测价格']=Ypredict_Today\n",
+ "    print(df_test_1_Day['日度预测价格'])\n",
+ "\n",
+ "    # .iloc[0] instead of [0]: the Series is indexed by date, so [0] relied on a\n",
+ "    # positional fallback that newer pandas deprecates.\n",
+ "    predicted = float(df_test_1_Day['日度预测价格'].iloc[0])\n",
+ "    return round(predicted,2)\n",
+ "def optimize_Model():\n",
+ "    \"\"\"Retrain the daily price models and pickle the tuned XGBoost search.\n",
+ "\n",
+ "    Reads '沥青数据项.xlsx', drops columns with more than 40% missing values,\n",
+ "    forward/backward fills the remaining gaps, fits a Lasso and an XGBoost\n",
+ "    baseline, grid-searches XGBoost and dumps the fitted GridSearchCV object\n",
+ "    to '日度价格预测_最佳模型.pkl'. Returns None.\n",
+ "    \"\"\"\n",
+ "    # Only the imports that are actually used; the unused tensorflow / plotly /\n",
+ "    # statsmodels imports were removed.\n",
+ "    from sklearn.linear_model import Lasso\n",
+ "    from sklearn.metrics import mean_squared_error, r2_score\n",
+ "    from sklearn.model_selection import GridSearchCV, train_test_split\n",
+ "    from xgboost import XGBRegressor\n",
+ "    from pylab import mpl\n",
+ "    import pickle\n",
+ "\n",
+ "    pd.set_option('display.max_rows',40)\n",
+ "    pd.set_option('display.max_columns',40)\n",
+ "    df_test = pd.read_excel('沥青数据项.xlsx')\n",
+ "    # The row labelled 0 is removed before the numeric cast below.\n",
+ "    df_test.drop([0],inplace=True)\n",
+ "    numeric_cols = [\n",
+ "        '汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n",
+ "        '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n",
+ "        '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n",
+ "        '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n",
+ "        '京博签单量','京博库存量','京博产量','加权平均成交价',\n",
+ "    ]\n",
+ "    df_test[numeric_cols] = df_test[numeric_cols].astype('float')\n",
+ "    # An explicit format is sufficient; the deprecated infer_datetime_format\n",
+ "    # argument only produced a UserWarning.\n",
+ "    df_test['日期']=pd.to_datetime(df_test['日期'], format='%Y-%m-%d')\n",
+ "\n",
+ "    # Drop features whose share of missing values exceeds 0.4.\n",
+ "    missing_ratio = df_test.isnull().sum() / len(df_test)\n",
+ "    df_test_1 = df_test.drop(missing_ratio[missing_ratio > 0.4].index, axis=1)\n",
+ "    # Fill remaining gaps with the previous value, then with the next one.\n",
+ "    df_test_1 = df_test_1.ffill().bfill()\n",
+ "    df_test_1['日期'] = pd.to_datetime(df_test_1['日期'])\n",
+ "    df_test_1 = df_test_1.set_index('日期')\n",
+ "\n",
+ "    x = df_test_1.drop('京博指导价',axis=1)\n",
+ "    y = df_test_1['京博指导价']\n",
+ "\n",
+ "    # Split into training and test sets.\n",
+ "    X_train,x_test,y_train,y_true = train_test_split(x,y,test_size=0.2,random_state=0)\n",
+ "\n",
+ "    # Baseline models.\n",
+ "    lasso_model = Lasso(random_state=0)\n",
+ "    xgbr_model = XGBRegressor(random_state=0)\n",
+ "    lasso_model.fit(X_train,y_train)\n",
+ "    xgbr_model.fit(X_train,y_train)\n",
+ "    y_pre_Lasso = lasso_model.predict(x_test)\n",
+ "    y_pre_XGBR = xgbr_model.predict(x_test)\n",
+ "\n",
+ "    # R^2 and RMSE of the two baselines.\n",
+ "    Lasso_score = r2_score(y_true,y_pre_Lasso)\n",
+ "    XGBR_score = r2_score(y_true,y_pre_XGBR)\n",
+ "    Lasso_RMSE = np.sqrt(mean_squared_error(y_true, y_pre_Lasso))\n",
+ "    XGBR_RMSE = np.sqrt(mean_squared_error(y_true, y_pre_XGBR))\n",
+ "    model_results = pd.DataFrame([['Lasso', Lasso_RMSE, Lasso_score],\n",
+ "                                  ['XgBoost', XGBR_RMSE, XGBR_score]],\n",
+ "                                 columns = ['模型(Model)','均方根误差(RMSE)', 'R^2 score'])\n",
+ "    model_results1 = model_results.set_index('模型(Model)')\n",
+ "\n",
+ "    def plot_feature_importance(importance,names,model_type):\n",
+ "        # Bar chart of feature importances, largest first (not called in this function).\n",
+ "        feature_importance = np.array(importance)\n",
+ "        feature_names = np.array(names)\n",
+ "\n",
+ "        data={'feature_names':feature_names,'feature_importance':feature_importance}\n",
+ "        fi_df = pd.DataFrame(data)\n",
+ "\n",
+ "        fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True)\n",
+ "\n",
+ "        plt.figure(figsize=(10,8))\n",
+ "        sn.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names'])\n",
+ "\n",
+ "        plt.title(model_type + \" \"+'FEATURE IMPORTANCE')\n",
+ "        plt.xlabel('FEATURE IMPORTANCE')\n",
+ "        plt.ylabel('FEATURE NAMES')\n",
+ "\n",
+ "    # The `%pylab` magic that used to run here was removed: it star-imports pylab\n",
+ "    # into the notebook namespace and rebinds globals such as `datetime` and\n",
+ "    # `random`, which breaks later `datetime.now()` calls.\n",
+ "    # NOTE(review): confirm nothing else in the notebook relied on names injected by %pylab.\n",
+ "    mpl.rcParams['font.sans-serif'] = ['SimHei']\n",
+ "\n",
+ "    estimator = XGBRegressor(random_state=0,\n",
+ "                             nthread=4,\n",
+ "                             seed=0\n",
+ "                             )\n",
+ "    parameters = {\n",
+ "        'max_depth': range (2, 11, 2), # maximum tree depth\n",
+ "        'n_estimators': range (50, 101, 10), # number of boosting rounds\n",
+ "        'learning_rate': [0.01, 0.03, 0.1, 0.3, 0.5, 1]\n",
+ "    }\n",
+ "\n",
+ "    grid_search_XGB = GridSearchCV(\n",
+ "        estimator=estimator,\n",
+ "        param_grid=parameters,\n",
+ "        # n_jobs = 10,\n",
+ "        cv = 3,\n",
+ "        verbose=True\n",
+ "    )\n",
+ "\n",
+ "    grid_search_XGB.fit(X_train, y_train)\n",
+ "    print(\"Best score: %0.3f\" % grid_search_XGB.best_score_)\n",
+ "    print(\"Best parameters set:\")\n",
+ "    best_parameters = grid_search_XGB.best_estimator_.get_params()\n",
+ "    for param_name in sorted(parameters.keys()):\n",
+ "        print(\"\\t%s: %r\" % (param_name, best_parameters[param_name]))\n",
+ "    y_pred = grid_search_XGB.predict(x_test)\n",
+ "\n",
+ "    op_XGBR_score = r2_score(y_true,y_pred)\n",
+ "    op_XGBR_RMSE = np.sqrt(mean_squared_error(y_true, y_pred))\n",
+ "\n",
+ "    model_results2 = pd.DataFrame([['Optimized_Xgboost', op_XGBR_RMSE, op_XGBR_score]],\n",
+ "                                  columns = ['模型(Model)', '均方根误差(RMSE)', 'R^2 score'])\n",
+ "    model_results2 = model_results2.set_index('模型(Model)')\n",
+ "\n",
+ "    # Keep the model names as the index; ignore_index=True replaced them with 0..n-1.\n",
+ "    results = pd.concat([model_results1,model_results2],ignore_index=False)\n",
+ "\n",
+ "    Pkl_Filename = \"日度价格预测_最佳模型.pkl\"\n",
+ "\n",
+ "    with open(Pkl_Filename, 'wb') as file:\n",
+ "        pickle.dump(grid_search_XGB, file)\n",
+ "\n",
+ "def read_xls_data():\n",
+ "    \"\"\"Load the data-item IDs into the module-level ``one_cols``.\n",
+ "\n",
+ "    The IDs are taken from the second row (index 1) of the workbook at\n",
+ "    ``read_file_path_name``, skipping that row's first cell.\n",
+ "    \"\"\"\n",
+ "    global one_cols, two_cols\n",
+ "    # header=None: do not treat any row as column names.\n",
+ "    raw_sheet = pd.read_excel(read_file_path_name, header=None)\n",
+ "    id_row = raw_sheet.iloc[1]\n",
+ "    one_cols = id_row.iloc[1:].tolist()\n",
+ "    print(f'获取到的数据项ID{one_cols}')\n",
+ "\n",
+ "# def read_xls_data():\n",
+ "# global one_cols, two_cols\n",
+ "# # 打开 XLS 文件\n",
+ "# workbook = xlrd.open_workbook(read_file_path_name)\n",
+ "\n",
+ "# # 获取所有表格名称\n",
+ "# # sheet_names = workbook.sheet_names()\n",
+ "\n",
+ "# # 选择第一个表格\n",
+ "# sheet = workbook.sheet_by_index(0)\n",
+ "\n",
+ "# # 获取行数和列数\n",
+ "# num_rows = sheet.nrows\n",
+ "# # num_cols = sheet.ncols\n",
+ "\n",
+ "# # 遍历每一行,获取单元格数据\n",
+ "# # for i in range(num_rows):\n",
+ "# # row_data = sheet.row_values(i)\n",
+ "# # one_cols.append(row_data)\n",
+ "# # two_cols.append(row_data[1])\n",
+ "\n",
+ "# row_data = sheet.row_values(1)\n",
+ "# one_cols = row_data\n",
+ "\n",
+ "# # 关闭 XLS 文件\n",
+ "# # workbook.close()\n",
+ "\n",
+ "def start_3(date=None,token=None,token_push=None):\n",
+ "    '''Fetch the data items for `date` and append them as one row via save_xls.\n",
+ "\n",
+ "    date: datetime to fetch; defaults to now.\n",
+ "    token: query token; fetched with get_head_auth() when omitted.\n",
+ "    token_push: upload token; fetched with get_head_push_auth() when omitted.\n",
+ "    '''\n",
+ "    read_xls_data()\n",
+ "\n",
+ "    if date is None:\n",
+ "        date = datetime.now()\n",
+ "    if token is None:\n",
+ "        token = get_head_auth()\n",
+ "    if token_push is None:\n",
+ "        # Fixed: this branch used to re-assign `token` and left token_push as None.\n",
+ "        token_push = get_head_push_auth()\n",
+ "\n",
+ "    # NOTE(review): read_xls_data already drops the first cell of the ID row, so\n",
+ "    # one_cols[1:] skips one more item here - confirm this is intended.\n",
+ "    datas = get_data_value(token, one_cols[1:],date)\n",
+ "    if not datas:\n",
+ "        return\n",
+ "\n",
+ "    # data_list = [two_cols, one_cols]\n",
+ "    append_rows = [get_cur_time(date)[1]]\n",
+ "    dataItemNo_dataValue = {}\n",
+ "    for data_value in datas:\n",
+ "        if \"dataValue\" not in data_value:\n",
+ "            # Log items that came back without a value and store an empty cell.\n",
+ "            print(data_value)\n",
+ "        dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value.get(\"dataValue\", \"\")\n",
+ "\n",
+ "    for value in one_cols[1:]:\n",
+ "        append_rows.append(dataItemNo_dataValue.get(value, \"\"))\n",
+ "    save_xls(append_rows)\n",
+ "\n",
+ " # 获取当月的数据写入到指定文件\n",
+ " # optimize_Model()\n",
+ " # upload_data_to_system(token_push,date)\n",
+ " # data_list.append(three_cols)\n",
+ " # write_xls(data_list)\n",
+ "\n",
+ "def check_data(dataItemNo):\n",
+ "    \"\"\"Query the value of `dataItemNo` as a manual check; always returns None.\"\"\"\n",
+ "    auth_token = get_head_auth()\n",
+ "    if auth_token:\n",
+ "        # The fetched value is not used further.\n",
+ "        get_data_value(auth_token, dataItemNo)\n",
+ "    return None\n",
+ "\n",
+ "def save_xls(append_rows):\n",
+ "    \"\"\"Copy '沥青数据项.xls' into a new workbook and append one row to its first sheet.\n",
+ "\n",
+ "    append_rows: list of cell values for the new row; cells beyond its length\n",
+ "    are written as empty strings instead of raising IndexError.\n",
+ "    \"\"\"\n",
+ "    workbook = xlrd.open_workbook('沥青数据项.xls')\n",
+ "    new_workbook = xlwt.Workbook()\n",
+ "\n",
+ "    for i, sheet_name in enumerate(workbook.sheet_names()):\n",
+ "        sheet = workbook.sheet_by_index(i)\n",
+ "        row_count = sheet.nrows\n",
+ "        col_count = sheet.ncols\n",
+ "        new_sheet = new_workbook.add_sheet(sheet_name)\n",
+ "\n",
+ "        # Copy the existing cells unchanged.\n",
+ "        for row in range(row_count):\n",
+ "            for col in range(col_count):\n",
+ "                new_sheet.write(row, col, sheet.cell_value(row, col))\n",
+ "\n",
+ "        if i == 0:\n",
+ "            # Append the new row below the existing data of the first sheet.\n",
+ "            for col in range(col_count):\n",
+ "                cell = append_rows[col] if col < len(append_rows) else \"\"\n",
+ "                new_sheet.write(row_count, col, cell)\n",
+ "\n",
+ "    # NOTE(review): an xlwt workbook is saved under an .xlsx name while the\n",
+ "    # input is read from .xls - confirm the intended format and file name.\n",
+ "    new_workbook.save(\"沥青数据项.xlsx\")\n",
+ "\n",
+ "def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEnd):\n",
+ "    \"\"\"POST a data-item query for a date range and return its \"data\" payload.\n",
+ "\n",
+ "    token: value sent in the Authorization header.\n",
+ "    url: endpoint to post to.\n",
+ "    dataItemNoList: data-item codes to query.\n",
+ "    dateStart, dateEnd: range bounds, passed through unchanged.\n",
+ "    Returns the \"data\" field of the JSON response, or None when it is missing or empty.\n",
+ "    \"\"\"\n",
+ "    search_data = {\n",
+ "        \"funcModule\": \"数据项\",\n",
+ "        \"funcOperation\": \"查询\",\n",
+ "        \"data\": {\n",
+ "            \"dateStart\": dateStart,\n",
+ "            \"dateEnd\": dateEnd,\n",
+ "            \"dataItemNoList\": dataItemNoList # data-item codes to query\n",
+ "        }\n",
+ "    }\n",
+ "\n",
+ "    headers = {\"Authorization\": token}\n",
+ "    search_res = requests.post(url=url, headers=headers, json=search_data, timeout=(3, 5))\n",
+ "    payload = json.loads(search_res.text)\n",
+ "    # A response without a \"data\" field used to raise KeyError; treat it as no data.\n",
+ "    search_value = payload.get(\"data\") if isinstance(payload, dict) else None\n",
+ "    return search_value or None\n",
+ "\n",
+ "\n",
+ "def save_queryDataListItemNos_xls(data_df,dataItemNoList):\n",
+ "    \"\"\"Overwrite the current month's rows in the first sheet of '沥青数据项.xlsx'.\n",
+ "\n",
+ "    data_df: DataFrame with 'dataDate', 'dataItemNo' and 'dataValue' columns.\n",
+ "    dataItemNoList: data-item codes in sheet column order, starting at column 2.\n",
+ "    \"\"\"\n",
+ "    # Local import: the notebook's %pylab call rebinds the global name `datetime`.\n",
+ "    from datetime import datetime\n",
+ "    from openpyxl import load_workbook\n",
+ "\n",
+ "    current_year_month = datetime.now().strftime('%Y-%m')\n",
+ "    grouped = data_df.groupby(\"dataDate\")\n",
+ "\n",
+ "    # One workbook is enough; only the first sheet is ever modified.\n",
+ "    workbook = load_workbook('沥青数据项.xlsx')\n",
+ "    sheet = workbook[workbook.sheetnames[0]]\n",
+ "\n",
+ "    # Count the rows whose first cell starts with the current year-month.\n",
+ "    month_rows = sum(\n",
+ "        1 for row in sheet.iter_rows(values_only=True)\n",
+ "        if str(row[0]).startswith(current_year_month)\n",
+ "    )\n",
+ "\n",
+ "    # Write the refreshed rows starting max_row - month_rows + 1 rows down.\n",
+ "    start_row = sheet.max_row - month_rows + 1\n",
+ "    for row_idx, (date, group) in enumerate(grouped, start=start_row):\n",
+ "        sheet.cell(row=row_idx, column=1, value=date)\n",
+ "        for j, dataItemNo in enumerate(dataItemNoList, start=2):\n",
+ "            values = group.loc[group[\"dataItemNo\"] == dataItemNo, \"dataValue\"].values\n",
+ "            # Test the size, not the truth value: the old check warned on empty\n",
+ "            # arrays, raised on several matches and skipped a lone 0 value.\n",
+ "            if values.size > 0:\n",
+ "                sheet.cell(row=row_idx, column=j, value=values[0])\n",
+ "\n",
+ "    workbook.save(\"沥青数据项.xlsx\")\n",
+ "\n",
+ "\n",
+ "# def save_queryDataListItemNos_xls(data_df,dataItemNoList):\n",
+ "# from datetime import datetime, timedelta\n",
+ "# current_year_month = datetime.now().strftime('%Y-%m')\n",
+ "# grouped = data_df.groupby(\"dataDate\")\n",
+ "\n",
+ "# # 打开xls文件\n",
+ "# workbook = xlrd.open_workbook('沥青数据项.xlsx')\n",
+ "\n",
+ "# # 获取所有sheet的个数\n",
+ "# sheet_count = len(workbook.sheet_names())\n",
+ "\n",
+ "# # 获取所有sheet的名称\n",
+ "# sheet_names = workbook.sheet_names()\n",
+ "\n",
+ "# new_workbook = xlwt.Workbook()\n",
+ "# for i in range(sheet_count):\n",
+ "# # 获取当前sheet\n",
+ "# sheet = workbook.sheet_by_index(i)\n",
+ "\n",
+ "# # 获取sheet的行数和列数\n",
+ "# row_count = sheet.nrows\n",
+ "# col_count = sheet.ncols\n",
+ "# # 获取原有数据\n",
+ "# data = []\n",
+ "# for row in range(row_count):\n",
+ "# row_data = []\n",
+ "# for col in range(col_count):\n",
+ "# row_data.append(sheet.cell_value(row, col))\n",
+ "# data.append(row_data)\n",
+ "\n",
+ "# # 创建xlwt的Workbook对象\n",
+ "# # 创建sheet\n",
+ "# new_sheet = new_workbook.add_sheet(sheet_names[i])\n",
+ "\n",
+ "\n",
+ "# current_year_month_row = 0\n",
+ "# # 将原有的数据写入新的sheet\n",
+ "# for row in range(row_count):\n",
+ "# for col in range(col_count):\n",
+ "# col0 = data[row][0]\n",
+ "# # print(\"col0\",col0[:7])\n",
+ "# if col0[:7] == current_year_month:\n",
+ "# current_year_month_row += 1\n",
+ "# break\n",
+ "# new_sheet.write(row, col, data[row][col])\n",
+ "\n",
+ "\n",
+ "# # print(\"current_year_month_row\",current_year_month_row)\n",
+ "# if i == 0:\n",
+ "# rowFlag = 0\n",
+ "# # 查看每组数据\n",
+ "# for date, group in grouped:\n",
+ "# new_sheet.write(row_count + rowFlag - current_year_month_row, 0, date)\n",
+ "# for j in range(len(dataItemNoList)):\n",
+ "# dataItemNo = dataItemNoList[j]\n",
+ "\n",
+ "# # for dataItemNo in dataItemNoList:\n",
+ "# if group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values:\n",
+ "\n",
+ "# new_sheet.write(row_count + rowFlag - current_year_month_row, j + 1, group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values[0])\n",
+ "\n",
+ "# rowFlag += 1\n",
+ "\n",
+ "\n",
+ "# # 保存新的xls文件\n",
+ "# new_workbook.save(\"沥青数据项.xlsx\")\n",
+ "\n",
+ "def queryDataListItemNos(token=None):\n",
+ "    \"\"\"Query this month's values for every data item and write them to the workbook.\n",
+ "\n",
+ "    token: query token; fetched with get_head_auth() when omitted.\n",
+ "    The data-item codes come from the first data row of '沥青数据项.xlsx',\n",
+ "    skipping its first cell.\n",
+ "    \"\"\"\n",
+ "    from datetime import datetime\n",
+ "    df = pd.read_excel('沥青数据项.xlsx')\n",
+ "    dataItemNoList = df.iloc[0].tolist()[1:]\n",
+ "\n",
+ "    if token is None:\n",
+ "        token = get_head_auth()\n",
+ "\n",
+ "    if not token:\n",
+ "        print('token获取失败')\n",
+ "        return\n",
+ "\n",
+ "    # Query window: first day of the current month through today, as YYYYMMDD.\n",
+ "    current_date = datetime.now()\n",
+ "    dateEnd = current_date.strftime('%Y%m%d')\n",
+ "    dateStart = current_date.replace(day=1).strftime('%Y%m%d')\n",
+ "\n",
+ "    search_value = get_queryDataListItemNos_value(token, queryDataListItemNos_url, dataItemNoList, dateStart, dateEnd)\n",
+ "    if not search_value:\n",
+ "        # No data came back; the old code failed with KeyError on 'dataDate' below.\n",
+ "        print('当月数据查询结果为空,跳过更新')\n",
+ "        return\n",
+ "\n",
+ "    data_df = pd.DataFrame(search_value)\n",
+ "    data_df[\"dataDate\"] = pd.to_datetime(data_df[\"dataDate\"]).dt.strftime('%Y-%m-%d')\n",
+ "    save_queryDataListItemNos_xls(data_df,dataItemNoList)\n",
+ "    print('当月数据更新完成')\n",
+ "\n",
+ "\n",
+ "def save_xls_1(append_rows):\n",
+ "    \"\"\"Rewrite '沥青数据项.xlsx' with the last row of its first sheet replaced.\n",
+ "\n",
+ "    Every sheet is copied without its last row; on the first sheet\n",
+ "    `append_rows` is written in that row's place. Cells beyond the length of\n",
+ "    `append_rows` are written as empty strings instead of raising IndexError.\n",
+ "    \"\"\"\n",
+ "    # NOTE(review): xlrd 2.x reads only .xls files - confirm this call still works on the .xlsx file.\n",
+ "    workbook = xlrd.open_workbook('沥青数据项.xlsx')\n",
+ "    new_workbook = xlwt.Workbook()\n",
+ "\n",
+ "    for i, sheet_name in enumerate(workbook.sheet_names()):\n",
+ "        sheet = workbook.sheet_by_index(i)\n",
+ "        # Leave out the last existing row; never go below 0 on an empty sheet.\n",
+ "        row_count = max(sheet.nrows - 1, 0)\n",
+ "        col_count = sheet.ncols\n",
+ "        new_sheet = new_workbook.add_sheet(sheet_name)\n",
+ "\n",
+ "        # Copy the kept cells unchanged.\n",
+ "        for row in range(row_count):\n",
+ "            for col in range(col_count):\n",
+ "                new_sheet.write(row, col, sheet.cell_value(row, col))\n",
+ "\n",
+ "        if i == 0:\n",
+ "            # Write the new row where the dropped row used to be.\n",
+ "            for col in range(col_count):\n",
+ "                cell = append_rows[col] if col < len(append_rows) else \"\"\n",
+ "                new_sheet.write(row_count, col, cell)\n",
+ "\n",
+ "    new_workbook.save(\"沥青数据项.xlsx\")\n",
+ "\n",
+ "\n",
+ "\n",
+ "def start(date=''):\n",
+ "    \"\"\"Fetch one day's data items and save them as a row via save_xls_2.\n",
+ "\n",
+ "    date: passed to getNow() to obtain the query date and the row's date cell.\n",
+ "    Returns None; stops early when no token or no data is available.\n",
+ "    \"\"\"\n",
+ "    read_xls_data()\n",
+ "    token = get_head_auth()\n",
+ "    if not token:\n",
+ "        return\n",
+ "\n",
+ "    cur_time,cur_time2 = getNow(date)\n",
+ "    print(f\"获取{cur_time}数据\")\n",
+ "    datas = get_data_value(token, one_cols,date=cur_time)\n",
+ "    if not datas:\n",
+ "        # Check before len(): a None result used to raise TypeError here.\n",
+ "        print(datas)\n",
+ "        return\n",
+ "    print(len(datas))\n",
+ "    print(datas)\n",
+ "\n",
+ "    append_rows = [cur_time2]\n",
+ "    dataItemNo_dataValue = {}\n",
+ "    for data_value in datas:\n",
+ "        if \"dataValue\" not in data_value:\n",
+ "            # Log items that came back without a value and store an empty cell.\n",
+ "            print(data_value)\n",
+ "        dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value.get(\"dataValue\", \"\")\n",
+ "\n",
+ "    for value in one_cols:\n",
+ "        append_rows.append(dataItemNo_dataValue.get(value, \"\"))\n",
+ "\n",
+ "    print('添加的行:',len(append_rows),append_rows)\n",
+ "    save_xls_2(append_rows)\n",
+ "\n",
+ "\n",
+ "def save_xls_2(append_rows):\n",
+ "    \"\"\"Insert or update one dated row in the first sheet of '沥青数据项.xlsx'.\n",
+ "\n",
+ "    Args:\n",
+ "        append_rows (list): row to store, laid out as [date, item 1, item 2, ...].\n",
+ "            The list itself is not modified.\n",
+ "\n",
+ "    Raises:\n",
+ "        ValueError: when the row length cannot be matched to the sheet's columns.\n",
+ "    \"\"\"\n",
+ "    # Read the existing data (the first row is taken as column names).\n",
+ "    df = pd.read_excel('沥青数据项.xlsx', sheet_name=0)\n",
+ "    print('文件中的数据列数:',len(df.columns),df.columns)\n",
+ "\n",
+ "    # Work on a copy so the caller's list is left untouched.\n",
+ "    row_values = list(append_rows)\n",
+ "    if len(row_values) == len(df.columns) + 1:\n",
+ "        # Drop the second element: one surplus empty value shows up there (cause unknown).\n",
+ "        row_values.pop(1)\n",
+ "    if len(row_values) != len(df.columns):\n",
+ "        raise ValueError(\n",
+ "            f'row has {len(row_values)} values but the sheet has {len(df.columns)} columns'\n",
+ "        )\n",
+ "    new_row = pd.DataFrame([row_values],columns=df.columns)\n",
+ "    new_date = new_row['日期'].values[0]\n",
+ "\n",
+ "    if new_date in df['日期'].to_list():\n",
+ "        # The date already exists: overwrite that row.\n",
+ "        date_mask = df['日期'] == new_date\n",
+ "        df.loc[date_mask] = new_row.values\n",
+ "        print(f\"更新 {new_date} 数据\")\n",
+ "    else:\n",
+ "        # New date: append the row at the end.\n",
+ "        df = pd.concat([df, new_row], ignore_index=True)\n",
+ "        print(df.head())\n",
+ "        print(df.tail())\n",
+ "        print(f\"插入 {new_date} 新数据\")\n",
+ "\n",
+ "    # Save the updated data.\n",
+ "    df.to_excel('沥青数据项.xlsx', index=False, engine='openpyxl')\n",
+ " \n",
+ " # except FileNotFoundError:\n",
+ " # # 如果文件不存在则创建新文件\n",
+ " # pd.DataFrame([append_rows]).to_excel('沥青数据项.xlsx', index=False, engine='openpyxl')\n",
+ " # except Exception as e:\n",
+ " # print(f\"保存数据时发生错误: {str(e)}\")\n",
+ "\n",
+ "\n",
+ "def main(start_date=None,token=None,token_push=None):\n",
+ "    \"\"\"Run the pipeline: refresh this month's data, retrain, then upload.\n",
+ "\n",
+ "    start_date: datetime passed to the upload step; defaults to now.\n",
+ "    token / token_push: query and upload tokens, fetched when omitted.\n",
+ "    \"\"\"\n",
+ "    start_date = datetime.now() if start_date is None else start_date\n",
+ "    token = get_head_auth() if token is None else token\n",
+ "    token_push = get_head_push_auth() if token_push is None else token_push\n",
+ "\n",
+ "    date = start_date.strftime('%Y%m%d')\n",
+ "    print(date)\n",
+ "    # start(date) is currently disabled.\n",
+ "    # 1. Refresh the current month's data.\n",
+ "    queryDataListItemNos(token)\n",
+ "    # 2. Train the model.\n",
+ "    optimize_Model()\n",
+ "    # 3. Upload the result.\n",
+ "    upload_data_to_system(token_push,start_date)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "20250522\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20588\\2014406737.py:615: DeprecationWarning:\n",
+ "\n",
+ "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "当月数据更新完成\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20588\\2014406737.py:298: UserWarning:\n",
+ "\n",
+ "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Using matplotlib backend: inline\n",
+ "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n",
+ "Populating the interactive namespace from numpy and matplotlib\n",
+ "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "c:\\Users\\EDY\\.conda\\envs\\jiageyuce\\Lib\\site-packages\\IPython\\core\\magics\\pylab.py:166: UserWarning:\n",
+ "\n",
+ "pylab import has clobbered these variables: ['random', 'datetime', 'plot', '__version__']\n",
+ "`%matplotlib` prevents importing * from pylab and numpy\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Best score: 0.997\n",
+ "Best parameters set:\n",
+ "\tlearning_rate: 0.1\n",
+ "\tmax_depth: 8\n",
+ "\tn_estimators: 90\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20588\\2014406737.py:238: UserWarning:\n",
+ "\n",
+ "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+ "\n",
+ "C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20588\\2014406737.py:272: FutureWarning:\n",
+ "\n",
+ "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "日期\n",
+ "2025-05-22 3600.097412\n",
+ "Name: 日度预测价格, dtype: float32\n",
+ "{\"confirmFlg\":false,\"status\":true}\n"
+ ]
+ }
+ ],
+ "source": [
+ "\n",
+ "# if __name__ == \"__main__\":\n",
+ "# print(\"运行中ing...\")\n",
+ "# # 每天定时12点运行\n",
+ "# while True:\n",
+ "# # 获取当前时间\n",
+ "# current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n",
+ "# # print(current_time)\n",
+ "\n",
+ "# # 判断当前时间是否为执行任务的时间点\n",
+ "# try:\n",
+ "# if current_time == \"09:13:00\":\n",
+ "# print(\"执行定时任务\")\n",
+ "# main()\n",
+ "# elif current_time == \"09:18:00\":\n",
+ "# print(\"执行定时任务\")\n",
+ "# main()\n",
+ "# time.sleep(1)\n",
+ "# except :\n",
+ "# print(f\"{current_time}任务失败\")\n",
+ "main()\n",
+ " # 检测数据准确性, 需要检测放开\n",
+ " # check_data(\"100028098|LISTING_PRICE\")\n",
+ " # check_data(\"9137070016544622XB|DAY_Yield\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# start_date = datetime(2025, 5, 16)\n",
+ "# end_date = datetime(2025, 5, 19)\n",
+ "# token = get_head_auth()\n",
+ "\n",
+ "# token_push = get_head_push_auth()\n",
+ "\n",
+ "# while start_date < end_date:\n",
+ "# main(start_date,token,token_push)\n",
+ "# start_date += timedelta(days=1)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "### 代码备份:\n",
+ "\n",
+ "\n",
+ "class codeBackup:\n",
+ "    \"\"\"Holder for older versions of the notebook's functions.\n",
+ "\n",
+ "    The functions take no `self`; call them on the class, e.g. codeBackup.start().\n",
+ "    \"\"\"\n",
+ "\n",
+ "    def write_xls(data,date):\n",
+ "        \"\"\"Write `data` (an iterable of rows) to a new .xls file named via get_cur_time(date)[0].\"\"\"\n",
+ "        workbook = xlwt.Workbook()\n",
+ "\n",
+ "        # Fixed: create the sheet with add_sheet(), as save_xls and save_xls_1 do,\n",
+ "        # instead of the former workbook.load('Sheet1') call.\n",
+ "        sheet = workbook.add_sheet('Sheet1')\n",
+ "\n",
+ "        # Write the data rows.\n",
+ "        for row_index, row_data in enumerate(data):\n",
+ "            for col_index, cell_data in enumerate(row_data):\n",
+ "                sheet.write(row_index, col_index, cell_data)\n",
+ "\n",
+ "        # Save the workbook to a file.\n",
+ "        workbook.save(get_cur_time(date)[0] + '.xls')\n",
+ "\n",
+ "\n",
+ "    def start():\n",
+ "        '''Fetch today's data, append it, refresh the month, retrain and upload.'''\n",
+ "        read_xls_data()\n",
+ "\n",
+ "        token = get_head_auth()\n",
+ "        if not token:\n",
+ "            return\n",
+ "        token_push = get_head_push_auth()\n",
+ "        if not token_push:\n",
+ "            return\n",
+ "\n",
+ "        datas = get_data_value(token, one_cols[1:])\n",
+ "        if not datas:\n",
+ "            return\n",
+ "\n",
+ "        # data_list = [two_cols, one_cols]\n",
+ "        append_rows = [get_cur_time()[1]]\n",
+ "        dataItemNo_dataValue = {}\n",
+ "        for data_value in datas:\n",
+ "            if \"dataValue\" not in data_value:\n",
+ "                print(data_value)\n",
+ "                dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n",
+ "            else:\n",
+ "                dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n",
+ "\n",
+ "        for value in one_cols[1:]:\n",
+ "            if value in dataItemNo_dataValue:\n",
+ "                append_rows.append(dataItemNo_dataValue[value])\n",
+ "            else:\n",
+ "                append_rows.append(\"\")\n",
+ "        save_xls(append_rows)\n",
+ "\n",
+ "        # Write the current month's data to the workbook.\n",
+ "        queryDataListItemNos(token)\n",
+ "        optimize_Model()\n",
+ "        upload_data_to_system(token_push)\n",
+ "        # data_list.append(three_cols)\n",
+ "        # write_xls(data_list)\n",
+ "\n",
+ "\n",
+ "    def start_1():\n",
+ "        '''Fetch today's data and store it through save_xls_1.'''\n",
+ "        print(\"更新当天数据\")\n",
+ "        read_xls_data()\n",
+ "\n",
+ "        token = get_head_auth()\n",
+ "        if not token:\n",
+ "            return\n",
+ "\n",
+ "        datas = get_data_value(token, one_cols[1:])\n",
+ "        if not datas:\n",
+ "            return\n",
+ "\n",
+ "        # data_list = [two_cols, one_cols]\n",
+ "        append_rows = [get_cur_time()[1]]\n",
+ "        dataItemNo_dataValue = {}\n",
+ "        for data_value in datas:\n",
+ "            if \"dataValue\" not in data_value:\n",
+ "                print(data_value)\n",
+ "                dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n",
+ "            else:\n",
+ "                dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n",
+ "\n",
+ "        for value in one_cols[1:]:\n",
+ "            if value in dataItemNo_dataValue:\n",
+ "                append_rows.append(dataItemNo_dataValue[value])\n",
+ "            else:\n",
+ "                append_rows.append(\"\")\n",
+ "        print(\"当天数据为:\",append_rows)\n",
+ "        save_xls_1(append_rows)\n",
+ "\n",
+ " \n",
+ " # data_list.append(three_cols)\n",
+ " # write_xls(data_list)\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.11"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/aisenzhecode/沥青/沥青数据项.xlsx b/aisenzhecode/沥青/沥青数据项.xlsx
index 60c9a12..e5d9da4 100644
Binary files a/aisenzhecode/沥青/沥青数据项.xlsx and b/aisenzhecode/沥青/沥青数据项.xlsx differ
diff --git a/aisenzhecode/沥青/沥青数据项.xlsx.r87044 b/aisenzhecode/沥青/沥青数据项.xlsx.r87044
new file mode 100644
index 0000000..bd31931
Binary files /dev/null and b/aisenzhecode/沥青/沥青数据项.xlsx.r87044 differ
diff --git a/aisenzhecode/沥青/沥青数据项.xlsx.r87201 b/aisenzhecode/沥青/沥青数据项.xlsx.r87201
new file mode 100644
index 0000000..cea746a
Binary files /dev/null and b/aisenzhecode/沥青/沥青数据项.xlsx.r87201 differ
diff --git a/aisenzhecode/液化石油气/日度价格预测_液化气最佳模型.pkl b/aisenzhecode/液化石油气/日度价格预测_液化气最佳模型.pkl
index 47f7dce..50400c1 100644
Binary files a/aisenzhecode/液化石油气/日度价格预测_液化气最佳模型.pkl and b/aisenzhecode/液化石油气/日度价格预测_液化气最佳模型.pkl differ
diff --git a/aisenzhecode/液化石油气/液化气价格预测ytj.ipynb b/aisenzhecode/液化石油气/液化气价格预测ytj.ipynb
index 28e7bd2..a36ad46 100644
--- a/aisenzhecode/液化石油气/液化气价格预测ytj.ipynb
+++ b/aisenzhecode/液化石油气/液化气价格预测ytj.ipynb
@@ -2,28 +2,20 @@
"cells": [
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
"metadata": {},
"outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "WARNING:tensorflow:From C:\\Users\\EDY\\AppData\\Roaming\\Python\\Python311\\site-packages\\keras\\src\\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.\n",
- "\n"
- ]
- },
{
"data": {
"text/html": [
"