diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/PriceForecast-svn.iml b/.idea/PriceForecast-svn.iml new file mode 100644 index 0000000..281df3b --- /dev/null +++ b/.idea/PriceForecast-svn.iml @@ -0,0 +1,12 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..9b8af4c --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..13dea3c --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/aisenzhecode/沥青/定性模型数据项12-11.xlsx b/aisenzhecode/沥青/定性模型数据项12-11.xlsx index f9dcf11..5fb9d60 100644 Binary files a/aisenzhecode/沥青/定性模型数据项12-11.xlsx and b/aisenzhecode/沥青/定性模型数据项12-11.xlsx differ diff --git a/aisenzhecode/沥青/日度价格预测_最佳模型.pkl b/aisenzhecode/沥青/日度价格预测_最佳模型.pkl index 4158966..3835903 100644 Binary files a/aisenzhecode/沥青/日度价格预测_最佳模型.pkl and b/aisenzhecode/沥青/日度价格预测_最佳模型.pkl differ diff --git a/aisenzhecode/沥青/日度价格预测_最佳模型.pkl.r87044 b/aisenzhecode/沥青/日度价格预测_最佳模型.pkl.r87044 new file mode 100644 index 0000000..47e6ce9 Binary files /dev/null and b/aisenzhecode/沥青/日度价格预测_最佳模型.pkl.r87044 differ 
diff --git a/aisenzhecode/沥青/日度价格预测_最佳模型.pkl.r87201 b/aisenzhecode/沥青/日度价格预测_最佳模型.pkl.r87201 new file mode 100644 index 0000000..cd43d5c Binary files /dev/null and b/aisenzhecode/沥青/日度价格预测_最佳模型.pkl.r87201 differ diff --git a/aisenzhecode/沥青/沥青定性模型每日推送-ytj.ipynb b/aisenzhecode/沥青/沥青定性模型每日推送-ytj.ipynb index 15a3486..7e61e16 100644 --- a/aisenzhecode/沥青/沥青定性模型每日推送-ytj.ipynb +++ b/aisenzhecode/沥青/沥青定性模型每日推送-ytj.ipynb @@ -1169,7 +1169,474 @@ "name": "stdout", "output_type": "stream", "text": [ - "运行中...\n" + "运行中...\n", + "更新前一天数据\n", + "更新数据前\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "1428 2025-05-06 3650 29.7819 1 -798.61 6000 90799.9 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "1428 2934.18 1 4876.81 7364.99 \n", + "日期存在,即将更新\n", + "新数据 [3650.0, '', '', -798.61, '', 90799.9179153, '', 2752.8654, '', '', 7364.987]\n", + "更新数据后\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "1428 2025-05-06 3650 29.7819 1 -798.61 6000 90799.9 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "1428 2934.18 1 4876.81 7364.99 \n", + "20250507\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "1427 2025-05-05 3650 29.3624 1 -798.61 6000 90799.9 3630 \n", + "1428 2025-05-06 3650 32.2987 1 -454.19 6000 94300.4 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "1427 2752.87 1 4876.81 7364.99 \n", + "1428 2934.18 1 4876.81 6793.88 \n", + "前一天的 2752.8654 \n", + "现在的 2934.179 \n", + " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "0 1427 2025-05-05 3650 29.3624 1 -798.61 6000 90799.9 3630 \n", + "1 1428 2025-05-06 3650 32.2987 1 -454.19 6000 94300.4 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "0 2752.87 1 4876.81 7364.99 \n", + "1 2934.18 1 4876.81 6793.88 \n", + "昨日计划提货偏差改之前 -454.19\n", + "昨日计划提货偏差改之后 1917.0684\n", + "**************************************************预测结果: 3689.0\n", + "更新前一天数据\n", + "更新数据前\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "1429 2025-05-07 
3650 32.2987 1 -454.19 6000 94300.4 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "1429 3036.84 1 4876.81 6793.88 \n", + "日期存在,即将更新\n", + "新数据 [3650.0, 32.2987, '', -454.19, '', 94300.352488, '', 2934.179, '', '', 6793.8778]\n", + "更新数据后\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "1429 2025-05-07 3650 32.2987 1 -454.19 6000 94300.4 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "1429 3036.84 1 4876.81 6793.88 \n", + "20250508\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "1428 2025-05-06 3650 32.2987 1 -454.19 6000 94300.4 3630 \n", + "1429 2025-05-07 3600 33.9765 1 693.59 6000 97550.8 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "1428 2934.18 1 4876.81 6793.88 \n", + "1429 3036.84 1 4876.81 7237.56 \n", + "前一天的 2934.179 \n", + "现在的 3036.8392 \n", + " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "0 1428 2025-05-06 3650 32.2987 1 -454.19 6000 94300.4 3630 \n", + "1 1429 2025-05-07 3600 33.9765 1 693.59 6000 97550.8 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "0 2934.18 1 4876.81 6793.88 \n", + "1 3036.84 1 4876.81 7237.56 \n", + "昨日计划提货偏差改之前 693.59\n", + "昨日计划提货偏差改之后 2360.7506000000003\n", + "**************************************************预测结果: 3673.89\n", + "更新前一天数据\n", + "更新数据前\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "1430 2025-05-08 3600 33.9765 1 693.59 6000 97550.8 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "1430 2940.67 1 4876.81 7237.56 \n", + "日期存在,即将更新\n", + "新数据 [3600.0, 33.9765, '', 693.59, '', 97550.7881218, '', 3036.8392, '', '', 7237.56]\n", + "更新数据后\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "1430 2025-05-08 3600 33.9765 1 693.59 6000 97550.8 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "1430 2940.67 1 4876.81 7237.56 \n", + "20250509\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "1429 2025-05-07 3600 33.9765 1 693.59 6000 97550.8 3630 \n", + "1430 2025-05-08 3600 34.396 1 
329.51 6000 100086 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "1429 3036.84 1 4876.81 7237.56 \n", + "1430 2940.67 1 4876.81 7711.98 \n", + "前一天的 3036.8392 \n", + "现在的 2940.6669 \n", + " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "0 1429 2025-05-07 3600 33.9765 1 693.59 6000 97550.8 3630 \n", + "1 1430 2025-05-08 3600 34.396 1 329.51 6000 100086 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "0 3036.84 1 4876.81 7237.56 \n", + "1 2940.67 1 4876.81 7711.98 \n", + "昨日计划提货偏差改之前 329.51\n", + "昨日计划提货偏差改之后 2835.1686\n", + "**************************************************预测结果: 3604.08\n", + "更新前一天数据\n", + "更新数据前\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1431 2025-05-09 3600 34.396 1 329.51 6000 100086 3630 3077.15 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1431 1 4876.81 7711.98 \n", + "日期存在,即将更新\n", + "新数据 [3600.0, 34.396, '', 329.51, '', 100085.7741619, '', 2940.6669, '', '', 7711.978]\n", + "更新数据后\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1431 2025-05-09 3600 34.396 1 329.51 6000 100086 3630 3077.15 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1431 1 4876.81 7711.98 \n", + "20250510\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1430 2025-05-08 3600 34.396 1 329.51 6000 100086 3630 2940.67 \n", + "1431 2025-05-09 3620 36.9128 1 -1636.9 6000 101412 3630 3077.15 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1430 1 4876.81 7711.98 \n", + "1431 1 4876.81 7246.98 \n", + "前一天的 2940.6669 \n", + "现在的 3077.1512 \n", + " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "0 1430 2025-05-08 3600 34.396 1 329.51 6000 100086 3630 \n", + "1 1431 2025-05-09 3620 36.9128 1 -1636.9 6000 101412 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "0 2940.67 1 4876.81 7711.98 \n", + "1 3077.15 1 4876.81 7246.98 \n", + "昨日计划提货偏差改之前 -1636.9\n", + "昨日计划提货偏差改之后 2370.1705999999995\n", + "**************************************************预测结果: 3630.55\n", + 
"更新前一天数据\n", + "更新数据前\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1432 2025-05-10 3620 36.9128 1 -1636.9 6000 101412 3630 3152.87 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1432 1 4876.81 7246.98 \n", + "日期存在,即将更新\n", + "新数据 [3620.0, 36.9128, '', -1636.9, '', 101412.2754832, '', 3077.1512, '', '', 7246.98]\n", + "更新数据后\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1432 2025-05-10 3620 36.9128 1 -1636.9 6000 101412 3630 3152.87 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1432 1 4876.81 7246.98 \n", + "20250511\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1431 2025-05-09 3620 36.9128 1 -1636.9 6000 101412 3630 3077.15 \n", + "1432 2025-05-10 3620 36.9128 1 3087.72 6000 104559 3630 3152.87 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1431 1 4876.81 7246.98 \n", + "1432 1 4876.81 7347.37 \n", + "前一天的 3077.1512 \n", + "现在的 3152.8717 \n", + " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "0 1431 2025-05-09 3620 36.9128 1 -1636.9 6000 101412 3630 \n", + "1 1432 2025-05-10 3620 36.9128 1 3087.72 6000 104559 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "0 3077.15 1 4876.81 7246.98 \n", + "1 3152.87 1 4876.81 7347.37 \n", + "昨日计划提货偏差改之前 3087.72\n", + "昨日计划提货偏差改之后 2470.5621\n", + "**************************************************预测结果: 3623.41\n", + "更新前一天数据\n", + "更新数据前\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1433 2025-05-11 3620 36.9128 1 3087.72 6000 104559 3630 3151.4 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1433 1 4876.81 7347.37 \n", + "日期存在,即将更新\n", + "新数据 [3620.0, '', '', 3087.72, '', 104559.3953618, '', 3152.8717, '', '', 7347.3715]\n", + "更新数据后\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1433 2025-05-11 3620 36.9128 1 3087.72 6000 104559 3630 3151.4 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1433 1 4876.81 7347.37 \n", + "20250512\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 
\\\n", + "1432 2025-05-10 3620 36.9128 1 3087.72 6000 104559 3630 3152.87 \n", + "1433 2025-05-11 3620 36.9128 1 2942.36 6000 106829 3630 3151.4 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1432 1 4876.81 7347.37 \n", + "1433 1 4876.81 7365.94 \n", + "前一天的 3152.8717 \n", + "现在的 3151.3966 \n", + " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "0 1432 2025-05-10 3620 36.9128 1 3087.72 6000 104559 3630 \n", + "1 1433 2025-05-11 3620 36.9128 1 2942.36 6000 106829 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "0 3152.87 1 4876.81 7347.37 \n", + "1 3151.4 1 4876.81 7365.94 \n", + "昨日计划提货偏差改之前 2942.36\n", + "昨日计划提货偏差改之后 2489.1256000000003\n", + "**************************************************预测结果: 3623.44\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "更新前一天数据\n", + "更新数据前\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1434 2025-05-12 3620 36.9128 1 2942.36 6000 106829 3630 3171.09 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1434 1 4876.81 7365.94 \n", + "日期存在,即将更新\n", + "新数据 [3620.0, '', '', 2942.36, '', 106829.1756043, '', 3151.3966, '', '', 7365.935]\n", + "更新数据后\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1434 2025-05-12 3620 36.9128 1 2942.36 6000 106829 3630 3171.09 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1434 1 4876.81 7365.94 \n", + "20250513\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1433 2025-05-11 3620 36.9128 1 2942.36 6000 106829 3630 3151.4 \n", + "1434 2025-05-12 3620 34.396 1 251.7 6000 106775 3630 3171.09 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1433 1 4876.81 7365.94 \n", + "1434 1 4876.81 7267.18 \n", + "前一天的 3151.3966 \n", + "现在的 3171.0934 \n", + " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "0 1433 2025-05-11 3620 36.9128 1 2942.36 6000 106829 3630 \n", + "1 1434 2025-05-12 3620 34.396 1 251.7 6000 106775 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "0 3151.4 1 4876.81 7365.94 \n", + 
"1 3171.09 1 4876.81 7267.18 \n", + "昨日计划提货偏差改之前 251.7\n", + "昨日计划提货偏差改之后 2390.3720999999996\n", + "**************************************************预测结果: 3623.39\n", + "更新前一天数据\n", + "更新数据前\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1435 2025-05-13 3620 34.396 1 251.7 6000 106775 3630 3191.69 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1435 1 4876.81 7267.18 \n", + "日期存在,即将更新\n", + "新数据 [3620.0, 34.396, '', 251.7, '', 106775.4686698, '', 3171.0934, '', '', 7267.1815]\n", + "更新数据后\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1435 2025-05-13 3620 34.396 1 251.7 6000 106775 3630 3191.69 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1435 1 4876.81 7267.18 \n", + "20250514\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1434 2025-05-12 3620 34.396 1 251.7 6000 106775 3630 3171.09 \n", + "1435 2025-05-13 3600 34.396 1 -256.33 6000 106220 3630 3191.69 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1434 1 4876.81 7267.18 \n", + "1435 1 4876.81 7248.69 \n", + "前一天的 3171.0934 \n", + "现在的 3191.6925 \n", + " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "0 1434 2025-05-12 3620 34.396 1 251.7 6000 106775 3630 \n", + "1 1435 2025-05-13 3600 34.396 1 -256.33 6000 106220 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "0 3171.09 1 4876.81 7267.18 \n", + "1 3191.69 1 4876.81 7248.69 \n", + "昨日计划提货偏差改之前 -256.33\n", + "昨日计划提货偏差改之后 2371.8786\n", + "**************************************************预测结果: 3623.36\n", + "更新前一天数据\n", + "更新数据前\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1436 2025-05-14 3600 34.396 1 -256.33 6000 106220 3630 3318.29 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1436 1 4876.81 7248.69 \n", + "日期存在,即将更新\n", + "新数据 [3600.0, 34.396, '', -256.33, '', 106220.4490483, '', 3191.6925, '', '', 7248.688]\n", + "更新数据后\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1436 2025-05-14 3600 34.396 1 -256.33 6000 106220 
3630 3318.29 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1436 1 4876.81 7248.69 \n", + "20250515\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1435 2025-05-13 3600 34.396 1 -256.33 6000 106220 3630 3191.69 \n", + "1436 2025-05-14 3600 33.557 1 -1015.89 6000 106321 3630 3318.29 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1435 1 4876.81 7248.69 \n", + "1436 1 4876.81 7200.57 \n", + "前一天的 3191.6925 \n", + "现在的 3318.2936 \n", + " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "0 1435 2025-05-13 3600 34.396 1 -256.33 6000 106220 3630 \n", + "1 1436 2025-05-14 3600 33.557 1 -1015.89 6000 106321 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "0 3191.69 1 4876.81 7248.69 \n", + "1 3318.29 1 4876.81 7200.57 \n", + "昨日计划提货偏差改之前 -1015.89\n", + "昨日计划提货偏差改之后 2323.7650999999996\n", + "**************************************************预测结果: 3628.64\n", + "更新前一天数据\n", + "更新数据前\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1437 2025-05-15 3600 33.557 1 -1015.89 6000 106321 3630 3170.14 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1437 1 4876.81 7200.57 \n", + "日期存在,即将更新\n", + "新数据 [3600.0, 33.557, '', -1015.89, '', 106321.4042938, '', 3318.2936, '', '', 7200.5745]\n", + "更新数据后\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1437 2025-05-15 3600 33.557 1 -1015.89 6000 106321 3630 3170.14 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1437 1 4876.81 7200.57 \n", + "20250516\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1436 2025-05-14 3600 33.557 1 -1015.89 6000 106321 3630 3318.29 \n", + "1437 2025-05-15 3600 33.9765 1 1408.4 6000 105595 3630 3170.14 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1436 1 4876.81 7200.57 \n", + "1437 1 4876.81 7298.53 \n", + "前一天的 3318.2936 \n", + "现在的 3170.1365 \n", + " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "0 1436 2025-05-14 3600 33.557 1 -1015.89 6000 106321 3630 \n", + "1 1437 2025-05-15 3600 33.9765 1 
1408.4 6000 105595 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "0 3318.29 1 4876.81 7200.57 \n", + "1 3170.14 1 4876.81 7298.53 \n", + "昨日计划提货偏差改之前 1408.4\n", + "昨日计划提货偏差改之后 2421.7255999999998\n", + "**************************************************预测结果: 3573.82\n", + "更新前一天数据\n", + "更新数据前\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1438 2025-05-16 3620 33.9765 1 1408.4 6000 105595 3630 3164.18 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1438 1 4876.81 7298.53 \n", + "日期存在,即将更新\n", + "新数据 [3600.0, 33.9765, '', 1408.4, '', 105595.49, '', 3170.1365, '', '', 7298.535]\n", + "更新数据后\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1438 2025-05-16 3620 33.9765 1 1408.4 6000 105595 3630 3164.18 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1438 1 4876.81 7298.53 \n", + "20250517\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1437 2025-05-15 3600 33.9765 1 1408.4 6000 105595 3630 3170.14 \n", + "1438 2025-05-16 3620 34.8154 1 -1308.63 6000 107985 3630 3164.18 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1437 1 4876.81 7298.53 \n", + "1438 1 4876.81 7407.17 \n", + "前一天的 3170.1365 \n", + "现在的 3164.177 \n", + " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "0 1437 2025-05-15 3600 33.9765 1 1408.4 6000 105595 3630 \n", + "1 1438 2025-05-16 3620 34.8154 1 -1308.63 6000 107985 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "0 3170.14 1 4876.81 7298.53 \n", + "1 3164.18 1 4876.81 7407.17 \n", + "昨日计划提货偏差改之前 -1308.63\n", + "昨日计划提货偏差改之后 2530.3635999999997\n", + "**************************************************预测结果: 3603.59\n", + "更新前一天数据\n", + "更新数据前\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1439 2025-05-17 3620 34.8154 1 -1308.63 6000 107985 3630 3246.69 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1439 1 4876.81 7407.17 \n", + "日期存在,即将更新\n", + "新数据 [3620.0, 34.8154, '', -1308.63, '', 107984.71, '', 3164.177, '', '', 7407.173]\n", + "更新数据后\n", + " 
日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1439 2025-05-17 3620 34.8154 1 -1308.63 6000 107985 3630 3246.69 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1439 1 4876.81 7407.17 \n", + "20250518\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1438 2025-05-16 3620 34.8154 1 -1308.63 6000 107985 3630 3164.18 \n", + "1439 2025-05-17 3620 34.8154 1 3334.9 6000 109984 3630 3246.69 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1438 1 4876.81 7407.17 \n", + "1439 1 4876.81 7029.96 \n", + "前一天的 3164.177 \n", + "现在的 3246.6882 \n", + " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "0 1438 2025-05-16 3620 34.8154 1 -1308.63 6000 107985 3630 \n", + "1 1439 2025-05-17 3620 34.8154 1 3334.9 6000 109984 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "0 3164.18 1 4876.81 7407.17 \n", + "1 3246.69 1 4876.81 7029.96 \n", + "昨日计划提货偏差改之前 3334.9\n", + "昨日计划提货偏差改之后 2153.1485999999995\n", + "**************************************************预测结果: 3623.0\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "更新前一天数据\n", + "更新数据前\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1440 2025-05-18 3620 34.8154 1 3334.9 6000 109984 3630 3249.93 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1440 1 4876.81 7029.96 \n", + "日期存在,即将更新\n", + "新数据 [3620.0, '', '', 3334.9, '', 109984.19, '', 3246.6882, '', '', 7029.958]\n", + "更新数据后\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1440 2025-05-18 3620 34.8154 1 3334.9 6000 109984 3630 3249.93 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1440 1 4876.81 7029.96 \n", + "20250519\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1439 2025-05-17 3620 34.8154 1 3334.9 6000 109984 3630 3246.69 \n", + "1440 2025-05-18 3620 34.8154 1 793.84 6000 110418 3630 3249.93 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1439 1 4876.81 7029.96 \n", + "1440 1 4876.81 6533.09 \n", + "前一天的 3246.6882 \n", + "现在的 3249.9268 \n", + 
" index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "0 1439 2025-05-17 3620 34.8154 1 3334.9 6000 109984 3630 \n", + "1 1440 2025-05-18 3620 34.8154 1 793.84 6000 110418 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "0 3246.69 1 4876.81 7029.96 \n", + "1 3249.93 1 4876.81 6533.09 \n", + "昨日计划提货偏差改之前 793.84\n", + "昨日计划提货偏差改之后 1656.2806\n", + "**************************************************预测结果: 3622.23\n" ] } ], diff --git a/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb b/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb index eb32307..bcc2226 100644 --- a/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb +++ b/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb @@ -2,14 +2,56 @@ "cells": [ { "cell_type": "code", - "execution_count": 12, +<<<<<<< .mine + "execution_count": null, +||||||| .r87044 + "execution_count": 1, +======= + "execution_count": 5, +>>>>>>> .r87201 "metadata": {}, "outputs": [ { +<<<<<<< .mine + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From C:\\Users\\EDY\\AppData\\Roaming\\Python\\Python311\\site-packages\\keras\\src\\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.\n", + "\n" + ] + }, + { +||||||| .r87044 + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\statsmodels\\compat\\pandas.py:49: FutureWarning:\n", + "\n", + "The Panel class is removed from pandas. 
Accessing it from the top-level namespace will also be removed in the next version\n", + "\n" + ] + }, + { +======= +>>>>>>> .r87201 "data": { "text/html": [ " \n", + " \n", " " ] }, @@ -33,8 +79,6 @@ "source": [ "import requests\n", "import json\n", - "import xlrd\n", - "import xlwt\n", "from datetime import datetime, timedelta\n", "import time\n", "import pandas as pd\n", @@ -151,7 +195,7 @@ "\n", " search_data = {\n", " \"data\": {\n", - " \"date\": get_cur_time(date)[0],\n", + " \"date\": getNow(date)[0],\n", " \"dataItemNoList\": dataItemNoList\n", " },\n", " \"funcModule\": \"数据项\",\n", @@ -169,24 +213,64 @@ "\n", "# xls文件处理\n", "\n", - "def get_cur_time(date = ''):\n", - " if date == '':\n", - " now = datetime.now()\n", - " else:\n", - " now = date\n", - " year = now.year\n", - " month = now.month\n", - " day = now.day\n", "\n", - " if month < 10:\n", - " month = \"0\" + str(month)\n", - " if day < 10:\n", - " day = \"0\" + str(day)\n", - " cur_time = str(year) + str(month) + str(day)\n", - " cur_time2 = str(year) + \"-\" + str(month) + \"-\" + str(day)\n", - "# cur_time = '20231007'\n", - "# cur_time2 = '2023-10-07'\n", - " return cur_time, cur_time2\n", + "\n", + "def getNow(date='', offset=0):\n", + " \"\"\"生成指定日期的两种格式字符串\n", + " Args:\n", + " date: 支持多种输入类型:\n", + " - datetime对象\n", + " - 字符串格式(支持'%Y-%m-%d'和'%Y%m%d')\n", + " - 空字符串表示当前日期\n", + " offset: 日期偏移天数\n", + " Returns:\n", + " tuple: (紧凑日期字符串, 标准日期字符串)\n", + " \"\"\"\n", + " # 日期解析逻辑\n", + " from datetime import datetime,timedelta\n", + " if isinstance(date, datetime):\n", + " now = date\n", + " else:\n", + " now = datetime.now()\n", + " if date:\n", + " # 尝试多种日期格式解析\n", + " for fmt in ('%Y-%m-%d', '%Y%m%d', '%Y/%m/%d'):\n", + " try:\n", + " now = datetime.strptime(str(date), fmt)\n", + " break\n", + " except ValueError:\n", + " continue\n", + " else:\n", + " raise ValueError(f\"无法解析的日期格式: {date}\")\n", + "\n", + " # 应用日期偏移\n", + " now = now - timedelta(days=offset)\n", + " \n", + " # 统一格式化输出\n", 
+ " date_str = now.strftime(\"%Y-%m-%d\")\n", + " compact_date = date_str.replace(\"-\", \"\")\n", + " return compact_date, date_str\n", + "\n", + "\n", + "\n", + "# def get_cur_time(date = ''):\n", + "# if date == '':\n", + "# now = datetime.now()\n", + "# else:\n", + "# now = date\n", + "# year = now.year\n", + "# month = now.month\n", + "# day = now.day\n", + "\n", + "# if month < 10:\n", + "# month = \"0\" + str(month)\n", + "# if day < 10:\n", + "# day = \"0\" + str(day)\n", + "# cur_time = str(year) + str(month) + str(day)\n", + "# cur_time2 = str(year) + \"-\" + str(month) + \"-\" + str(day)\n", + "# # cur_time = '20231007'\n", + "# # cur_time2 = '2023-10-07'\n", + "# return cur_time, cur_time2\n", "\n", "\n", "def get_head_push_auth():\n", @@ -205,7 +289,7 @@ " \"funcOperation\": \"新增\",\n", " \"data\": [\n", " {\"dataItemNo\": \"C01100036|Forecast_Price|ACN\",\n", - " \"dataDate\": get_cur_time(date)[0],\n", + " \"dataDate\": getNow(date)[0],\n", " \"dataStatus\": \"add\",\n", " \"dataValue\": forecast_price()\n", " }\n", @@ -217,7 +301,7 @@ " print(res.text)\n", "\n", "def forecast_price():\n", - " df_test = pd.read_excel('沥青数据项.xlsx',sheet_name='数据项历史数据')\n", + " df_test = pd.read_excel('沥青数据项.xlsx')\n", " df_test.drop([0],inplace=True)\n", " df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", @@ -277,7 +361,7 @@ "\n", " pd.set_option('display.max_rows',40) \n", " pd.set_option('display.max_columns',40) \n", - " df_test = pd.read_excel('沥青数据项.xlsx',sheet_name='数据项历史数据')\n", + " df_test = pd.read_excel('沥青数据项.xlsx')\n", " df_test.drop([0],inplace=True)\n", " df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", @@ -434,31 +518,40 @@ " pickle.dump(grid_search_XGB, file)\n", "\n", "def read_xls_data():\n", + " \"\"\"获取特征项ID\"\"\"\n", " global one_cols, 
two_cols\n", - " # 打开 XLS 文件\n", - " workbook = xlrd.open_workbook(read_file_path_name)\n", + " # 使用pandas读取Excel文件\n", + " df = pd.read_excel(read_file_path_name, header=None) # 不自动识别列名\n", + " # 获取第二行数据(索引为1)\n", + " one_cols = df.iloc[1].tolist()[1:]\n", + " print(f'获取到的数据项ID{one_cols}')\n", "\n", - " # 获取所有表格名称\n", - " # sheet_names = workbook.sheet_names()\n", + "# def read_xls_data():\n", + "# global one_cols, two_cols\n", + "# # 打开 XLS 文件\n", + "# workbook = xlrd.open_workbook(read_file_path_name)\n", "\n", - " # 选择第一个表格\n", - " sheet = workbook.sheet_by_index(0)\n", + "# # 获取所有表格名称\n", + "# # sheet_names = workbook.sheet_names()\n", "\n", - " # 获取行数和列数\n", - " num_rows = sheet.nrows\n", - " # num_cols = sheet.ncols\n", + "# # 选择第一个表格\n", + "# sheet = workbook.sheet_by_index(0)\n", "\n", - " # 遍历每一行,获取单元格数据\n", - " # for i in range(num_rows):\n", - " # row_data = sheet.row_values(i)\n", - " # one_cols.append(row_data)\n", - " # two_cols.append(row_data[1])\n", + "# # 获取行数和列数\n", + "# num_rows = sheet.nrows\n", + "# # num_cols = sheet.ncols\n", "\n", - " row_data = sheet.row_values(1)\n", - " one_cols = row_data\n", + "# # 遍历每一行,获取单元格数据\n", + "# # for i in range(num_rows):\n", + "# # row_data = sheet.row_values(i)\n", + "# # one_cols.append(row_data)\n", + "# # two_cols.append(row_data[1])\n", "\n", - " # 关闭 XLS 文件\n", - " # workbook.close()\n", + "# row_data = sheet.row_values(1)\n", + "# one_cols = row_data\n", + "\n", + "# # 关闭 XLS 文件\n", + "# # workbook.close()\n", "\n", "def start_3(date=None,token=None,token_push=None):\n", " '''预测上传数据'''\n", @@ -570,74 +663,111 @@ " else:\n", " return None\n", "\n", - "def save_queryDataListItemNos_xls(data_df,dataItemNoList):\n", "\n", + "def save_queryDataListItemNos_xls(data_df,dataItemNoList):\n", " current_year_month = datetime.now().strftime('%Y-%m')\n", " grouped = data_df.groupby(\"dataDate\")\n", "\n", - " # 打开xls文件\n", - " workbook = xlrd.open_workbook('沥青数据项.xls')\n", - "\n", - " # 获取所有sheet的个数\n", - " 
sheet_count = len(workbook.sheet_names())\n", - "\n", - " # 获取所有sheet的名称\n", - " sheet_names = workbook.sheet_names()\n", - "\n", - " new_workbook = xlwt.Workbook()\n", - " for i in range(sheet_count):\n", - " # 获取当前sheet\n", - " sheet = workbook.sheet_by_index(i)\n", - "\n", - " # 获取sheet的行数和列数\n", - " row_count = sheet.nrows\n", - " col_count = sheet.ncols\n", - " # 获取原有数据\n", - " data = []\n", - " for row in range(row_count):\n", - " row_data = []\n", - " for col in range(col_count):\n", - " row_data.append(sheet.cell_value(row, col))\n", - " data.append(row_data)\n", - "\n", - " # 创建xlwt的Workbook对象\n", - " # 创建sheet\n", - " new_sheet = new_workbook.add_sheet(sheet_names[i])\n", - "\n", + " # 使用openpyxl打开xlsx文件\n", + " from openpyxl import load_workbook\n", + " workbook = load_workbook('沥青数据项.xlsx')\n", "\n", + " # 创建新工作簿\n", + " new_workbook = load_workbook('沥青数据项.xlsx')\n", + " \n", + " for sheetname in workbook.sheetnames:\n", + " sheet = workbook[sheetname]\n", + " new_sheet = new_workbook[sheetname]\n", + " \n", " current_year_month_row = 0\n", - " # 将原有的数据写入新的sheet\n", - " for row in range(row_count):\n", - " for col in range(col_count):\n", - " col0 = data[row][0]\n", - " # print(\"col0\",col0[:7])\n", - " if col0[:7] == current_year_month:\n", - " current_year_month_row += 1\n", - " break\n", - " new_sheet.write(row, col, data[row][col])\n", + " # 查找当前月份数据起始行\n", + " for row_idx, row in enumerate(sheet.iter_rows(values_only=True), 1):\n", + " if str(row[0]).startswith(current_year_month):\n", + " current_year_month_row += 1\n", "\n", - "\n", - " # print(\"current_year_month_row\",current_year_month_row)\n", - " if i == 0:\n", - " rowFlag = 0\n", - " # 查看每组数据\n", - " for date, group in grouped:\n", - " new_sheet.write(row_count + rowFlag - current_year_month_row, 0, date)\n", - " for j in range(len(dataItemNoList)):\n", - " dataItemNo = dataItemNoList[j]\n", - "\n", - " # for dataItemNo in dataItemNoList:\n", + " # 追加新数据\n", + " if sheetname == 
workbook.sheetnames[0]:\n", + " start_row = sheet.max_row - current_year_month_row + 1\n", + " for row_idx, (date, group) in enumerate(grouped, start=start_row):\n", + " new_sheet.cell(row=row_idx, column=1, value=date)\n", + " for j, dataItemNo in enumerate(dataItemNoList, start=2):\n", " if group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values:\n", + " new_sheet.cell(row=row_idx, column=j, \n", + " value=group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values[0])\n", "\n", - " new_sheet.write(row_count + rowFlag - current_year_month_row, j + 1, group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values[0])\n", - "\n", - " rowFlag += 1\n", - "\n", - "\n", - " # 保存新的xls文件\n", + " # 保存修改后的xlsx文件\n", " new_workbook.save(\"沥青数据项.xlsx\")\n", "\n", + "\n", + "# def save_queryDataListItemNos_xls(data_df,dataItemNoList):\n", + "# from datetime import datetime, timedelta\n", + "# current_year_month = datetime.now().strftime('%Y-%m')\n", + "# grouped = data_df.groupby(\"dataDate\")\n", + "\n", + "# # 打开xls文件\n", + "# workbook = xlrd.open_workbook('沥青数据项.xlsx')\n", + "\n", + "# # 获取所有sheet的个数\n", + "# sheet_count = len(workbook.sheet_names())\n", + "\n", + "# # 获取所有sheet的名称\n", + "# sheet_names = workbook.sheet_names()\n", + "\n", + "# new_workbook = xlwt.Workbook()\n", + "# for i in range(sheet_count):\n", + "# # 获取当前sheet\n", + "# sheet = workbook.sheet_by_index(i)\n", + "\n", + "# # 获取sheet的行数和列数\n", + "# row_count = sheet.nrows\n", + "# col_count = sheet.ncols\n", + "# # 获取原有数据\n", + "# data = []\n", + "# for row in range(row_count):\n", + "# row_data = []\n", + "# for col in range(col_count):\n", + "# row_data.append(sheet.cell_value(row, col))\n", + "# data.append(row_data)\n", + "\n", + "# # 创建xlwt的Workbook对象\n", + "# # 创建sheet\n", + "# new_sheet = new_workbook.add_sheet(sheet_names[i])\n", + "\n", + "\n", + "# current_year_month_row = 0\n", + "# # 将原有的数据写入新的sheet\n", + "# for row in range(row_count):\n", + "# for col in range(col_count):\n", 
+ "# col0 = data[row][0]\n", + "# # print(\"col0\",col0[:7])\n", + "# if col0[:7] == current_year_month:\n", + "# current_year_month_row += 1\n", + "# break\n", + "# new_sheet.write(row, col, data[row][col])\n", + "\n", + "\n", + "# # print(\"current_year_month_row\",current_year_month_row)\n", + "# if i == 0:\n", + "# rowFlag = 0\n", + "# # 查看每组数据\n", + "# for date, group in grouped:\n", + "# new_sheet.write(row_count + rowFlag - current_year_month_row, 0, date)\n", + "# for j in range(len(dataItemNoList)):\n", + "# dataItemNo = dataItemNoList[j]\n", + "\n", + "# # for dataItemNo in dataItemNoList:\n", + "# if group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values:\n", + "\n", + "# new_sheet.write(row_count + rowFlag - current_year_month_row, j + 1, group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values[0])\n", + "\n", + "# rowFlag += 1\n", + "\n", + "\n", + "# # 保存新的xls文件\n", + "# new_workbook.save(\"沥青数据项.xlsx\")\n", + "\n", "def queryDataListItemNos(token=None):\n", + " from datetime import datetime, timedelta\n", " df = pd.read_excel('沥青数据项.xlsx')\n", " dataItemNoList = df.iloc[0].tolist()[1:]\n", " \n", @@ -668,7 +798,130 @@ " save_queryDataListItemNos_xls(data_df,dataItemNoList)\n", " print('当月数据更新完成')\n", "\n", + "\n", + "def save_xls_1(append_rows):\n", + "\n", + " # 打开xls文件\n", + " workbook = xlrd.open_workbook('沥青数据项.xlsx')\n", + "\n", + " # 获取所有sheet的个数\n", + " sheet_count = len(workbook.sheet_names())\n", + "\n", + " # 获取所有sheet的名称\n", + " sheet_names = workbook.sheet_names()\n", + "\n", + " new_workbook = xlwt.Workbook()\n", + " for i in range(sheet_count):\n", + " # 获取当前sheet\n", + " sheet = workbook.sheet_by_index(i)\n", + "\n", + " # 获取sheet的行数和列数\n", + " row_count = sheet.nrows - 1\n", + " col_count = sheet.ncols\n", + " # 获取原有数据\n", + " data = []\n", + " for row in range(row_count):\n", + " row_data = []\n", + " for col in range(col_count):\n", + " row_data.append(sheet.cell_value(row, col))\n", + " data.append(row_data)\n", + 
" # 创建xlwt的Workbook对象\n", + " # 创建sheet\n", + " new_sheet = new_workbook.add_sheet(sheet_names[i])\n", + "\n", + " # 将原有的数据写入新的sheet\n", + " for row in range(row_count):\n", + " for col in range(col_count):\n", + " new_sheet.write(row, col, data[row][col])\n", + "\n", + " if i == 0:\n", + " # 在新的sheet中添加数据\n", + " for col in range(col_count):\n", + " new_sheet.write(row_count, col, append_rows[col])\n", + "\n", + " # 保存新的xls文件\n", + " new_workbook.save(\"沥青数据项.xlsx\") \n", + "\n", + "\n", + "\n", + "def start(date=''):\n", + " \"\"\"获取当日数据\"\"\"\n", + " read_xls_data()\n", + " token = get_head_auth()\n", + " if not token:\n", + " return\n", + " \n", + " cur_time,cur_time2 = getNow(date)\n", + " print(f\"获取{cur_time}数据\")\n", + " datas = get_data_value(token, one_cols,date=cur_time)\n", + " print(len(datas))\n", + " print(datas)\n", + " if not datas:\n", + " return\n", + "\n", + " append_rows = [cur_time2]\n", + " dataItemNo_dataValue = {}\n", + " for data_value in datas:\n", + " if \"dataValue\" not in data_value:\n", + " print(data_value)\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", + " else:\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", + " \n", + " for value in one_cols:\n", + " if value in dataItemNo_dataValue:\n", + " append_rows.append(dataItemNo_dataValue[value])\n", + " else:\n", + " append_rows.append(\"\")\n", + " \n", + " print('添加的行:',len(append_rows),append_rows)\n", + " save_xls_2(append_rows)\n", + "\n", + "\n", + "def save_xls_2(append_rows):\n", + " \"\"\"保存或更新数据到Excel文件\n", + " 参数:\n", + " append_rows (list): 需要追加/更新的数据行,格式为[日期, 数据项1, 数据项2,...]\n", + " \"\"\"\n", + " # try:\n", + " # 读取现有数据(假设第一行为列名)\n", + " df = pd.read_excel('沥青数据项.xlsx', sheet_name=0)\n", + " print('文件中的数据列数:',len(df.columns),df.columns)\n", + " # 转换append_rows为DataFrame\n", + " if len(append_rows) != len(df.columns):\n", + " # 去除第二个元素 ,不知道什么原因多一个空数据\n", + " append_rows.pop(1)\n", + " append_rows = 
pd.DataFrame([append_rows],columns=df.columns)\n", + " # 创建新数据行\n", + " new_date = append_rows['日期'].values[0]\n", + " \n", + " dates = df['日期'].to_list()\n", + " # 判断日期是否存在\n", + " if new_date in dates:\n", + " # 找到日期所在行的索引\n", + " date_mask = df['日期'] == new_date\n", + " # 存在则更新数据\n", + " df.loc[date_mask] = append_rows.values\n", + " print(f\"更新 {new_date} 数据\")\n", + " else:\n", + " # 不存在则追加数据\n", + " df = pd.concat([df, append_rows], ignore_index=True)\n", + " print(df.head())\n", + " print(df.tail())\n", + " print(f\"插入 {new_date} 新数据\")\n", + " \n", + " # 保存更新后的数据\n", + " df.to_excel('沥青数据项.xlsx', index=False, engine='openpyxl')\n", + " \n", + " # except FileNotFoundError:\n", + " # # 如果文件不存在则创建新文件\n", + " # pd.DataFrame([append_rows]).to_excel('沥青数据项.xlsx', index=False, engine='openpyxl')\n", + " # except Exception as e:\n", + " # print(f\"保存数据时发生错误: {str(e)}\")\n", + "\n", + "\n", "def main(start_date=None,token=None,token_push=None):\n", + " from datatime import datetime\n", " if start_date is None:\n", " start_date = datetime.now()\n", " if token is None:\n", @@ -677,63 +930,192 @@ " token_push = get_head_push_auth()\n", " date = start_date.strftime('%Y%m%d')\n", " print(date)\n", + "# start(date)\n", " # 更新当月数据\n", " queryDataListItemNos(token)\n", " # 训练模型\n", " optimize_Model()\n", - " # 预测&上传预测结果\n", + " # # 预测&上传预测结果\n", " upload_data_to_system(token_push,start_date)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [ { +<<<<<<< .mine + "ename": "AttributeError", + "evalue": "module 'datetime' has no attribute 'now'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[5], line 20\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# if __name__ == \"__main__\":\u001b[39;00m\n\u001b[0;32m 2\u001b[0m 
\u001b[38;5;66;03m# print(\"运行中ing...\")\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;66;03m# # 每天定时12点运行\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 18\u001b[0m \u001b[38;5;66;03m# except :\u001b[39;00m\n\u001b[0;32m 19\u001b[0m \u001b[38;5;66;03m# print(f\"{current_time}任务失败\")\u001b[39;00m\n\u001b[1;32m---> 20\u001b[0m main()\n", + "Cell \u001b[1;32mIn[1], line 848\u001b[0m, in \u001b[0;36mmain\u001b[1;34m(start_date, token, token_push)\u001b[0m\n\u001b[0;32m 846\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mmain\u001b[39m(start_date\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,token\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,token_push\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m 847\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m start_date \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m--> 848\u001b[0m start_date \u001b[38;5;241m=\u001b[39m datetime\u001b[38;5;241m.\u001b[39mnow()\n\u001b[0;32m 849\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m token \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 850\u001b[0m token \u001b[38;5;241m=\u001b[39m get_head_auth()\n", + "\u001b[1;31mAttributeError\u001b[0m: module 'datetime' has no attribute 'now'" +||||||| .r87044 "name": "stdout", "output_type": "stream", "text": [ "运行中ing...\n" +======= + "name": "stdout", + "output_type": "stream", + "text": [ + "20250522\n" +>>>>>>> .r87201 + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20588\\2014406737.py:615: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. 
Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20588\\2014406737.py:298: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: inline\n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\EDY\\.conda\\envs\\jiageyuce\\Lib\\site-packages\\IPython\\core\\magics\\pylab.py:166: UserWarning:\n", + "\n", + "pylab import has clobbered these variables: ['random', 'datetime', 'plot', '__version__']\n", + "`%matplotlib` prevents importing * from pylab and numpy\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best score: 0.997\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 8\n", + "\tn_estimators: 90\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20588\\2014406737.py:238: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. 
You can safely remove this argument.\n", + "\n", + "C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20588\\2014406737.py:272: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "日期\n", + "2025-05-22 3600.097412\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n" ] } ], "source": [ "\n", - "if __name__ == \"__main__\":\n", - " print(\"运行中ing...\")\n", - " # 每天定时12点运行\n", - " while True:\n", - " # 获取当前时间\n", - " current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n", - " # print(current_time)\n", + "# if __name__ == \"__main__\":\n", + "# print(\"运行中ing...\")\n", + "# # 每天定时12点运行\n", + "# while True:\n", + "# # 获取当前时间\n", + "# current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n", + "# # print(current_time)\n", "\n", +<<<<<<< .mine + "# # 判断当前时间是否为执行任务的时间点\n", + "# try:\n", + "# if current_time == \"09:13:00\":\n", + "# print(\"执行定时任务\")\n", + "# main()\n", + "# elif current_time == \"09:18:00\":\n", + "# print(\"执行定时任务\")\n", + "# main()\n", + "# time.sleep(1)\n", + "# except :\n", + "# print(f\"{current_time}任务失败\")\n", + "main()\n", + "main()\n", +||||||| .r87044 " # 判断当前时间是否为执行任务的时间点\n", " try:\n", - " if current_time == \"09:15:00\":\n", + " if current_time == \"09:13:00\":\n", + " print(\"执行定时任务\")\n", + " main()\n", + " elif current_time == \"09:18:00\":\n", " print(\"执行定时任务\")\n", " main()\n", - " elif current_time == \"20:00:00\":\n", - " print('更新数据')\n", - " start_3()\n", " time.sleep(1)\n", " except :\n", " print(f\"{current_time}任务失败\")\n", - "\n", - "\n", + "# main()\n", +======= + "# # 判断当前时间是否为执行任务的时间点\n", + "# try:\n", + "# if current_time == \"09:13:00\":\n", + "# print(\"执行定时任务\")\n", + "# main()\n", + "# elif 
current_time == \"09:18:00\":\n", + "# print(\"执行定时任务\")\n", + "# main()\n", + "# time.sleep(1)\n", + "# except :\n", + "# print(f\"{current_time}任务失败\")\n", + "main()\n", +>>>>>>> .r87201 " # 检测数据准确性, 需要检测放开\n", " # check_data(\"100028098|LISTING_PRICE\")\n", - " # check_data(\"9137070016544622XB|DAY_Yield\")\n" + " # check_data(\"9137070016544622XB|DAY_Yield\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ - "# start_date = datetime(2025, 4, 1)\n", - "# end_date = datetime(2025, 5, 7)\n", + "# start_date = datetime(2025, 5, 16)\n", + "# end_date = datetime(2025, 5, 19)\n", "# token = get_head_auth()\n", "\n", "# token_push = get_head_push_auth()\n", @@ -745,7 +1127,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -753,86 +1135,9 @@ "\n", "\n", "class codeBackup:\n", - " def save_xls_1(append_rows):\n", - "\n", - " # 打开xls文件\n", - " workbook = xlrd.open_workbook('沥青数据项.xlsx')\n", - "\n", - " # 获取所有sheet的个数\n", - " sheet_count = len(workbook.sheet_names())\n", - "\n", - " # 获取所有sheet的名称\n", - " sheet_names = workbook.sheet_names()\n", - "\n", - " new_workbook = xlwt.Workbook()\n", - " for i in range(sheet_count):\n", - " # 获取当前sheet\n", - " sheet = workbook.sheet_by_index(i)\n", - "\n", - " # 获取sheet的行数和列数\n", - " row_count = sheet.nrows - 1\n", - " col_count = sheet.ncols\n", - " # 获取原有数据\n", - " data = []\n", - " for row in range(row_count):\n", - " row_data = []\n", - " for col in range(col_count):\n", - " row_data.append(sheet.cell_value(row, col))\n", - " data.append(row_data)\n", - " # 创建xlwt的Workbook对象\n", - " # 创建sheet\n", - " new_sheet = new_workbook.add_sheet(sheet_names[i])\n", - "\n", - " # 将原有的数据写入新的sheet\n", - " for row in range(row_count):\n", - " for col in range(col_count):\n", - " new_sheet.write(row, col, data[row][col])\n", - "\n", - " if i == 0:\n", - " # 在新的sheet中添加数据\n", - " for col in 
range(col_count):\n", - " new_sheet.write(row_count, col, append_rows[col])\n", - "\n", - " # 保存新的xls文件\n", - " new_workbook.save(\"沥青数据项.xlsx\") \n", "\n", " \n", "\n", - " def start_2(date,token):\n", - " '''更新数据'''\n", - " read_xls_data()\n", - "\n", - " # token = get_head_auth()\n", - " # if not token:\n", - " # return\n", - " \n", - "\n", - " datas = get_data_value(token, one_cols[1:],date)\n", - " if not datas:\n", - " return\n", - "\n", - " # data_list = [two_cols, one_cols]\n", - " append_rows = [get_cur_time(date=date)[1]]\n", - " dataItemNo_dataValue = {}\n", - " for data_value in datas:\n", - " if \"dataValue\" not in data_value:\n", - " print(data_value)\n", - " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", - " else:\n", - " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", - " \n", - " for value in one_cols[1:]:\n", - " if value in dataItemNo_dataValue:\n", - " append_rows.append(dataItemNo_dataValue[value])\n", - " else:\n", - " append_rows.append(\"\")\n", - " print('新增数据:',append_rows)\n", - " save_xls_1(append_rows)\n", - "\n", - " \n", - " # data_list.append(three_cols)\n", - " # write_xls(data_list)\n", - " \n", "\n", " def write_xls(data,date):\n", " # 创建一个Workbook对象\n", @@ -932,7 +1237,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "base", "language": "python", "name": "python3" }, @@ -946,7 +1251,13 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", +<<<<<<< .mine + "version": "3.11.7" +||||||| .r87044 "version": "3.7.0" +======= + "version": "3.11.11" +>>>>>>> .r87201 } }, "nbformat": 4, diff --git a/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb.mine b/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb.mine new file mode 100644 index 0000000..5e7fb95 --- /dev/null +++ b/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb.mine @@ -0,0 +1,1100 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From C:\\Users\\EDY\\AppData\\Roaming\\Python\\Python311\\site-packages\\keras\\src\\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import requests\n", + "import json\n", + "import xlrd\n", + "import xlwt\n", + "from datetime import datetime, timedelta\n", + "import time\n", + "import pandas as pd\n", + "\n", + "# 变量定义\n", + "login_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n", + "search_url = \"http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos\"\n", + "queryDataListItemNos_url = \"http://10.200.32.39/jingbo-api//api/warehouse/dwDataItem/queryDataListItemNos\"\n", + "\n", + "login_push_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n", + "upload_url = \"http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList\"\n", + "\n", + "login_data = {\n", + " \"data\": {\n", + " \"account\": \"api_dev\",\n", + " \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n", + " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n", + " \"terminal\": \"API\"\n", + " },\n", + " \"funcModule\": \"API\",\n", + " \"funcOperation\": \"获取token\"\n", + "}\n", + "\n", + "login_push_data = {\n", + " \"data\": {\n", + " \"account\": \"api_dev\",\n", + " \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n", + " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n", + " \"terminal\": \"API\"\n", + " },\n", + " \"funcModule\": \"API\",\n", + " \"funcOperation\": \"获取token\"\n", + "}\n", + "\n", + "read_file_path_name = \"沥青数据项.xlsx\"\n", + "one_cols = []\n", + "two_cols = []\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + 
"import seaborn as sn\n", + "import random\n", + "import time\n", + "\n", + "\n", + "\n", + "\n", + "from plotly import __version__\n", + "from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot\n", + "\n", + "from sklearn import preprocessing\n", + "\n", + "from pandas import Series,DataFrame\n", + "\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import sklearn.datasets as datasets\n", + "\n", + "#导入机器学习算法模型\n", + "from sklearn.linear_model import Lasso\n", + "from xgboost import XGBRegressor\n", + "\n", + "import statsmodels.api as sm\n", + "# from keras.preprocessing.sequence import TimeseriesGenerator\n", + "from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator\n", + "\n", + "import plotly.express as px\n", + "import plotly.graph_objects as go\n", + "\n", + "import xgboost as xgb\n", + "from xgboost import plot_importance, plot_tree\n", + "from sklearn.metrics import mean_absolute_error\n", + "from statsmodels.tools.eval_measures import mse,rmse\n", + "from sklearn.model_selection import GridSearchCV\n", + "from xgboost import XGBRegressor\n", + "import warnings\n", + "import pickle\n", + "\n", + "from sklearn.metrics import mean_squared_error\n", + "\n", + "#切割训练数据和样本数据\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "#用于模型评分\n", + "from sklearn.metrics import r2_score\n", + "\n", + "le = preprocessing.LabelEncoder()\n", + "\n", + "# print(__version__) # requires version >= 1.9.0\n", + "\n", + "\n", + "import cufflinks as cf\n", + "cf.go_offline()\n", + "\n", + "random.seed(100)\n", + "\n", + "%matplotlib inline\n", + "\n", + "# 数据获取\n", + "\n", + "def get_head_auth():\n", + " login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5))\n", + " text = json.loads(login_res.text)\n", + " if text[\"status\"]:\n", + " token = text[\"data\"][\"accessToken\"]\n", + " return token\n", + " else:\n", + " print(\"获取认证失败\")\n", + " return None\n", + "\n", + "\n", + "def 
get_data_value(token, dataItemNoList,date=''):\n", + "\n", + " search_data = {\n", + " \"data\": {\n", + " \"date\": getNow(date)[0],\n", + " \"dataItemNoList\": dataItemNoList\n", + " },\n", + " \"funcModule\": \"数据项\",\n", + " \"funcOperation\": \"查询\"\n", + " }\n", + " headers = {\"Authorization\": token}\n", + " search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5))\n", + " search_value = json.loads(search_res.text)[\"data\"]\n", + " if search_value:\n", + " return search_value\n", + " else:\n", + " print(\"今天没有新数据\")\n", + " return None\n", + "\n", + "\n", + "# xls文件处理\n", + "\n", + "\n", + "\n", + "def getNow(date='', offset=0):\n", + " \"\"\"生成指定日期的两种格式字符串\n", + " Args:\n", + " date: 支持多种输入类型:\n", + " - datetime对象\n", + " - 字符串格式(支持'%Y-%m-%d'和'%Y%m%d')\n", + " - 空字符串表示当前日期\n", + " offset: 日期偏移天数\n", + " Returns:\n", + " tuple: (紧凑日期字符串, 标准日期字符串)\n", + " \"\"\"\n", + " # 日期解析逻辑\n", + " from datetime import datetime,timedelta\n", + " if isinstance(date, datetime):\n", + " now = date\n", + " else:\n", + " now = datetime.now()\n", + " if date:\n", + " # 尝试多种日期格式解析\n", + " for fmt in ('%Y-%m-%d', '%Y%m%d', '%Y/%m/%d'):\n", + " try:\n", + " now = datetime.strptime(str(date), fmt)\n", + " break\n", + " except ValueError:\n", + " continue\n", + " else:\n", + " raise ValueError(f\"无法解析的日期格式: {date}\")\n", + "\n", + " # 应用日期偏移\n", + " now = now - timedelta(days=offset)\n", + " \n", + " # 统一格式化输出\n", + " date_str = now.strftime(\"%Y-%m-%d\")\n", + " compact_date = date_str.replace(\"-\", \"\")\n", + " return compact_date, date_str\n", + "\n", + "\n", + "\n", + "# def get_cur_time(date = ''):\n", + "# if date == '':\n", + "# now = datetime.now()\n", + "# else:\n", + "# now = date\n", + "# year = now.year\n", + "# month = now.month\n", + "# day = now.day\n", + "\n", + "# if month < 10:\n", + "# month = \"0\" + str(month)\n", + "# if day < 10:\n", + "# day = \"0\" + str(day)\n", + "# cur_time = str(year) + str(month) + str(day)\n", + 
"# cur_time2 = str(year) + \"-\" + str(month) + \"-\" + str(day)\n", + "# # cur_time = '20231007'\n", + "# # cur_time2 = '2023-10-07'\n", + "# return cur_time, cur_time2\n", + "\n", + "\n", + "def get_head_push_auth():\n", + " login_res = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5))\n", + " text = json.loads(login_res.text)\n", + " if text[\"status\"]:\n", + " token = text[\"data\"][\"accessToken\"]\n", + " return token\n", + " else:\n", + " print(\"获取认证失败\")\n", + " return None\n", + "\n", + "def upload_data_to_system(token_push,date):\n", + " data = {\n", + " \"funcModule\": \"数据表信息列表\",\n", + " \"funcOperation\": \"新增\",\n", + " \"data\": [\n", + " {\"dataItemNo\": \"C01100036|Forecast_Price|ACN\",\n", + " \"dataDate\": getNow(date)[0],\n", + " \"dataStatus\": \"add\",\n", + " \"dataValue\": forecast_price()\n", + " }\n", + "\n", + " ]\n", + " }\n", + " headers = {\"Authorization\": token_push}\n", + " res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))\n", + " print(res.text)\n", + "\n", + "def forecast_price():\n", + " df_test = pd.read_excel('沥青数据项.xlsx')\n", + " df_test.drop([0],inplace=True)\n", + " df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", + " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", + " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", + " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", + " '京博签单量','京博库存量','京博产量','加权平均成交价']] = df_test[['汽油执行价','柴油执行价','齐鲁石化销量',\n", + " '齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", + " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", + " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", + " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", + " '京博签单量','京博库存量','京博产量','加权平均成交价']].astype('float')\n", + " # df_test['日期']=pd.to_datetime(df_test['日期'], 
format='%d/%m/%Y',infer_datetime_format=True)\n", + " df_test['日期']=pd.to_datetime(df_test['日期'], format='%Y-%m-%d',infer_datetime_format=True)\n", + "\n", + " #查看每个特征缺失值数量\n", + " MisVal_Check=df_test.isnull().sum().sort_values(ascending=False)\n", + " #去掉缺失值百分比>0.4的特征,去掉这些特征后的新表格命名为df_test_1\n", + " df_MisVal_Check = pd.DataFrame(MisVal_Check,)#\n", + " df_MisVal_Check_1=df_MisVal_Check.reset_index()\n", + " df_MisVal_Check_1.columns=['Variable_Name','Missing_Number'] \n", + " df_MisVal_Check_1['Missing_Number']=df_MisVal_Check_1['Missing_Number']/len(df_test)\n", + " df_test_1=df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number']>0.4].Variable_Name,axis = 1)\n", + "\n", + " #将缺失值补为前一个或者后一个数值\n", + " df_test_1=df_test_1.fillna(df_test.ffill())\n", + " df_test_1=df_test_1.fillna(df_test_1.bfill())\n", + "\n", + " # 选择用于模型训练的列名称\n", + " col_for_training = df_test_1.columns\n", + " import joblib\n", + " Best_model_DalyLGPrice = joblib.load(\"日度价格预测_最佳模型.pkl\")\n", + " # 最新的一天为最后一行的数据\n", + " df_test_1_Day = df_test_1.tail(1)\n", + " # 移除不需要的列\n", + " df_test_1_Day.index = df_test_1_Day[\"日期\"]\n", + " df_test_1_Day = df_test_1_Day.drop([\"日期\"], axis= 1)\n", + " df_test_1_Day=df_test_1_Day.drop('京博指导价',axis=1)\n", + " df_test_1_Day=df_test_1_Day.dropna()\n", + "\n", + " # df_test_1_Day\n", + " #预测今日价格,显示至小数点后两位\n", + " Ypredict_Today=Best_model_DalyLGPrice.predict(df_test_1_Day)\n", + "\n", + " df_test_1_Day['日度预测价格']=Ypredict_Today\n", + " print(df_test_1_Day['日度预测价格'])\n", + " a = df_test_1_Day['日度预测价格']\n", + " a = a[0]\n", + " a = float(a)\n", + " a = round(a,2)\n", + " return a\n", + "def optimize_Model():\n", + " from sklearn.model_selection import train_test_split\n", + " from sklearn.impute import SimpleImputer\n", + " from sklearn.preprocessing import OrdinalEncoder\n", + " from sklearn.feature_selection import SelectFromModel\n", + " from sklearn.metrics import mean_squared_error, r2_score\n", + "\n", + " pd.set_option('display.max_rows',40) 
\n", + " pd.set_option('display.max_columns',40) \n", + " df_test = pd.read_excel('沥青数据项.xlsx')\n", + " df_test.drop([0],inplace=True)\n", + " df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", + " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", + " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", + " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", + " '京博签单量','京博库存量','京博产量','加权平均成交价']] = df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", + " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", + " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", + " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", + " '京博签单量','京博库存量','京博产量','加权平均成交价']].astype('float')\n", + " # df_test = pd.read_csv('定价模型数据收集20190901-20230615.csv',encoding = 'gbk',engine = 'python')\n", + " # df_test['日期']=pd.to_datetime(df_test['日期'], format='%m/%d/%Y',infer_datetime_format=True)\n", + " df_test['日期']=pd.to_datetime(df_test['日期'], format='%Y-%m-%d',infer_datetime_format=True)\n", + " # df_test.tail(3)\n", + " MisVal_Check=df_test.isnull().sum().sort_values(ascending=False)\n", + " #去掉缺失值百分比>0.4的特征,去掉这些特征后的新表格命名为df_test_1\n", + " df_MisVal_Check = pd.DataFrame(MisVal_Check,)#\n", + " df_MisVal_Check_1=df_MisVal_Check.reset_index()\n", + " df_MisVal_Check_1.columns=['Variable_Name','Missing_Number'] \n", + " df_MisVal_Check_1['Missing_Number']=df_MisVal_Check_1['Missing_Number']/len(df_test)\n", + " df_test_1=df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number']>0.4].Variable_Name,axis = 1)\n", + " #将缺失值补为前一个或者后一个数值\n", + " df_test_1=df_test_1.fillna(df_test.ffill())\n", + " df_test_1=df_test_1.fillna(df_test_1.bfill())\n", + " df_test_1[\"日期\"] = pd.to_datetime(df_test_1[\"日期\"])\n", + " df_test_1.index = df_test_1[\"日期\"]\n", + " df_test_1 = df_test_1.drop([\"日期\"], axis= 1)\n", + " 
dataset1=df_test_1.drop('京博指导价',axis=1)#.astype(float)\n", + "\n", + " y=df_test_1['京博指导价']\n", + "\n", + " x=dataset1 \n", + "\n", + " train = x\n", + " target = y\n", + "\n", + " #切割数据样本集合测试集\n", + " X_train,x_test,y_train,y_true = train_test_split(train,target,test_size=0.2,random_state=0)\n", + " \n", + " \n", + " from sklearn.linear_model import Lasso\n", + " from xgboost import XGBRegressor\n", + "\n", + " import statsmodels.api as sm\n", + " # from keras.preprocessing.sequence import TimeseriesGenerator\n", + " from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator\n", + "\n", + " import plotly.express as px\n", + " import plotly.graph_objects as go\n", + "\n", + " import xgboost as xgb\n", + " from xgboost import plot_importance, plot_tree\n", + " from sklearn.metrics import mean_absolute_error\n", + " from statsmodels.tools.eval_measures import mse,rmse\n", + " from sklearn.model_selection import GridSearchCV\n", + " from xgboost import XGBRegressor\n", + " import warnings\n", + " import pickle\n", + "\n", + " from sklearn.metrics import mean_squared_error\n", + "\n", + " #切割训练数据和样本数据\n", + " from sklearn.model_selection import train_test_split\n", + "\n", + " #用于模型评分\n", + " from sklearn.metrics import r2_score\n", + "\n", + " #模型缩写\n", + " Lasso = Lasso(random_state=0)\n", + " XGBR = XGBRegressor(random_state=0)\n", + " Lasso.fit(X_train,y_train)\n", + " XGBR.fit(X_train,y_train)\n", + " y_pre_Lasso = Lasso.predict(x_test)\n", + " y_pre_XGBR = XGBR.predict(x_test)\n", + "\n", + " #计算Lasso、XGBR、RandomForestR、AdaBoostR、GradientBoostingR、BaggingRegressor各模型的R²\n", + " Lasso_score = r2_score(y_true,y_pre_Lasso)\n", + " XGBR_score=r2_score(y_true,y_pre_XGBR)\n", + "\n", + " #计算Lasso、XGBR的MSE和RMSE\n", + " Lasso_MSE=mean_squared_error(y_true, y_pre_Lasso)\n", + " XGBR_MSE=mean_squared_error(y_true, y_pre_XGBR)\n", + "\n", + " Lasso_RMSE=np.sqrt(Lasso_MSE)\n", + " XGBR_RMSE=np.sqrt(XGBR_MSE)\n", + " model_results = pd.DataFrame([['Lasso', 
Lasso_RMSE, Lasso_score],\n", + " ['XgBoost', XGBR_RMSE, XGBR_score]],\n", + " columns = ['模型(Model)','均方根误差(RMSE)', 'R^2 score'])\n", + " model_results1=model_results.set_index('模型(Model)')\n", + "\n", + " def plot_feature_importance(importance,names,model_type):\n", + " feature_importance = np.array(importance)\n", + " feature_names = np.array(names)\n", + "\n", + " data={'feature_names':feature_names,'feature_importance':feature_importance}\n", + " fi_df = pd.DataFrame(data)\n", + "\n", + " fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True)\n", + "\n", + " plt.figure(figsize=(10,8))\n", + " sn.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names'])\n", + "\n", + " plt.title(model_type + \" \"+'FEATURE IMPORTANCE')\n", + " plt.xlabel('FEATURE IMPORTANCE')\n", + " plt.ylabel('FEATURE NAMES')\n", + " from pylab import mpl\n", + " %pylab\n", + " mpl.rcParams['font.sans-serif'] = ['SimHei']\n", + " from xgboost import XGBRegressor\n", + " from sklearn.model_selection import GridSearchCV\n", + "\n", + " estimator = XGBRegressor(random_state=0,\n", + " nthread=4,\n", + " seed=0\n", + " )\n", + " parameters = {\n", + " 'max_depth': range (2, 11, 2), # 树的最大深度\n", + " 'n_estimators': range (50, 101, 10), # 迭代次数\n", + " 'learning_rate': [0.01, 0.03, 0.1, 0.3, 0.5, 1]\n", + " }\n", + "\n", + " grid_search_XGB = GridSearchCV(\n", + " estimator=estimator,\n", + " param_grid=parameters,\n", + " # n_jobs = 10,\n", + " cv = 3,\n", + " verbose=True\n", + " )\n", + "\n", + " grid_search_XGB.fit(X_train, y_train)\n", + " print(\"Best score: %0.3f\" % grid_search_XGB.best_score_)\n", + " print(\"Best parameters set:\")\n", + " best_parameters = grid_search_XGB.best_estimator_.get_params()\n", + " for param_name in sorted(parameters.keys()):\n", + " print(\"\\t%s: %r\" % (param_name, best_parameters[param_name]))\n", + " y_pred = grid_search_XGB.predict(x_test)\n", + "\n", + " op_XGBR_score = r2_score(y_true,y_pred)\n", + " op_XGBR_MSE= 
mean_squared_error(y_true, y_pred)\n", + " op_XGBR_RMSE= np.sqrt(op_XGBR_MSE)\n", + "\n", + " model_results2 = pd.DataFrame([['Optimized_Xgboost', op_XGBR_RMSE, op_XGBR_score]],\n", + " columns = ['模型(Model)', '均方根误差(RMSE)', 'R^2 score'])\n", + " model_results2=model_results2.set_index('模型(Model)')\n", + "\n", + " # results = model_results1.append(model_results2, ignore_index = False)\n", + " results = pd.concat([model_results1,model_results2],ignore_index=True)\n", + " results\n", + " import pickle\n", + "\n", + " Pkl_Filename = \"日度价格预测_最佳模型.pkl\" \n", + "\n", + " with open(Pkl_Filename, 'wb') as file: \n", + " pickle.dump(grid_search_XGB, file)\n", + "\n", + "def read_xls_data():\n", + " \"\"\"获取特征项ID\"\"\"\n", + " global one_cols, two_cols\n", + " # 使用pandas读取Excel文件\n", + " df = pd.read_excel(read_file_path_name, header=None) # 不自动识别列名\n", + " # 获取第二行数据(索引为1)\n", + " one_cols = df.iloc[1].tolist()[1:]\n", + " print(f'获取到的数据项ID{one_cols}')\n", + "\n", + "# def read_xls_data():\n", + "# global one_cols, two_cols\n", + "# # 打开 XLS 文件\n", + "# workbook = xlrd.open_workbook(read_file_path_name)\n", + "\n", + "# # 获取所有表格名称\n", + "# # sheet_names = workbook.sheet_names()\n", + "\n", + "# # 选择第一个表格\n", + "# sheet = workbook.sheet_by_index(0)\n", + "\n", + "# # 获取行数和列数\n", + "# num_rows = sheet.nrows\n", + "# # num_cols = sheet.ncols\n", + "\n", + "# # 遍历每一行,获取单元格数据\n", + "# # for i in range(num_rows):\n", + "# # row_data = sheet.row_values(i)\n", + "# # one_cols.append(row_data)\n", + "# # two_cols.append(row_data[1])\n", + "\n", + "# row_data = sheet.row_values(1)\n", + "# one_cols = row_data\n", + "\n", + "# # 关闭 XLS 文件\n", + "# # workbook.close()\n", + "\n", + "def start_3(date=None,token=None,token_push=None):\n", + " '''预测上传数据'''\n", + " read_xls_data()\n", + "\n", + " if date==None:\n", + " date = datetime.now()\n", + " if token==None:\n", + " token = get_head_auth()\n", + " if token_push==None:\n", + " token = get_head_auth()\n", + "\n", + " datas = 
get_data_value(token, one_cols[1:],date)\n", + " if not datas:\n", + " return\n", + "\n", + " # data_list = [two_cols, one_cols]\n", + " append_rows = [get_cur_time(date)[1]]\n", + " dataItemNo_dataValue = {}\n", + " for data_value in datas:\n", + " if \"dataValue\" not in data_value:\n", + " print(data_value)\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", + " else:\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", + " \n", + " for value in one_cols[1:]:\n", + " if value in dataItemNo_dataValue:\n", + " append_rows.append(dataItemNo_dataValue[value])\n", + " else:\n", + " append_rows.append(\"\")\n", + " save_xls(append_rows)\n", + "\n", + " # 获取当月的数据写入到指定文件\n", + " # optimize_Model()\n", + " # upload_data_to_system(token_push,date)\n", + " # data_list.append(three_cols)\n", + " # write_xls(data_list)\n", + "\n", + "def check_data(dataItemNo):\n", + " token = get_head_auth()\n", + " if not token:\n", + " return\n", + "\n", + " datas = get_data_value(token, dataItemNo)\n", + " if not datas:\n", + " return\n", + "\n", + "def save_xls(append_rows):\n", + "\n", + " # 打开xls文件\n", + " workbook = xlrd.open_workbook('沥青数据项.xls')\n", + "\n", + " # 获取所有sheet的个数\n", + " sheet_count = len(workbook.sheet_names())\n", + "\n", + " # 获取所有sheet的名称\n", + " sheet_names = workbook.sheet_names()\n", + "\n", + " new_workbook = xlwt.Workbook()\n", + " for i in range(sheet_count):\n", + " # 获取当前sheet\n", + " sheet = workbook.sheet_by_index(i)\n", + "\n", + " # 获取sheet的行数和列数\n", + " row_count = sheet.nrows\n", + " col_count = sheet.ncols\n", + " # 获取原有数据\n", + " data = []\n", + " for row in range(row_count):\n", + " row_data = []\n", + " for col in range(col_count):\n", + " row_data.append(sheet.cell_value(row, col))\n", + " data.append(row_data)\n", + " # 创建xlwt的Workbook对象\n", + " # 创建sheet\n", + " new_sheet = new_workbook.add_sheet(sheet_names[i])\n", + "\n", + " # 将原有的数据写入新的sheet\n", + " for row in range(row_count):\n", + " for 
col in range(col_count):\n", + " new_sheet.write(row, col, data[row][col])\n", + "\n", + " if i == 0:\n", + " # 在新的sheet中添加数据\n", + " for col in range(col_count):\n", + " new_sheet.write(row_count, col, append_rows[col])\n", + "\n", + " # 保存新的xls文件\n", + " new_workbook.save(\"沥青数据项.xlsx\")\n", + "\n", + "def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEnd):\n", + "\n", + " search_data = {\n", + " \"funcModule\": \"数据项\",\n", + " \"funcOperation\": \"查询\",\n", + " \"data\": {\n", + " \"dateStart\": dateStart,\n", + " \"dateEnd\": dateEnd,\n", + " \"dataItemNoList\": dataItemNoList # 数据项编码,代表 brent最低价和最高价\n", + " }\n", + " }\n", + "\n", + " headers = {\"Authorization\": token}\n", + " search_res = requests.post(url=url, headers=headers, json=search_data, timeout=(3, 5))\n", + " search_value = json.loads(search_res.text)[\"data\"]\n", + " if search_value:\n", + " return search_value\n", + " else:\n", + " return None\n", + "\n", + "\n", + "def save_queryDataListItemNos_xls(data_df,dataItemNoList):\n", + " current_year_month = datetime.now().strftime('%Y-%m')\n", + " grouped = data_df.groupby(\"dataDate\")\n", + "\n", + " # 使用openpyxl打开xlsx文件\n", + " from openpyxl import load_workbook\n", + " workbook = load_workbook('沥青数据项.xlsx')\n", + "\n", + " # 创建新工作簿\n", + " new_workbook = load_workbook('沥青数据项.xlsx')\n", + " \n", + " for sheetname in workbook.sheetnames:\n", + " sheet = workbook[sheetname]\n", + " new_sheet = new_workbook[sheetname]\n", + " \n", + " current_year_month_row = 0\n", + " # 查找当前月份数据起始行\n", + " for row_idx, row in enumerate(sheet.iter_rows(values_only=True), 1):\n", + " if str(row[0]).startswith(current_year_month):\n", + " current_year_month_row += 1\n", + "\n", + " # 追加新数据\n", + " if sheetname == workbook.sheetnames[0]:\n", + " start_row = sheet.max_row - current_year_month_row + 1\n", + " for row_idx, (date, group) in enumerate(grouped, start=start_row):\n", + " new_sheet.cell(row=row_idx, column=1, value=date)\n", + " 
for j, dataItemNo in enumerate(dataItemNoList, start=2):\n", + " if group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values:\n", + " new_sheet.cell(row=row_idx, column=j, \n", + " value=group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values[0])\n", + "\n", + " # 保存修改后的xlsx文件\n", + " new_workbook.save(\"沥青数据项.xlsx\")\n", + "\n", + "\n", + "# def save_queryDataListItemNos_xls(data_df,dataItemNoList):\n", + "# from datetime import datetime, timedelta\n", + "# current_year_month = datetime.now().strftime('%Y-%m')\n", + "# grouped = data_df.groupby(\"dataDate\")\n", + "\n", + "# # 打开xls文件\n", + "# workbook = xlrd.open_workbook('沥青数据项.xlsx')\n", + "\n", + "# # 获取所有sheet的个数\n", + "# sheet_count = len(workbook.sheet_names())\n", + "\n", + "# # 获取所有sheet的名称\n", + "# sheet_names = workbook.sheet_names()\n", + "\n", + "# new_workbook = xlwt.Workbook()\n", + "# for i in range(sheet_count):\n", + "# # 获取当前sheet\n", + "# sheet = workbook.sheet_by_index(i)\n", + "\n", + "# # 获取sheet的行数和列数\n", + "# row_count = sheet.nrows\n", + "# col_count = sheet.ncols\n", + "# # 获取原有数据\n", + "# data = []\n", + "# for row in range(row_count):\n", + "# row_data = []\n", + "# for col in range(col_count):\n", + "# row_data.append(sheet.cell_value(row, col))\n", + "# data.append(row_data)\n", + "\n", + "# # 创建xlwt的Workbook对象\n", + "# # 创建sheet\n", + "# new_sheet = new_workbook.add_sheet(sheet_names[i])\n", + "\n", + "\n", + "# current_year_month_row = 0\n", + "# # 将原有的数据写入新的sheet\n", + "# for row in range(row_count):\n", + "# for col in range(col_count):\n", + "# col0 = data[row][0]\n", + "# # print(\"col0\",col0[:7])\n", + "# if col0[:7] == current_year_month:\n", + "# current_year_month_row += 1\n", + "# break\n", + "# new_sheet.write(row, col, data[row][col])\n", + "\n", + "\n", + "# # print(\"current_year_month_row\",current_year_month_row)\n", + "# if i == 0:\n", + "# rowFlag = 0\n", + "# # 查看每组数据\n", + "# for date, group in grouped:\n", + "# new_sheet.write(row_count + 
rowFlag - current_year_month_row, 0, date)\n", + "# for j in range(len(dataItemNoList)):\n", + "# dataItemNo = dataItemNoList[j]\n", + "\n", + "# # for dataItemNo in dataItemNoList:\n", + "# if group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values:\n", + "\n", + "# new_sheet.write(row_count + rowFlag - current_year_month_row, j + 1, group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values[0])\n", + "\n", + "# rowFlag += 1\n", + "\n", + "\n", + "# # 保存新的xls文件\n", + "# new_workbook.save(\"沥青数据项.xlsx\")\n", + "\n", + "def queryDataListItemNos(token=None):\n", + " from datetime import datetime, timedelta\n", + " df = pd.read_excel('沥青数据项.xlsx')\n", + " dataItemNoList = df.iloc[0].tolist()[1:]\n", + " \n", + " if token is None:\n", + " token = get_head_auth()\n", + "\n", + " if not token:\n", + " print('token获取失败')\n", + " return\n", + "\n", + " # 获取当前日期\n", + " current_date = datetime.now()\n", + "\n", + " # 获取当月1日\n", + " first_day_of_month = current_date.replace(day=1)\n", + "\n", + " # 格式化为 YYYYMMDD 格式\n", + " dateEnd = current_date.strftime('%Y%m%d')\n", + " dateStart = first_day_of_month.strftime('%Y%m%d')\n", + "\n", + " search_value = get_queryDataListItemNos_value(token, queryDataListItemNos_url, dataItemNoList, dateStart, dateEnd)\n", + "\n", + "\n", + " data_df = pd.DataFrame(search_value)\n", + "\n", + " data_df[\"dataDate\"] = pd.to_datetime(data_df[\"dataDate\"])\n", + " data_df[\"dataDate\"] = data_df[\"dataDate\"].dt.strftime('%Y-%m-%d')\n", + " save_queryDataListItemNos_xls(data_df,dataItemNoList)\n", + " print('当月数据更新完成')\n", + "\n", + "\n", + "def save_xls_1(append_rows):\n", + "\n", + " # 打开xls文件\n", + " workbook = xlrd.open_workbook('沥青数据项.xlsx')\n", + "\n", + " # 获取所有sheet的个数\n", + " sheet_count = len(workbook.sheet_names())\n", + "\n", + " # 获取所有sheet的名称\n", + " sheet_names = workbook.sheet_names()\n", + "\n", + " new_workbook = xlwt.Workbook()\n", + " for i in range(sheet_count):\n", + " # 获取当前sheet\n", + " sheet = 
workbook.sheet_by_index(i)\n", + "\n", + " # 获取sheet的行数和列数\n", + " row_count = sheet.nrows - 1\n", + " col_count = sheet.ncols\n", + " # 获取原有数据\n", + " data = []\n", + " for row in range(row_count):\n", + " row_data = []\n", + " for col in range(col_count):\n", + " row_data.append(sheet.cell_value(row, col))\n", + " data.append(row_data)\n", + " # 创建xlwt的Workbook对象\n", + " # 创建sheet\n", + " new_sheet = new_workbook.add_sheet(sheet_names[i])\n", + "\n", + " # 将原有的数据写入新的sheet\n", + " for row in range(row_count):\n", + " for col in range(col_count):\n", + " new_sheet.write(row, col, data[row][col])\n", + "\n", + " if i == 0:\n", + " # 在新的sheet中添加数据\n", + " for col in range(col_count):\n", + " new_sheet.write(row_count, col, append_rows[col])\n", + "\n", + " # 保存新的xls文件\n", + " new_workbook.save(\"沥青数据项.xlsx\") \n", + "\n", + "\n", + "\n", + "def start(date=''):\n", + " \"\"\"获取当日数据\"\"\"\n", + " read_xls_data()\n", + " token = get_head_auth()\n", + " if not token:\n", + " return\n", + " \n", + " cur_time,cur_time2 = getNow(date)\n", + " print(f\"获取{cur_time}数据\")\n", + " datas = get_data_value(token, one_cols,date=cur_time)\n", + " print(len(datas))\n", + " print(datas)\n", + " if not datas:\n", + " return\n", + "\n", + " append_rows = [cur_time2]\n", + " dataItemNo_dataValue = {}\n", + " for data_value in datas:\n", + " if \"dataValue\" not in data_value:\n", + " print(data_value)\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", + " else:\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", + " \n", + " for value in one_cols:\n", + " if value in dataItemNo_dataValue:\n", + " append_rows.append(dataItemNo_dataValue[value])\n", + " else:\n", + " append_rows.append(\"\")\n", + " \n", + " print('添加的行:',len(append_rows),append_rows)\n", + " save_xls_2(append_rows)\n", + "\n", + "\n", + "def save_xls_2(append_rows):\n", + " \"\"\"保存或更新数据到Excel文件\n", + " 参数:\n", + " append_rows (list): 需要追加/更新的数据行,格式为[日期, 数据项1, 
数据项2,...]\n", + " \"\"\"\n", + " # try:\n", + " # 读取现有数据(假设第一行为列名)\n", + " df = pd.read_excel('沥青数据项.xlsx', sheet_name=0)\n", + " print('文件中的数据列数:',len(df.columns),df.columns)\n", + " # 转换append_rows为DataFrame\n", + " if len(append_rows) != len(df.columns):\n", + " # 去除第二个元素 ,不知道什么原因多一个空数据\n", + " append_rows.pop(1)\n", + " append_rows = pd.DataFrame([append_rows],columns=df.columns)\n", + " # 创建新数据行\n", + " new_date = append_rows['日期'].values[0]\n", + " \n", + " dates = df['日期'].to_list()\n", + " # 判断日期是否存在\n", + " if new_date in dates:\n", + " # 找到日期所在行的索引\n", + " date_mask = df['日期'] == new_date\n", + " # 存在则更新数据\n", + " df.loc[date_mask] = append_rows.values\n", + " print(f\"更新 {new_date} 数据\")\n", + " else:\n", + " # 不存在则追加数据\n", + " df = pd.concat([df, append_rows], ignore_index=True)\n", + " print(df.head())\n", + " print(df.tail())\n", + " print(f\"插入 {new_date} 新数据\")\n", + " \n", + " # 保存更新后的数据\n", + " df.to_excel('沥青数据项.xlsx', index=False, engine='openpyxl')\n", + " \n", + " # except FileNotFoundError:\n", + " # # 如果文件不存在则创建新文件\n", + " # pd.DataFrame([append_rows]).to_excel('沥青数据项.xlsx', index=False, engine='openpyxl')\n", + " # except Exception as e:\n", + " # print(f\"保存数据时发生错误: {str(e)}\")\n", + "\n", + "\n", + "def main(start_date=None,token=None,token_push=None):\n", + " from datatime import datetime\n", + " if start_date is None:\n", + " start_date = datetime.now()\n", + " if token is None:\n", + " token = get_head_auth()\n", + " if token_push is None:\n", + " token_push = get_head_push_auth()\n", + " date = start_date.strftime('%Y%m%d')\n", + " print(date)\n", + "# start(date)\n", + " # 更新当月数据\n", + " queryDataListItemNos(token)\n", + " # 训练模型\n", + " optimize_Model()\n", + " # # 预测&上传预测结果\n", + " upload_data_to_system(token_push,start_date)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "module 'datetime' has no attribute 'now'", + "output_type": 
"error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[5], line 20\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# if __name__ == \"__main__\":\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m# print(\"运行中ing...\")\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;66;03m# # 每天定时12点运行\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 18\u001b[0m \u001b[38;5;66;03m# except :\u001b[39;00m\n\u001b[0;32m 19\u001b[0m \u001b[38;5;66;03m# print(f\"{current_time}任务失败\")\u001b[39;00m\n\u001b[1;32m---> 20\u001b[0m main()\n", + "Cell \u001b[1;32mIn[1], line 848\u001b[0m, in \u001b[0;36mmain\u001b[1;34m(start_date, token, token_push)\u001b[0m\n\u001b[0;32m 846\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mmain\u001b[39m(start_date\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,token\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,token_push\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m 847\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m start_date \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m--> 848\u001b[0m start_date \u001b[38;5;241m=\u001b[39m datetime\u001b[38;5;241m.\u001b[39mnow()\n\u001b[0;32m 849\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m token \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 850\u001b[0m token \u001b[38;5;241m=\u001b[39m get_head_auth()\n", + "\u001b[1;31mAttributeError\u001b[0m: module 'datetime' has no attribute 'now'" + ] + } + ], + "source": [ + "\n", + "# if __name__ == \"__main__\":\n", + "# print(\"运行中ing...\")\n", + "# # 每天定时12点运行\n", + "# while True:\n", + "# # 获取当前时间\n", + "# current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n", + "# # print(current_time)\n", + "\n", + "# # 判断当前时间是否为执行任务的时间点\n", + "# 
try:\n", + "# if current_time == \"09:13:00\":\n", + "# print(\"执行定时任务\")\n", + "# main()\n", + "# elif current_time == \"09:18:00\":\n", + "# print(\"执行定时任务\")\n", + "# main()\n", + "# time.sleep(1)\n", + "# except :\n", + "# print(f\"{current_time}任务失败\")\n", + "main()\n", + "main()\n", + " # 检测数据准确性, 需要检测放开\n", + " # check_data(\"100028098|LISTING_PRICE\")\n", + " # check_data(\"9137070016544622XB|DAY_Yield\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# start_date = datetime(2025, 5, 16)\n", + "# end_date = datetime(2025, 5, 19)\n", + "# token = get_head_auth()\n", + "\n", + "# token_push = get_head_push_auth()\n", + "\n", + "# while start_date < end_date:\n", + "# main(start_date,token,token_push)\n", + "# start_date += timedelta(days=1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### 代码备份:\n", + "\n", + "\n", + "class codeBackup:\n", + "\n", + " \n", + "\n", + "\n", + " def write_xls(data,date):\n", + " # 创建一个Workbook对象\n", + " workbook = xlwt.Workbook()\n", + "\n", + " # 创建一个Sheet对象,可指定名称\n", + " sheet = workbook.load('Sheet1')\n", + "\n", + " # 写入数据行\n", + " for row_index, row_data in enumerate(data):\n", + " for col_index, cell_data in enumerate(row_data):\n", + " sheet.write(row_index, col_index, cell_data)\n", + "\n", + " # 保存Workbook到文件\n", + " workbook.save(get_cur_time(date)[0] + '.xls')\n", + "\n", + "\n", + " def start():\n", + " '''预测上传数据'''\n", + " read_xls_data()\n", + "\n", + " token = get_head_auth()\n", + " if not token:\n", + " return\n", + " token_push = get_head_push_auth()\n", + " if not token_push:\n", + " return\n", + "\n", + " datas = get_data_value(token, one_cols[1:])\n", + " if not datas:\n", + " return\n", + "\n", + " # data_list = [two_cols, one_cols]\n", + " append_rows = [get_cur_time()[1]]\n", + " dataItemNo_dataValue = {}\n", + " for data_value in datas:\n", + " if \"dataValue\" not 
in data_value:\n", + " print(data_value)\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", + " else:\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", + " \n", + " for value in one_cols[1:]:\n", + " if value in dataItemNo_dataValue:\n", + " append_rows.append(dataItemNo_dataValue[value])\n", + " else:\n", + " append_rows.append(\"\")\n", + " save_xls(append_rows)\n", + " \n", + " # 获取当月的数据写入到指定文件\n", + " queryDataListItemNos(token)\n", + " optimize_Model()\n", + " upload_data_to_system(token_push)\n", + " # data_list.append(three_cols)\n", + " # write_xls(data_list)\n", + "\n", + "\n", + "\n", + "\n", + " def start_1():\n", + " '''更新数据'''\n", + " print(\"更新当天数据\")\n", + " read_xls_data()\n", + "\n", + " token = get_head_auth()\n", + " if not token:\n", + " return\n", + " \n", + "\n", + " datas = get_data_value(token, one_cols[1:])\n", + " if not datas:\n", + " return\n", + "\n", + " # data_list = [two_cols, one_cols]\n", + " append_rows = [get_cur_time()[1]]\n", + " dataItemNo_dataValue = {}\n", + " for data_value in datas:\n", + " if \"dataValue\" not in data_value:\n", + " print(data_value)\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", + " else:\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", + " \n", + " for value in one_cols[1:]:\n", + " if value in dataItemNo_dataValue:\n", + " append_rows.append(dataItemNo_dataValue[value])\n", + " else:\n", + " append_rows.append(\"\")\n", + " print(\"当天数据为:\",append_rows)\n", + " save_xls_1(append_rows)\n", + "\n", + " \n", + " # data_list.append(three_cols)\n", + " # write_xls(data_list)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + 
"pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb.r87044 b/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb.r87044 new file mode 100644 index 0000000..2f999bd --- /dev/null +++ b/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb.r87044 @@ -0,0 +1,1095 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\statsmodels\\compat\\pandas.py:49: FutureWarning:\n", + "\n", + "The Panel class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next version\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import requests\n", + "import json\n", + "import xlrd\n", + "import xlwt\n", + "from datetime import datetime, timedelta\n", + "import time\n", + "import pandas as pd\n", + "\n", + "# 变量定义\n", + "login_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n", + "search_url = \"http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos\"\n", + "queryDataListItemNos_url = \"http://10.200.32.39/jingbo-api//api/warehouse/dwDataItem/queryDataListItemNos\"\n", + "\n", + "login_push_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n", + "upload_url = \"http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList\"\n", + "\n", + "login_data = {\n", + " \"data\": {\n", + " \"account\": \"api_dev\",\n", + " \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n", + " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n", + " \"terminal\": \"API\"\n", + " },\n", + " \"funcModule\": \"API\",\n", + " \"funcOperation\": \"获取token\"\n", + "}\n", + "\n", + "login_push_data = {\n", + " \"data\": {\n", + " \"account\": \"api_dev\",\n", + " 
\"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n", + " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n", + " \"terminal\": \"API\"\n", + " },\n", + " \"funcModule\": \"API\",\n", + " \"funcOperation\": \"获取token\"\n", + "}\n", + "\n", + "read_file_path_name = \"沥青数据项.xlsx\"\n", + "one_cols = []\n", + "two_cols = []\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sn\n", + "import random\n", + "import time\n", + "\n", + "\n", + "\n", + "\n", + "from plotly import __version__\n", + "from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot\n", + "\n", + "from sklearn import preprocessing\n", + "\n", + "from pandas import Series,DataFrame\n", + "\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import sklearn.datasets as datasets\n", + "\n", + "#导入机器学习算法模型\n", + "from sklearn.linear_model import Lasso\n", + "from xgboost import XGBRegressor\n", + "\n", + "import statsmodels.api as sm\n", + "# from keras.preprocessing.sequence import TimeseriesGenerator\n", + "from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator\n", + "\n", + "import plotly.express as px\n", + "import plotly.graph_objects as go\n", + "\n", + "import xgboost as xgb\n", + "from xgboost import plot_importance, plot_tree\n", + "from sklearn.metrics import mean_absolute_error\n", + "from statsmodels.tools.eval_measures import mse,rmse\n", + "from sklearn.model_selection import GridSearchCV\n", + "from xgboost import XGBRegressor\n", + "import warnings\n", + "import pickle\n", + "\n", + "from sklearn.metrics import mean_squared_error\n", + "\n", + "#切割训练数据和样本数据\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "#用于模型评分\n", + "from sklearn.metrics import r2_score\n", + "\n", + "le = preprocessing.LabelEncoder()\n", + "\n", + "# print(__version__) # requires version >= 1.9.0\n", + "\n", + "\n", + "import cufflinks as cf\n", + 
"cf.go_offline()\n", + "\n", + "random.seed(100)\n", + "\n", + "%matplotlib inline\n", + "\n", + "# 数据获取\n", + "\n", + "def get_head_auth():\n", + " login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5))\n", + " text = json.loads(login_res.text)\n", + " if text[\"status\"]:\n", + " token = text[\"data\"][\"accessToken\"]\n", + " return token\n", + " else:\n", + " print(\"获取认证失败\")\n", + " return None\n", + "\n", + "\n", + "def get_data_value(token, dataItemNoList,date=''):\n", + "\n", + " search_data = {\n", + " \"data\": {\n", + " \"date\": getNow(date)[0],\n", + " \"dataItemNoList\": dataItemNoList\n", + " },\n", + " \"funcModule\": \"数据项\",\n", + " \"funcOperation\": \"查询\"\n", + " }\n", + " headers = {\"Authorization\": token}\n", + " search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5))\n", + " search_value = json.loads(search_res.text)[\"data\"]\n", + " if search_value:\n", + " return search_value\n", + " else:\n", + " print(\"今天没有新数据\")\n", + " return None\n", + "\n", + "\n", + "# xls文件处理\n", + "\n", + "\n", + "\n", + "def getNow(date='', offset=0):\n", + " \"\"\"生成指定日期的两种格式字符串\n", + " Args:\n", + " date: 支持多种输入类型:\n", + " - datetime对象\n", + " - 字符串格式(支持'%Y-%m-%d'和'%Y%m%d')\n", + " - 空字符串表示当前日期\n", + " offset: 日期偏移天数\n", + " Returns:\n", + " tuple: (紧凑日期字符串, 标准日期字符串)\n", + " \"\"\"\n", + " # 日期解析逻辑\n", + " from datetime import datetime,timedelta\n", + " if isinstance(date, datetime):\n", + " now = date\n", + " else:\n", + " now = datetime.now()\n", + " if date:\n", + " # 尝试多种日期格式解析\n", + " for fmt in ('%Y-%m-%d', '%Y%m%d', '%Y/%m/%d'):\n", + " try:\n", + " now = datetime.strptime(str(date), fmt)\n", + " break\n", + " except ValueError:\n", + " continue\n", + " else:\n", + " raise ValueError(f\"无法解析的日期格式: {date}\")\n", + "\n", + " # 应用日期偏移\n", + " now = now - timedelta(days=offset)\n", + " \n", + " # 统一格式化输出\n", + " date_str = now.strftime(\"%Y-%m-%d\")\n", + " compact_date = date_str.replace(\"-\", 
\"\")\n", + " return compact_date, date_str\n", + "\n", + "\n", + "\n", + "# def get_cur_time(date = ''):\n", + "# if date == '':\n", + "# now = datetime.now()\n", + "# else:\n", + "# now = date\n", + "# year = now.year\n", + "# month = now.month\n", + "# day = now.day\n", + "\n", + "# if month < 10:\n", + "# month = \"0\" + str(month)\n", + "# if day < 10:\n", + "# day = \"0\" + str(day)\n", + "# cur_time = str(year) + str(month) + str(day)\n", + "# cur_time2 = str(year) + \"-\" + str(month) + \"-\" + str(day)\n", + "# # cur_time = '20231007'\n", + "# # cur_time2 = '2023-10-07'\n", + "# return cur_time, cur_time2\n", + "\n", + "\n", + "def get_head_push_auth():\n", + " login_res = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5))\n", + " text = json.loads(login_res.text)\n", + " if text[\"status\"]:\n", + " token = text[\"data\"][\"accessToken\"]\n", + " return token\n", + " else:\n", + " print(\"获取认证失败\")\n", + " return None\n", + "\n", + "def upload_data_to_system(token_push,date):\n", + " data = {\n", + " \"funcModule\": \"数据表信息列表\",\n", + " \"funcOperation\": \"新增\",\n", + " \"data\": [\n", + " {\"dataItemNo\": \"C01100036|Forecast_Price|ACN\",\n", + " \"dataDate\": getNow(date)[0],\n", + " \"dataStatus\": \"add\",\n", + " \"dataValue\": forecast_price()\n", + " }\n", + "\n", + " ]\n", + " }\n", + " headers = {\"Authorization\": token_push}\n", + " res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))\n", + " print(res.text)\n", + "\n", + "def forecast_price():\n", + " df_test = pd.read_excel('沥青数据项.xlsx')\n", + " df_test.drop([0],inplace=True)\n", + " df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", + " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", + " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", + " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", + " '京博签单量','京博库存量','京博产量','加权平均成交价']] = 
df_test[['汽油执行价','柴油执行价','齐鲁石化销量',\n", + " '齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", + " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", + " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", + " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", + " '京博签单量','京博库存量','京博产量','加权平均成交价']].astype('float')\n", + " # df_test['日期']=pd.to_datetime(df_test['日期'], format='%d/%m/%Y',infer_datetime_format=True)\n", + " df_test['日期']=pd.to_datetime(df_test['日期'], format='%Y-%m-%d',infer_datetime_format=True)\n", + "\n", + " #查看每个特征缺失值数量\n", + " MisVal_Check=df_test.isnull().sum().sort_values(ascending=False)\n", + " #去掉缺失值百分比>0.4的特征,去掉这些特征后的新表格命名为df_test_1\n", + " df_MisVal_Check = pd.DataFrame(MisVal_Check,)#\n", + " df_MisVal_Check_1=df_MisVal_Check.reset_index()\n", + " df_MisVal_Check_1.columns=['Variable_Name','Missing_Number'] \n", + " df_MisVal_Check_1['Missing_Number']=df_MisVal_Check_1['Missing_Number']/len(df_test)\n", + " df_test_1=df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number']>0.4].Variable_Name,axis = 1)\n", + "\n", + " #将缺失值补为前一个或者后一个数值\n", + " df_test_1=df_test_1.fillna(df_test.ffill())\n", + " df_test_1=df_test_1.fillna(df_test_1.bfill())\n", + "\n", + " # 选择用于模型训练的列名称\n", + " col_for_training = df_test_1.columns\n", + " import joblib\n", + " Best_model_DalyLGPrice = joblib.load(\"日度价格预测_最佳模型.pkl\")\n", + " # 最新的一天为最后一行的数据\n", + " df_test_1_Day = df_test_1.tail(1)\n", + " # 移除不需要的列\n", + " df_test_1_Day.index = df_test_1_Day[\"日期\"]\n", + " df_test_1_Day = df_test_1_Day.drop([\"日期\"], axis= 1)\n", + " df_test_1_Day=df_test_1_Day.drop('京博指导价',axis=1)\n", + " df_test_1_Day=df_test_1_Day.dropna()\n", + "\n", + " # df_test_1_Day\n", + " #预测今日价格,显示至小数点后两位\n", + " Ypredict_Today=Best_model_DalyLGPrice.predict(df_test_1_Day)\n", + "\n", + " df_test_1_Day['日度预测价格']=Ypredict_Today\n", + " print(df_test_1_Day['日度预测价格'])\n", + " a = df_test_1_Day['日度预测价格']\n", + " a = a[0]\n", + " a = 
float(a)\n", + " a = round(a,2)\n", + " return a\n", + "def optimize_Model():\n", + " from sklearn.model_selection import train_test_split\n", + " from sklearn.impute import SimpleImputer\n", + " from sklearn.preprocessing import OrdinalEncoder\n", + " from sklearn.feature_selection import SelectFromModel\n", + " from sklearn.metrics import mean_squared_error, r2_score\n", + "\n", + " pd.set_option('display.max_rows',40) \n", + " pd.set_option('display.max_columns',40) \n", + " df_test = pd.read_excel('沥青数据项.xlsx')\n", + " df_test.drop([0],inplace=True)\n", + " df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", + " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", + " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", + " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", + " '京博签单量','京博库存量','京博产量','加权平均成交价']] = df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", + " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", + " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", + " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", + " '京博签单量','京博库存量','京博产量','加权平均成交价']].astype('float')\n", + " # df_test = pd.read_csv('定价模型数据收集20190901-20230615.csv',encoding = 'gbk',engine = 'python')\n", + " # df_test['日期']=pd.to_datetime(df_test['日期'], format='%m/%d/%Y',infer_datetime_format=True)\n", + " df_test['日期']=pd.to_datetime(df_test['日期'], format='%Y-%m-%d',infer_datetime_format=True)\n", + " # df_test.tail(3)\n", + " MisVal_Check=df_test.isnull().sum().sort_values(ascending=False)\n", + " #去掉缺失值百分比>0.4的特征,去掉这些特征后的新表格命名为df_test_1\n", + " df_MisVal_Check = pd.DataFrame(MisVal_Check,)#\n", + " df_MisVal_Check_1=df_MisVal_Check.reset_index()\n", + " df_MisVal_Check_1.columns=['Variable_Name','Missing_Number'] \n", + " 
df_MisVal_Check_1['Missing_Number']=df_MisVal_Check_1['Missing_Number']/len(df_test)\n", + " df_test_1=df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number']>0.4].Variable_Name,axis = 1)\n", + " #将缺失值补为前一个或者后一个数值\n", + " df_test_1=df_test_1.fillna(df_test.ffill())\n", + " df_test_1=df_test_1.fillna(df_test_1.bfill())\n", + " df_test_1[\"日期\"] = pd.to_datetime(df_test_1[\"日期\"])\n", + " df_test_1.index = df_test_1[\"日期\"]\n", + " df_test_1 = df_test_1.drop([\"日期\"], axis= 1)\n", + " dataset1=df_test_1.drop('京博指导价',axis=1)#.astype(float)\n", + "\n", + " y=df_test_1['京博指导价']\n", + "\n", + " x=dataset1 \n", + "\n", + " train = x\n", + " target = y\n", + "\n", + " #切割数据样本集合测试集\n", + " X_train,x_test,y_train,y_true = train_test_split(train,target,test_size=0.2,random_state=0)\n", + " \n", + " \n", + " from sklearn.linear_model import Lasso\n", + " from xgboost import XGBRegressor\n", + "\n", + " import statsmodels.api as sm\n", + " # from keras.preprocessing.sequence import TimeseriesGenerator\n", + " from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator\n", + "\n", + " import plotly.express as px\n", + " import plotly.graph_objects as go\n", + "\n", + " import xgboost as xgb\n", + " from xgboost import plot_importance, plot_tree\n", + " from sklearn.metrics import mean_absolute_error\n", + " from statsmodels.tools.eval_measures import mse,rmse\n", + " from sklearn.model_selection import GridSearchCV\n", + " from xgboost import XGBRegressor\n", + " import warnings\n", + " import pickle\n", + "\n", + " from sklearn.metrics import mean_squared_error\n", + "\n", + " #切割训练数据和样本数据\n", + " from sklearn.model_selection import train_test_split\n", + "\n", + " #用于模型评分\n", + " from sklearn.metrics import r2_score\n", + "\n", + " #模型缩写\n", + " Lasso = Lasso(random_state=0)\n", + " XGBR = XGBRegressor(random_state=0)\n", + " Lasso.fit(X_train,y_train)\n", + " XGBR.fit(X_train,y_train)\n", + " y_pre_Lasso = Lasso.predict(x_test)\n", + " y_pre_XGBR = 
XGBR.predict(x_test)\n", + "\n", + " #计算Lasso、XGBR、RandomForestR、AdaBoostR、GradientBoostingR、BaggingRegressor各模型的R²\n", + " Lasso_score = r2_score(y_true,y_pre_Lasso)\n", + " XGBR_score=r2_score(y_true,y_pre_XGBR)\n", + "\n", + " #计算Lasso、XGBR的MSE和RMSE\n", + " Lasso_MSE=mean_squared_error(y_true, y_pre_Lasso)\n", + " XGBR_MSE=mean_squared_error(y_true, y_pre_XGBR)\n", + "\n", + " Lasso_RMSE=np.sqrt(Lasso_MSE)\n", + " XGBR_RMSE=np.sqrt(XGBR_MSE)\n", + " model_results = pd.DataFrame([['Lasso', Lasso_RMSE, Lasso_score],\n", + " ['XgBoost', XGBR_RMSE, XGBR_score]],\n", + " columns = ['模型(Model)','均方根误差(RMSE)', 'R^2 score'])\n", + " model_results1=model_results.set_index('模型(Model)')\n", + "\n", + " def plot_feature_importance(importance,names,model_type):\n", + " feature_importance = np.array(importance)\n", + " feature_names = np.array(names)\n", + "\n", + " data={'feature_names':feature_names,'feature_importance':feature_importance}\n", + " fi_df = pd.DataFrame(data)\n", + "\n", + " fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True)\n", + "\n", + " plt.figure(figsize=(10,8))\n", + " sn.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names'])\n", + "\n", + " plt.title(model_type + \" \"+'FEATURE IMPORTANCE')\n", + " plt.xlabel('FEATURE IMPORTANCE')\n", + " plt.ylabel('FEATURE NAMES')\n", + " from pylab import mpl\n", + " %pylab\n", + " mpl.rcParams['font.sans-serif'] = ['SimHei']\n", + " from xgboost import XGBRegressor\n", + " from sklearn.model_selection import GridSearchCV\n", + "\n", + " estimator = XGBRegressor(random_state=0,\n", + " nthread=4,\n", + " seed=0\n", + " )\n", + " parameters = {\n", + " 'max_depth': range (2, 11, 2), # 树的最大深度\n", + " 'n_estimators': range (50, 101, 10), # 迭代次数\n", + " 'learning_rate': [0.01, 0.03, 0.1, 0.3, 0.5, 1]\n", + " }\n", + "\n", + " grid_search_XGB = GridSearchCV(\n", + " estimator=estimator,\n", + " param_grid=parameters,\n", + " # n_jobs = 10,\n", + " cv = 3,\n", + " verbose=True\n", + " 
)\n", + "\n", + " grid_search_XGB.fit(X_train, y_train)\n", + " print(\"Best score: %0.3f\" % grid_search_XGB.best_score_)\n", + " print(\"Best parameters set:\")\n", + " best_parameters = grid_search_XGB.best_estimator_.get_params()\n", + " for param_name in sorted(parameters.keys()):\n", + " print(\"\\t%s: %r\" % (param_name, best_parameters[param_name]))\n", + " y_pred = grid_search_XGB.predict(x_test)\n", + "\n", + " op_XGBR_score = r2_score(y_true,y_pred)\n", + " op_XGBR_MSE= mean_squared_error(y_true, y_pred)\n", + " op_XGBR_RMSE= np.sqrt(op_XGBR_MSE)\n", + "\n", + " model_results2 = pd.DataFrame([['Optimized_Xgboost', op_XGBR_RMSE, op_XGBR_score]],\n", + " columns = ['模型(Model)', '均方根误差(RMSE)', 'R^2 score'])\n", + " model_results2=model_results2.set_index('模型(Model)')\n", + "\n", + " # results = model_results1.append(model_results2, ignore_index = False)\n", + " results = pd.concat([model_results1,model_results2],ignore_index=True)\n", + " results\n", + " import pickle\n", + "\n", + " Pkl_Filename = \"日度价格预测_最佳模型.pkl\" \n", + "\n", + " with open(Pkl_Filename, 'wb') as file: \n", + " pickle.dump(grid_search_XGB, file)\n", + "\n", + "def read_xls_data():\n", + " \"\"\"获取特征项ID\"\"\"\n", + " global one_cols, two_cols\n", + " # 使用pandas读取Excel文件\n", + " df = pd.read_excel(read_file_path_name, header=None) # 不自动识别列名\n", + " # 获取第二行数据(索引为1)\n", + " one_cols = df.iloc[1].tolist()[1:]\n", + " print(f'获取到的数据项ID{one_cols}')\n", + "\n", + "# def read_xls_data():\n", + "# global one_cols, two_cols\n", + "# # 打开 XLS 文件\n", + "# workbook = xlrd.open_workbook(read_file_path_name)\n", + "\n", + "# # 获取所有表格名称\n", + "# # sheet_names = workbook.sheet_names()\n", + "\n", + "# # 选择第一个表格\n", + "# sheet = workbook.sheet_by_index(0)\n", + "\n", + "# # 获取行数和列数\n", + "# num_rows = sheet.nrows\n", + "# # num_cols = sheet.ncols\n", + "\n", + "# # 遍历每一行,获取单元格数据\n", + "# # for i in range(num_rows):\n", + "# # row_data = sheet.row_values(i)\n", + "# # one_cols.append(row_data)\n", + "# # 
two_cols.append(row_data[1])\n", + "\n", + "# row_data = sheet.row_values(1)\n", + "# one_cols = row_data\n", + "\n", + "# # 关闭 XLS 文件\n", + "# # workbook.close()\n", + "\n", + "def start_3(date=None,token=None,token_push=None):\n", + " '''预测上传数据'''\n", + " read_xls_data()\n", + "\n", + " if date==None:\n", + " date = datetime.now()\n", + " if token==None:\n", + " token = get_head_auth()\n", + " if token_push==None:\n", + " token = get_head_auth()\n", + "\n", + " datas = get_data_value(token, one_cols[1:],date)\n", + " if not datas:\n", + " return\n", + "\n", + " # data_list = [two_cols, one_cols]\n", + " append_rows = [get_cur_time(date)[1]]\n", + " dataItemNo_dataValue = {}\n", + " for data_value in datas:\n", + " if \"dataValue\" not in data_value:\n", + " print(data_value)\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", + " else:\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", + " \n", + " for value in one_cols[1:]:\n", + " if value in dataItemNo_dataValue:\n", + " append_rows.append(dataItemNo_dataValue[value])\n", + " else:\n", + " append_rows.append(\"\")\n", + " save_xls(append_rows)\n", + "\n", + " # 获取当月的数据写入到指定文件\n", + " # optimize_Model()\n", + " # upload_data_to_system(token_push,date)\n", + " # data_list.append(three_cols)\n", + " # write_xls(data_list)\n", + "\n", + "def check_data(dataItemNo):\n", + " token = get_head_auth()\n", + " if not token:\n", + " return\n", + "\n", + " datas = get_data_value(token, dataItemNo)\n", + " if not datas:\n", + " return\n", + "\n", + "def save_xls(append_rows):\n", + "\n", + " # 打开xls文件\n", + " workbook = xlrd.open_workbook('沥青数据项.xls')\n", + "\n", + " # 获取所有sheet的个数\n", + " sheet_count = len(workbook.sheet_names())\n", + "\n", + " # 获取所有sheet的名称\n", + " sheet_names = workbook.sheet_names()\n", + "\n", + " new_workbook = xlwt.Workbook()\n", + " for i in range(sheet_count):\n", + " # 获取当前sheet\n", + " sheet = workbook.sheet_by_index(i)\n", + "\n", + " # 
获取sheet的行数和列数\n", + " row_count = sheet.nrows\n", + " col_count = sheet.ncols\n", + " # 获取原有数据\n", + " data = []\n", + " for row in range(row_count):\n", + " row_data = []\n", + " for col in range(col_count):\n", + " row_data.append(sheet.cell_value(row, col))\n", + " data.append(row_data)\n", + " # 创建xlwt的Workbook对象\n", + " # 创建sheet\n", + " new_sheet = new_workbook.add_sheet(sheet_names[i])\n", + "\n", + " # 将原有的数据写入新的sheet\n", + " for row in range(row_count):\n", + " for col in range(col_count):\n", + " new_sheet.write(row, col, data[row][col])\n", + "\n", + " if i == 0:\n", + " # 在新的sheet中添加数据\n", + " for col in range(col_count):\n", + " new_sheet.write(row_count, col, append_rows[col])\n", + "\n", + " # 保存新的xls文件\n", + " new_workbook.save(\"沥青数据项.xlsx\")\n", + "\n", + "def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEnd):\n", + "\n", + " search_data = {\n", + " \"funcModule\": \"数据项\",\n", + " \"funcOperation\": \"查询\",\n", + " \"data\": {\n", + " \"dateStart\": dateStart,\n", + " \"dateEnd\": dateEnd,\n", + " \"dataItemNoList\": dataItemNoList # 数据项编码,代表 brent最低价和最高价\n", + " }\n", + " }\n", + "\n", + " headers = {\"Authorization\": token}\n", + " search_res = requests.post(url=url, headers=headers, json=search_data, timeout=(3, 5))\n", + " search_value = json.loads(search_res.text)[\"data\"]\n", + " if search_value:\n", + " return search_value\n", + " else:\n", + " return None\n", + "\n", + "\n", + "def save_queryDataListItemNos_xls(data_df,dataItemNoList):\n", + " current_year_month = datetime.now().strftime('%Y-%m')\n", + " grouped = data_df.groupby(\"dataDate\")\n", + "\n", + " # 使用openpyxl打开xlsx文件\n", + " from openpyxl import load_workbook\n", + " workbook = load_workbook('沥青数据项.xlsx')\n", + "\n", + " # 创建新工作簿\n", + " new_workbook = load_workbook('沥青数据项.xlsx')\n", + " \n", + " for sheetname in workbook.sheetnames:\n", + " sheet = workbook[sheetname]\n", + " new_sheet = new_workbook[sheetname]\n", + " \n", + " 
current_year_month_row = 0\n", + " # 查找当前月份数据起始行\n", + " for row_idx, row in enumerate(sheet.iter_rows(values_only=True), 1):\n", + " if str(row[0]).startswith(current_year_month):\n", + " current_year_month_row += 1\n", + "\n", + " # 追加新数据\n", + " if sheetname == workbook.sheetnames[0]:\n", + " start_row = sheet.max_row - current_year_month_row + 1\n", + " for row_idx, (date, group) in enumerate(grouped, start=start_row):\n", + " new_sheet.cell(row=row_idx, column=1, value=date)\n", + " for j, dataItemNo in enumerate(dataItemNoList, start=2):\n", + " if group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values:\n", + " new_sheet.cell(row=row_idx, column=j, \n", + " value=group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values[0])\n", + "\n", + " # 保存修改后的xlsx文件\n", + " new_workbook.save(\"沥青数据项.xlsx\")\n", + "\n", + "\n", + "# def save_queryDataListItemNos_xls(data_df,dataItemNoList):\n", + "# from datetime import datetime, timedelta\n", + "# current_year_month = datetime.now().strftime('%Y-%m')\n", + "# grouped = data_df.groupby(\"dataDate\")\n", + "\n", + "# # 打开xls文件\n", + "# workbook = xlrd.open_workbook('沥青数据项.xlsx')\n", + "\n", + "# # 获取所有sheet的个数\n", + "# sheet_count = len(workbook.sheet_names())\n", + "\n", + "# # 获取所有sheet的名称\n", + "# sheet_names = workbook.sheet_names()\n", + "\n", + "# new_workbook = xlwt.Workbook()\n", + "# for i in range(sheet_count):\n", + "# # 获取当前sheet\n", + "# sheet = workbook.sheet_by_index(i)\n", + "\n", + "# # 获取sheet的行数和列数\n", + "# row_count = sheet.nrows\n", + "# col_count = sheet.ncols\n", + "# # 获取原有数据\n", + "# data = []\n", + "# for row in range(row_count):\n", + "# row_data = []\n", + "# for col in range(col_count):\n", + "# row_data.append(sheet.cell_value(row, col))\n", + "# data.append(row_data)\n", + "\n", + "# # 创建xlwt的Workbook对象\n", + "# # 创建sheet\n", + "# new_sheet = new_workbook.add_sheet(sheet_names[i])\n", + "\n", + "\n", + "# current_year_month_row = 0\n", + "# # 将原有的数据写入新的sheet\n", + "# for row 
in range(row_count):\n", + "# for col in range(col_count):\n", + "# col0 = data[row][0]\n", + "# # print(\"col0\",col0[:7])\n", + "# if col0[:7] == current_year_month:\n", + "# current_year_month_row += 1\n", + "# break\n", + "# new_sheet.write(row, col, data[row][col])\n", + "\n", + "\n", + "# # print(\"current_year_month_row\",current_year_month_row)\n", + "# if i == 0:\n", + "# rowFlag = 0\n", + "# # 查看每组数据\n", + "# for date, group in grouped:\n", + "# new_sheet.write(row_count + rowFlag - current_year_month_row, 0, date)\n", + "# for j in range(len(dataItemNoList)):\n", + "# dataItemNo = dataItemNoList[j]\n", + "\n", + "# # for dataItemNo in dataItemNoList:\n", + "# if group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values:\n", + "\n", + "# new_sheet.write(row_count + rowFlag - current_year_month_row, j + 1, group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values[0])\n", + "\n", + "# rowFlag += 1\n", + "\n", + "\n", + "# # 保存新的xls文件\n", + "# new_workbook.save(\"沥青数据项.xlsx\")\n", + "\n", + "def queryDataListItemNos(token=None):\n", + " from datetime import datetime, timedelta\n", + " df = pd.read_excel('沥青数据项.xlsx')\n", + " dataItemNoList = df.iloc[0].tolist()[1:]\n", + " \n", + " if token is None:\n", + " token = get_head_auth()\n", + "\n", + " if not token:\n", + " print('token获取失败')\n", + " return\n", + "\n", + " # 获取当前日期\n", + " current_date = datetime.now()\n", + "\n", + " # 获取当月1日\n", + " first_day_of_month = current_date.replace(day=1)\n", + "\n", + " # 格式化为 YYYYMMDD 格式\n", + " dateEnd = current_date.strftime('%Y%m%d')\n", + " dateStart = first_day_of_month.strftime('%Y%m%d')\n", + "\n", + " search_value = get_queryDataListItemNos_value(token, queryDataListItemNos_url, dataItemNoList, dateStart, dateEnd)\n", + "\n", + "\n", + " data_df = pd.DataFrame(search_value)\n", + "\n", + " data_df[\"dataDate\"] = pd.to_datetime(data_df[\"dataDate\"])\n", + " data_df[\"dataDate\"] = data_df[\"dataDate\"].dt.strftime('%Y-%m-%d')\n", + " 
save_queryDataListItemNos_xls(data_df,dataItemNoList)\n", + " print('当月数据更新完成')\n", + "\n", + "\n", + "def save_xls_1(append_rows):\n", + "\n", + " # 打开xls文件\n", + " workbook = xlrd.open_workbook('沥青数据项.xlsx')\n", + "\n", + " # 获取所有sheet的个数\n", + " sheet_count = len(workbook.sheet_names())\n", + "\n", + " # 获取所有sheet的名称\n", + " sheet_names = workbook.sheet_names()\n", + "\n", + " new_workbook = xlwt.Workbook()\n", + " for i in range(sheet_count):\n", + " # 获取当前sheet\n", + " sheet = workbook.sheet_by_index(i)\n", + "\n", + " # 获取sheet的行数和列数\n", + " row_count = sheet.nrows - 1\n", + " col_count = sheet.ncols\n", + " # 获取原有数据\n", + " data = []\n", + " for row in range(row_count):\n", + " row_data = []\n", + " for col in range(col_count):\n", + " row_data.append(sheet.cell_value(row, col))\n", + " data.append(row_data)\n", + " # 创建xlwt的Workbook对象\n", + " # 创建sheet\n", + " new_sheet = new_workbook.add_sheet(sheet_names[i])\n", + "\n", + " # 将原有的数据写入新的sheet\n", + " for row in range(row_count):\n", + " for col in range(col_count):\n", + " new_sheet.write(row, col, data[row][col])\n", + "\n", + " if i == 0:\n", + " # 在新的sheet中添加数据\n", + " for col in range(col_count):\n", + " new_sheet.write(row_count, col, append_rows[col])\n", + "\n", + " # 保存新的xls文件\n", + " new_workbook.save(\"沥青数据项.xlsx\") \n", + "\n", + "\n", + "\n", + "def start(date=''):\n", + " \"\"\"获取当日数据\"\"\"\n", + " read_xls_data()\n", + " token = get_head_auth()\n", + " if not token:\n", + " return\n", + " \n", + " cur_time,cur_time2 = getNow(date)\n", + " print(f\"获取{cur_time}数据\")\n", + " datas = get_data_value(token, one_cols,date=cur_time)\n", + " print(len(datas))\n", + " print(datas)\n", + " if not datas:\n", + " return\n", + "\n", + " append_rows = [cur_time2]\n", + " dataItemNo_dataValue = {}\n", + " for data_value in datas:\n", + " if \"dataValue\" not in data_value:\n", + " print(data_value)\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", + " else:\n", + " 
dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", + " \n", + " for value in one_cols:\n", + " if value in dataItemNo_dataValue:\n", + " append_rows.append(dataItemNo_dataValue[value])\n", + " else:\n", + " append_rows.append(\"\")\n", + " \n", + " print('添加的行:',len(append_rows),append_rows)\n", + " save_xls_2(append_rows)\n", + "\n", + "\n", + "def save_xls_2(append_rows):\n", + " \"\"\"保存或更新数据到Excel文件\n", + " 参数:\n", + " append_rows (list): 需要追加/更新的数据行,格式为[日期, 数据项1, 数据项2,...]\n", + " \"\"\"\n", + " # try:\n", + " # 读取现有数据(假设第一行为列名)\n", + " df = pd.read_excel('沥青数据项.xlsx', sheet_name=0)\n", + " print('文件中的数据列数:',len(df.columns),df.columns)\n", + " # 转换append_rows为DataFrame\n", + " if len(append_rows) != len(df.columns):\n", + " # 去除第二个元素 ,不知道什么原因多一个空数据\n", + " append_rows.pop(1)\n", + " append_rows = pd.DataFrame([append_rows],columns=df.columns)\n", + " # 创建新数据行\n", + " new_date = append_rows['日期'].values[0]\n", + " \n", + " dates = df['日期'].to_list()\n", + " # 判断日期是否存在\n", + " if new_date in dates:\n", + " # 找到日期所在行的索引\n", + " date_mask = df['日期'] == new_date\n", + " # 存在则更新数据\n", + " df.loc[date_mask] = append_rows.values\n", + " print(f\"更新 {new_date} 数据\")\n", + " else:\n", + " # 不存在则追加数据\n", + " df = pd.concat([df, append_rows], ignore_index=True)\n", + " print(df.head())\n", + " print(df.tail())\n", + " print(f\"插入 {new_date} 新数据\")\n", + " \n", + " # 保存更新后的数据\n", + " df.to_excel('沥青数据项.xlsx', index=False, engine='openpyxl')\n", + " \n", + " # except FileNotFoundError:\n", + " # # 如果文件不存在则创建新文件\n", + " # pd.DataFrame([append_rows]).to_excel('沥青数据项.xlsx', index=False, engine='openpyxl')\n", + " # except Exception as e:\n", + " # print(f\"保存数据时发生错误: {str(e)}\")\n", + "\n", + "\n", + "def main(start_date=None,token=None,token_push=None):\n", + " if start_date is None:\n", + " start_date = datetime.now()\n", + " if token is None:\n", + " token = get_head_auth()\n", + " if token_push is None:\n", + " token_push = 
get_head_push_auth()\n", + " date = start_date.strftime('%Y%m%d')\n", + " print(date)\n", + "# start(date)\n", + " # 更新当月数据\n", + " queryDataListItemNos(token)\n", + " # 训练模型\n", + " optimize_Model()\n", + " # # 预测&上传预测结果\n", + " upload_data_to_system(token_push,start_date)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "运行中ing...\n" + ] + } + ], + "source": [ + "\n", + "if __name__ == \"__main__\":\n", + " print(\"运行中ing...\")\n", + " # 每天定时12点运行\n", + " while True:\n", + " # 获取当前时间\n", + " current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n", + " # print(current_time)\n", + "\n", + " # 判断当前时间是否为执行任务的时间点\n", + " try:\n", + " if current_time == \"09:13:00\":\n", + " print(\"执行定时任务\")\n", + " main()\n", + " elif current_time == \"09:18:00\":\n", + " print(\"执行定时任务\")\n", + " main()\n", + " time.sleep(1)\n", + " except :\n", + " print(f\"{current_time}任务失败\")\n", + "# main()\n", + " # 检测数据准确性, 需要检测放开\n", + " # check_data(\"100028098|LISTING_PRICE\")\n", + " # check_data(\"9137070016544622XB|DAY_Yield\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# start_date = datetime(2025, 5, 16)\n", + "# end_date = datetime(2025, 5, 19)\n", + "# token = get_head_auth()\n", + "\n", + "# token_push = get_head_push_auth()\n", + "\n", + "# while start_date < end_date:\n", + "# main(start_date,token,token_push)\n", + "# start_date += timedelta(days=1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### 代码备份:\n", + "\n", + "\n", + "class codeBackup:\n", + "\n", + " \n", + "\n", + "\n", + " def write_xls(data,date):\n", + " # 创建一个Workbook对象\n", + " workbook = xlwt.Workbook()\n", + "\n", + " # 创建一个Sheet对象,可指定名称\n", + " sheet = workbook.load('Sheet1')\n", + "\n", + " # 写入数据行\n", + " for row_index, row_data in 
enumerate(data):\n", + " for col_index, cell_data in enumerate(row_data):\n", + " sheet.write(row_index, col_index, cell_data)\n", + "\n", + " # 保存Workbook到文件\n", + " workbook.save(get_cur_time(date)[0] + '.xls')\n", + "\n", + "\n", + " def start():\n", + " '''预测上传数据'''\n", + " read_xls_data()\n", + "\n", + " token = get_head_auth()\n", + " if not token:\n", + " return\n", + " token_push = get_head_push_auth()\n", + " if not token_push:\n", + " return\n", + "\n", + " datas = get_data_value(token, one_cols[1:])\n", + " if not datas:\n", + " return\n", + "\n", + " # data_list = [two_cols, one_cols]\n", + " append_rows = [get_cur_time()[1]]\n", + " dataItemNo_dataValue = {}\n", + " for data_value in datas:\n", + " if \"dataValue\" not in data_value:\n", + " print(data_value)\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", + " else:\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", + " \n", + " for value in one_cols[1:]:\n", + " if value in dataItemNo_dataValue:\n", + " append_rows.append(dataItemNo_dataValue[value])\n", + " else:\n", + " append_rows.append(\"\")\n", + " save_xls(append_rows)\n", + " \n", + " # 获取当月的数据写入到指定文件\n", + " queryDataListItemNos(token)\n", + " optimize_Model()\n", + " upload_data_to_system(token_push)\n", + " # data_list.append(three_cols)\n", + " # write_xls(data_list)\n", + "\n", + "\n", + "\n", + "\n", + " def start_1():\n", + " '''更新数据'''\n", + " print(\"更新当天数据\")\n", + " read_xls_data()\n", + "\n", + " token = get_head_auth()\n", + " if not token:\n", + " return\n", + " \n", + "\n", + " datas = get_data_value(token, one_cols[1:])\n", + " if not datas:\n", + " return\n", + "\n", + " # data_list = [two_cols, one_cols]\n", + " append_rows = [get_cur_time()[1]]\n", + " dataItemNo_dataValue = {}\n", + " for data_value in datas:\n", + " if \"dataValue\" not in data_value:\n", + " print(data_value)\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", + " else:\n", + " 
dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", + " \n", + " for value in one_cols[1:]:\n", + " if value in dataItemNo_dataValue:\n", + " append_rows.append(dataItemNo_dataValue[value])\n", + " else:\n", + " append_rows.append(\"\")\n", + " print(\"当天数据为:\",append_rows)\n", + " save_xls_1(append_rows)\n", + "\n", + " \n", + " # data_list.append(three_cols)\n", + " # write_xls(data_list)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb.r87201 b/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb.r87201 new file mode 100644 index 0000000..44fa65f --- /dev/null +++ b/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb.r87201 @@ -0,0 +1,1156 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + " \n", + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import requests\n", + "import json\n", + "from datetime import datetime, timedelta\n", + "import time\n", + "import pandas as pd\n", + "\n", + "# 变量定义\n", + "login_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n", + "search_url = \"http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos\"\n", + "queryDataListItemNos_url = \"http://10.200.32.39/jingbo-api//api/warehouse/dwDataItem/queryDataListItemNos\"\n", + "\n", + "login_push_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n", + "upload_url = \"http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList\"\n", + "\n", + "login_data = {\n", + " \"data\": {\n", + " 
\"account\": \"api_dev\",\n", + " \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n", + " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n", + " \"terminal\": \"API\"\n", + " },\n", + " \"funcModule\": \"API\",\n", + " \"funcOperation\": \"获取token\"\n", + "}\n", + "\n", + "login_push_data = {\n", + " \"data\": {\n", + " \"account\": \"api_dev\",\n", + " \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n", + " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n", + " \"terminal\": \"API\"\n", + " },\n", + " \"funcModule\": \"API\",\n", + " \"funcOperation\": \"获取token\"\n", + "}\n", + "\n", + "read_file_path_name = \"沥青数据项.xlsx\"\n", + "one_cols = []\n", + "two_cols = []\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sn\n", + "import random\n", + "import time\n", + "\n", + "\n", + "\n", + "\n", + "from plotly import __version__\n", + "from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot\n", + "\n", + "from sklearn import preprocessing\n", + "\n", + "from pandas import Series,DataFrame\n", + "\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import sklearn.datasets as datasets\n", + "\n", + "#导入机器学习算法模型\n", + "from sklearn.linear_model import Lasso\n", + "from xgboost import XGBRegressor\n", + "\n", + "import statsmodels.api as sm\n", + "# from keras.preprocessing.sequence import TimeseriesGenerator\n", + "from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator\n", + "\n", + "import plotly.express as px\n", + "import plotly.graph_objects as go\n", + "\n", + "import xgboost as xgb\n", + "from xgboost import plot_importance, plot_tree\n", + "from sklearn.metrics import mean_absolute_error\n", + "from statsmodels.tools.eval_measures import mse,rmse\n", + "from sklearn.model_selection import GridSearchCV\n", + "from xgboost import XGBRegressor\n", + "import warnings\n", + "import pickle\n", + "\n", + 
"from sklearn.metrics import mean_squared_error\n", + "\n", + "#切割训练数据和样本数据\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "#用于模型评分\n", + "from sklearn.metrics import r2_score\n", + "\n", + "le = preprocessing.LabelEncoder()\n", + "\n", + "# print(__version__) # requires version >= 1.9.0\n", + "\n", + "\n", + "import cufflinks as cf\n", + "cf.go_offline()\n", + "\n", + "random.seed(100)\n", + "\n", + "%matplotlib inline\n", + "\n", + "# 数据获取\n", + "\n", + "def get_head_auth():\n", + " login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5))\n", + " text = json.loads(login_res.text)\n", + " if text[\"status\"]:\n", + " token = text[\"data\"][\"accessToken\"]\n", + " return token\n", + " else:\n", + " print(\"获取认证失败\")\n", + " return None\n", + "\n", + "\n", + "def get_data_value(token, dataItemNoList,date=''):\n", + "\n", + " search_data = {\n", + " \"data\": {\n", + " \"date\": getNow(date)[0],\n", + " \"dataItemNoList\": dataItemNoList\n", + " },\n", + " \"funcModule\": \"数据项\",\n", + " \"funcOperation\": \"查询\"\n", + " }\n", + " headers = {\"Authorization\": token}\n", + " search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5))\n", + " search_value = json.loads(search_res.text)[\"data\"]\n", + " if search_value:\n", + " return search_value\n", + " else:\n", + " print(\"今天没有新数据\")\n", + " return None\n", + "\n", + "\n", + "# xls文件处理\n", + "\n", + "\n", + "\n", + "def getNow(date='', offset=0):\n", + " \"\"\"生成指定日期的两种格式字符串\n", + " Args:\n", + " date: 支持多种输入类型:\n", + " - datetime对象\n", + " - 字符串格式(支持'%Y-%m-%d'和'%Y%m%d')\n", + " - 空字符串表示当前日期\n", + " offset: 日期偏移天数\n", + " Returns:\n", + " tuple: (紧凑日期字符串, 标准日期字符串)\n", + " \"\"\"\n", + " # 日期解析逻辑\n", + " from datetime import datetime,timedelta\n", + " if isinstance(date, datetime):\n", + " now = date\n", + " else:\n", + " now = datetime.now()\n", + " if date:\n", + " # 尝试多种日期格式解析\n", + " for fmt in ('%Y-%m-%d', '%Y%m%d', '%Y/%m/%d'):\n", 
+ " try:\n", + " now = datetime.strptime(str(date), fmt)\n", + " break\n", + " except ValueError:\n", + " continue\n", + " else:\n", + " raise ValueError(f\"无法解析的日期格式: {date}\")\n", + "\n", + " # 应用日期偏移\n", + " now = now - timedelta(days=offset)\n", + " \n", + " # 统一格式化输出\n", + " date_str = now.strftime(\"%Y-%m-%d\")\n", + " compact_date = date_str.replace(\"-\", \"\")\n", + " return compact_date, date_str\n", + "\n", + "\n", + "\n", + "# def get_cur_time(date = ''):\n", + "# if date == '':\n", + "# now = datetime.now()\n", + "# else:\n", + "# now = date\n", + "# year = now.year\n", + "# month = now.month\n", + "# day = now.day\n", + "\n", + "# if month < 10:\n", + "# month = \"0\" + str(month)\n", + "# if day < 10:\n", + "# day = \"0\" + str(day)\n", + "# cur_time = str(year) + str(month) + str(day)\n", + "# cur_time2 = str(year) + \"-\" + str(month) + \"-\" + str(day)\n", + "# # cur_time = '20231007'\n", + "# # cur_time2 = '2023-10-07'\n", + "# return cur_time, cur_time2\n", + "\n", + "\n", + "def get_head_push_auth():\n", + " login_res = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5))\n", + " text = json.loads(login_res.text)\n", + " if text[\"status\"]:\n", + " token = text[\"data\"][\"accessToken\"]\n", + " return token\n", + " else:\n", + " print(\"获取认证失败\")\n", + " return None\n", + "\n", + "def upload_data_to_system(token_push,date):\n", + " data = {\n", + " \"funcModule\": \"数据表信息列表\",\n", + " \"funcOperation\": \"新增\",\n", + " \"data\": [\n", + " {\"dataItemNo\": \"C01100036|Forecast_Price|ACN\",\n", + " \"dataDate\": getNow(date)[0],\n", + " \"dataStatus\": \"add\",\n", + " \"dataValue\": forecast_price()\n", + " }\n", + "\n", + " ]\n", + " }\n", + " headers = {\"Authorization\": token_push}\n", + " res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))\n", + " print(res.text)\n", + "\n", + "def forecast_price():\n", + " df_test = pd.read_excel('沥青数据项.xlsx')\n", + " df_test.drop([0],inplace=True)\n", + " 
df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", + " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", + " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", + " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", + " '京博签单量','京博库存量','京博产量','加权平均成交价']] = df_test[['汽油执行价','柴油执行价','齐鲁石化销量',\n", + " '齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", + " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", + " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", + " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", + " '京博签单量','京博库存量','京博产量','加权平均成交价']].astype('float')\n", + " # df_test['日期']=pd.to_datetime(df_test['日期'], format='%d/%m/%Y',infer_datetime_format=True)\n", + " df_test['日期']=pd.to_datetime(df_test['日期'], format='%Y-%m-%d',infer_datetime_format=True)\n", + "\n", + " #查看每个特征缺失值数量\n", + " MisVal_Check=df_test.isnull().sum().sort_values(ascending=False)\n", + " #去掉缺失值百分比>0.4的特征,去掉这些特征后的新表格命名为df_test_1\n", + " df_MisVal_Check = pd.DataFrame(MisVal_Check,)#\n", + " df_MisVal_Check_1=df_MisVal_Check.reset_index()\n", + " df_MisVal_Check_1.columns=['Variable_Name','Missing_Number'] \n", + " df_MisVal_Check_1['Missing_Number']=df_MisVal_Check_1['Missing_Number']/len(df_test)\n", + " df_test_1=df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number']>0.4].Variable_Name,axis = 1)\n", + "\n", + " #将缺失值补为前一个或者后一个数值\n", + " df_test_1=df_test_1.fillna(df_test.ffill())\n", + " df_test_1=df_test_1.fillna(df_test_1.bfill())\n", + "\n", + " # 选择用于模型训练的列名称\n", + " col_for_training = df_test_1.columns\n", + " import joblib\n", + " Best_model_DalyLGPrice = joblib.load(\"日度价格预测_最佳模型.pkl\")\n", + " # 最新的一天为最后一行的数据\n", + " df_test_1_Day = df_test_1.tail(1)\n", + " # 移除不需要的列\n", + " df_test_1_Day.index = df_test_1_Day[\"日期\"]\n", + " df_test_1_Day = df_test_1_Day.drop([\"日期\"], axis= 1)\n", + " 
df_test_1_Day=df_test_1_Day.drop('京博指导价',axis=1)\n", + " df_test_1_Day=df_test_1_Day.dropna()\n", + "\n", + " # df_test_1_Day\n", + " #预测今日价格,显示至小数点后两位\n", + " Ypredict_Today=Best_model_DalyLGPrice.predict(df_test_1_Day)\n", + "\n", + " df_test_1_Day['日度预测价格']=Ypredict_Today\n", + " print(df_test_1_Day['日度预测价格'])\n", + " a = df_test_1_Day['日度预测价格']\n", + " a = a[0]\n", + " a = float(a)\n", + " a = round(a,2)\n", + " return a\n", + "def optimize_Model():\n", + " from sklearn.model_selection import train_test_split\n", + " from sklearn.impute import SimpleImputer\n", + " from sklearn.preprocessing import OrdinalEncoder\n", + " from sklearn.feature_selection import SelectFromModel\n", + " from sklearn.metrics import mean_squared_error, r2_score\n", + "\n", + " pd.set_option('display.max_rows',40) \n", + " pd.set_option('display.max_columns',40) \n", + " df_test = pd.read_excel('沥青数据项.xlsx')\n", + " df_test.drop([0],inplace=True)\n", + " df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", + " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", + " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", + " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", + " '京博签单量','京博库存量','京博产量','加权平均成交价']] = df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", + " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", + " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", + " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", + " '京博签单量','京博库存量','京博产量','加权平均成交价']].astype('float')\n", + " # df_test = pd.read_csv('定价模型数据收集20190901-20230615.csv',encoding = 'gbk',engine = 'python')\n", + " # df_test['日期']=pd.to_datetime(df_test['日期'], format='%m/%d/%Y',infer_datetime_format=True)\n", + " df_test['日期']=pd.to_datetime(df_test['日期'], format='%Y-%m-%d',infer_datetime_format=True)\n", + " # df_test.tail(3)\n", + " 
MisVal_Check=df_test.isnull().sum().sort_values(ascending=False)\n", + " #去掉缺失值百分比>0.4的特征,去掉这些特征后的新表格命名为df_test_1\n", + " df_MisVal_Check = pd.DataFrame(MisVal_Check,)#\n", + " df_MisVal_Check_1=df_MisVal_Check.reset_index()\n", + " df_MisVal_Check_1.columns=['Variable_Name','Missing_Number'] \n", + " df_MisVal_Check_1['Missing_Number']=df_MisVal_Check_1['Missing_Number']/len(df_test)\n", + " df_test_1=df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number']>0.4].Variable_Name,axis = 1)\n", + " #将缺失值补为前一个或者后一个数值\n", + " df_test_1=df_test_1.fillna(df_test.ffill())\n", + " df_test_1=df_test_1.fillna(df_test_1.bfill())\n", + " df_test_1[\"日期\"] = pd.to_datetime(df_test_1[\"日期\"])\n", + " df_test_1.index = df_test_1[\"日期\"]\n", + " df_test_1 = df_test_1.drop([\"日期\"], axis= 1)\n", + " dataset1=df_test_1.drop('京博指导价',axis=1)#.astype(float)\n", + "\n", + " y=df_test_1['京博指导价']\n", + "\n", + " x=dataset1 \n", + "\n", + " train = x\n", + " target = y\n", + "\n", + " #切割数据样本集合测试集\n", + " X_train,x_test,y_train,y_true = train_test_split(train,target,test_size=0.2,random_state=0)\n", + " \n", + " \n", + " from sklearn.linear_model import Lasso\n", + " from xgboost import XGBRegressor\n", + "\n", + " import statsmodels.api as sm\n", + " # from keras.preprocessing.sequence import TimeseriesGenerator\n", + " from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator\n", + "\n", + " import plotly.express as px\n", + " import plotly.graph_objects as go\n", + "\n", + " import xgboost as xgb\n", + " from xgboost import plot_importance, plot_tree\n", + " from sklearn.metrics import mean_absolute_error\n", + " from statsmodels.tools.eval_measures import mse,rmse\n", + " from sklearn.model_selection import GridSearchCV\n", + " from xgboost import XGBRegressor\n", + " import warnings\n", + " import pickle\n", + "\n", + " from sklearn.metrics import mean_squared_error\n", + "\n", + " #切割训练数据和样本数据\n", + " from sklearn.model_selection import train_test_split\n", + 
"\n", + " #用于模型评分\n", + " from sklearn.metrics import r2_score\n", + "\n", + " #模型缩写\n", + " Lasso = Lasso(random_state=0)\n", + " XGBR = XGBRegressor(random_state=0)\n", + " Lasso.fit(X_train,y_train)\n", + " XGBR.fit(X_train,y_train)\n", + " y_pre_Lasso = Lasso.predict(x_test)\n", + " y_pre_XGBR = XGBR.predict(x_test)\n", + "\n", + " #计算Lasso、XGBR、RandomForestR、AdaBoostR、GradientBoostingR、BaggingRegressor各模型的R²\n", + " Lasso_score = r2_score(y_true,y_pre_Lasso)\n", + " XGBR_score=r2_score(y_true,y_pre_XGBR)\n", + "\n", + " #计算Lasso、XGBR的MSE和RMSE\n", + " Lasso_MSE=mean_squared_error(y_true, y_pre_Lasso)\n", + " XGBR_MSE=mean_squared_error(y_true, y_pre_XGBR)\n", + "\n", + " Lasso_RMSE=np.sqrt(Lasso_MSE)\n", + " XGBR_RMSE=np.sqrt(XGBR_MSE)\n", + " model_results = pd.DataFrame([['Lasso', Lasso_RMSE, Lasso_score],\n", + " ['XgBoost', XGBR_RMSE, XGBR_score]],\n", + " columns = ['模型(Model)','均方根误差(RMSE)', 'R^2 score'])\n", + " model_results1=model_results.set_index('模型(Model)')\n", + "\n", + " def plot_feature_importance(importance,names,model_type):\n", + " feature_importance = np.array(importance)\n", + " feature_names = np.array(names)\n", + "\n", + " data={'feature_names':feature_names,'feature_importance':feature_importance}\n", + " fi_df = pd.DataFrame(data)\n", + "\n", + " fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True)\n", + "\n", + " plt.figure(figsize=(10,8))\n", + " sn.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names'])\n", + "\n", + " plt.title(model_type + \" \"+'FEATURE IMPORTANCE')\n", + " plt.xlabel('FEATURE IMPORTANCE')\n", + " plt.ylabel('FEATURE NAMES')\n", + " from pylab import mpl\n", + " %pylab\n", + " mpl.rcParams['font.sans-serif'] = ['SimHei']\n", + " from xgboost import XGBRegressor\n", + " from sklearn.model_selection import GridSearchCV\n", + "\n", + " estimator = XGBRegressor(random_state=0,\n", + " nthread=4,\n", + " seed=0\n", + " )\n", + " parameters = {\n", + " 'max_depth': range (2, 11, 2), # 
树的最大深度\n", + " 'n_estimators': range (50, 101, 10), # 迭代次数\n", + " 'learning_rate': [0.01, 0.03, 0.1, 0.3, 0.5, 1]\n", + " }\n", + "\n", + " grid_search_XGB = GridSearchCV(\n", + " estimator=estimator,\n", + " param_grid=parameters,\n", + " # n_jobs = 10,\n", + " cv = 3,\n", + " verbose=True\n", + " )\n", + "\n", + " grid_search_XGB.fit(X_train, y_train)\n", + " print(\"Best score: %0.3f\" % grid_search_XGB.best_score_)\n", + " print(\"Best parameters set:\")\n", + " best_parameters = grid_search_XGB.best_estimator_.get_params()\n", + " for param_name in sorted(parameters.keys()):\n", + " print(\"\\t%s: %r\" % (param_name, best_parameters[param_name]))\n", + " y_pred = grid_search_XGB.predict(x_test)\n", + "\n", + " op_XGBR_score = r2_score(y_true,y_pred)\n", + " op_XGBR_MSE= mean_squared_error(y_true, y_pred)\n", + " op_XGBR_RMSE= np.sqrt(op_XGBR_MSE)\n", + "\n", + " model_results2 = pd.DataFrame([['Optimized_Xgboost', op_XGBR_RMSE, op_XGBR_score]],\n", + " columns = ['模型(Model)', '均方根误差(RMSE)', 'R^2 score'])\n", + " model_results2=model_results2.set_index('模型(Model)')\n", + "\n", + " # results = model_results1.append(model_results2, ignore_index = False)\n", + " results = pd.concat([model_results1,model_results2],ignore_index=True)\n", + " results\n", + " import pickle\n", + "\n", + " Pkl_Filename = \"日度价格预测_最佳模型.pkl\" \n", + "\n", + " with open(Pkl_Filename, 'wb') as file: \n", + " pickle.dump(grid_search_XGB, file)\n", + "\n", + "def read_xls_data():\n", + " \"\"\"获取特征项ID\"\"\"\n", + " global one_cols, two_cols\n", + " # 使用pandas读取Excel文件\n", + " df = pd.read_excel(read_file_path_name, header=None) # 不自动识别列名\n", + " # 获取第二行数据(索引为1)\n", + " one_cols = df.iloc[1].tolist()[1:]\n", + " print(f'获取到的数据项ID{one_cols}')\n", + "\n", + "# def read_xls_data():\n", + "# global one_cols, two_cols\n", + "# # 打开 XLS 文件\n", + "# workbook = xlrd.open_workbook(read_file_path_name)\n", + "\n", + "# # 获取所有表格名称\n", + "# # sheet_names = workbook.sheet_names()\n", + "\n", + "# # 
#     # (commented-out legacy xlrd version of read_xls_data, continued) select the first sheet
#     sheet = workbook.sheet_by_index(0)
#
#     # number of rows and columns
#     num_rows = sheet.nrows
#     # num_cols = sheet.ncols
#
#     # walk every row and collect the cell data
#     # for i in range(num_rows):
#     #     row_data = sheet.row_values(i)
#     #     one_cols.append(row_data)
#     #     two_cols.append(row_data[1])
#
#     row_data = sheet.row_values(1)
#     one_cols = row_data
#
#     # close the XLS file
#     # workbook.close()


def _collect_row_values(first_cell, item_nos, datas):
    """Build one spreadsheet row: ``first_cell`` followed by one value per item number.

    ``datas`` is the list of records returned by ``get_data_value``; each record
    carries a ``"dataItemNo"`` and, usually, a ``"dataValue"``.  Records without
    a value are printed (as before) and stored as an empty string.  Item numbers
    with no record at all also become an empty string.
    """
    value_by_item = {}
    for record in datas:
        if "dataValue" not in record:
            print(record)
            value_by_item[record["dataItemNo"]] = ""
        else:
            value_by_item[record["dataItemNo"]] = record["dataValue"]
    return [first_cell] + [value_by_item.get(item_no, "") for item_no in item_nos]


def start_3(date=None, token=None, token_push=None):
    """Fetch the values for ``date`` and append them as a new row via ``save_xls``.

    Args:
        date: day to fetch; defaults to the current time.
        token: query token; fetched with ``get_head_auth()`` when omitted.
        token_push: accepted for interface compatibility only.  The upload step
            that used it is disabled, so it is no longer looked at.  Previously a
            missing ``token_push`` silently replaced the caller's ``token`` with
            a freshly fetched one.
    """
    # Imported locally: the `%pylab` magic used by the training step rebinds the
    # notebook-global name `datetime` to the module, which breaks `datetime.now()`.
    from datetime import datetime

    read_xls_data()

    if date is None:
        date = datetime.now()
    if token is None:
        token = get_head_auth()

    # NOTE(review): the pandas-based read_xls_data already strips the first
    # column from one_cols, so one_cols[1:] looks like it skips the first data
    # item -- confirm before relying on this legacy entry point.
    item_nos = one_cols[1:]
    datas = get_data_value(token, item_nos, date)
    if not datas:
        return

    save_xls(_collect_row_values(get_cur_time(date)[1], item_nos, datas))


def check_data(dataItemNo):
    """Fetch the current value(s) for ``dataItemNo`` so they can be inspected.

    Returns the records from ``get_data_value``, or ``None`` when no token or no
    data could be obtained.  (The original fetched the data and discarded it.)
    """
    token = get_head_auth()
    if not token:
        return None

    datas = get_data_value(token, dataItemNo)
    if not datas:
        return None
    return datas


def save_xls(append_rows):
    """Copy '沥青数据项.xls' into '沥青数据项.xlsx' and append one row to its first sheet.

    The target is now written with openpyxl so that it is a real .xlsx file.
    xlwt, used before, always emits the legacy BIFF (.xls) format whatever the
    file name, and the openpyxl-based functions in this file cannot open that.

    Args:
        append_rows: values for the new row, one per column.  Values beyond the
            sheet's column count are ignored; a shorter list leaves the
            remaining cells empty.
    """
    from openpyxl import Workbook

    source_book = xlrd.open_workbook('沥青数据项.xls')
    target_book = Workbook()
    target_book.remove(target_book.active)  # drop the default empty sheet

    for sheet_index, sheet_name in enumerate(source_book.sheet_names()):
        source_sheet = source_book.sheet_by_index(sheet_index)
        row_count = source_sheet.nrows
        col_count = source_sheet.ncols
        target_sheet = target_book.create_sheet(title=sheet_name)

        # xlrd is 0-based, openpyxl is 1-based.
        for row in range(row_count):
            for col in range(col_count):
                target_sheet.cell(row=row + 1, column=col + 1,
                                  value=source_sheet.cell_value(row, col))

        if sheet_index == 0:
            for col, value in enumerate(append_rows[:col_count]):
                target_sheet.cell(row=row_count + 1, column=col + 1, value=value)

    target_book.save("沥青数据项.xlsx")


def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEnd):
    """Query the data-item service for a date range.

    Args:
        token: value sent in the ``Authorization`` header.
        url: endpoint to POST to.
        dataItemNoList: data item codes to query.
        dateStart, dateEnd: range bounds, passed through unchanged.

    Returns:
        The ``"data"`` member of the JSON response, or ``None`` when it is
        missing or empty.
    """
    search_data = {
        "funcModule": "数据项",
        "funcOperation": "查询",
        "data": {
            "dateStart": dateStart,
            "dateEnd": dateEnd,
            "dataItemNoList": dataItemNoList,
        },
    }
    headers = {"Authorization": token}
    search_res = requests.post(url=url, headers=headers, json=search_data, timeout=(3, 5))
    # .get(): a response without a "data" member now yields None instead of KeyError.
    return json.loads(search_res.text).get("data") or None


def save_queryDataListItemNos_xls(data_df, dataItemNoList):
    """Overwrite the current month's rows of '沥青数据项.xlsx' with fresh values.

    Args:
        data_df: DataFrame with ``dataDate``, ``dataItemNo`` and ``dataValue``
            columns; one output row is written per distinct ``dataDate``.
        dataItemNoList: item codes in column order; item ``i`` goes to column ``i + 2``.

    Only the first sheet is modified.  Rows are written starting at
    ``max_row - <number of rows whose first cell starts with YYYY-MM> + 1``.
    NOTE(review): this presumes the current month's rows are the last rows of
    the sheet -- confirm against the workbook layout.
    """
    # Local imports: `%pylab` rebinds the global `datetime` to the module.
    from datetime import datetime
    from openpyxl import load_workbook

    current_year_month = datetime.now().strftime('%Y-%m')
    workbook = load_workbook('沥青数据项.xlsx')
    sheet = workbook[workbook.sheetnames[0]]

    month_row_count = sum(
        1
        for row in sheet.iter_rows(values_only=True)
        if str(row[0]).startswith(current_year_month)
    )
    start_row = sheet.max_row - month_row_count + 1

    for row_idx, (date, group) in enumerate(data_df.groupby("dataDate"), start=start_row):
        sheet.cell(row=row_idx, column=1, value=date)
        for col_idx, dataItemNo in enumerate(dataItemNoList, start=2):
            values = group.loc[group["dataItemNo"] == dataItemNo, "dataValue"].values
            # Test emptiness explicitly.  `if values:` skipped a legitimate 0,
            # raised on more than one match, and is deprecated for empty arrays.
            if values.size > 0:
                sheet.cell(row=row_idx, column=col_idx, value=values[0])

    workbook.save("沥青数据项.xlsx")


def queryDataListItemNos(token=None):
    """Refresh the current month's data in '沥青数据项.xlsx' from the query service.

    Args:
        token: query token; fetched with ``get_head_auth()`` when omitted.
    """
    from datetime import datetime

    df = pd.read_excel('沥青数据项.xlsx')
    # First data row below the header, minus the leading (date) column, is used
    # as the list of item codes.
    dataItemNoList = df.iloc[0].tolist()[1:]

    if token is None:
        token = get_head_auth()
    if not token:
        print('token获取失败')
        return

    current_date = datetime.now()
    dateStart = current_date.replace(day=1).strftime('%Y%m%d')
    dateEnd = current_date.strftime('%Y%m%d')

    search_value = get_queryDataListItemNos_value(
        token, queryDataListItemNos_url, dataItemNoList, dateStart, dateEnd)
    if not search_value:
        # Nothing came back; previously this surfaced as KeyError('dataDate').
        print('当月数据查询结果为空')
        return

    data_df = pd.DataFrame(search_value)
    data_df["dataDate"] = pd.to_datetime(data_df["dataDate"]).dt.strftime('%Y-%m-%d')
    save_queryDataListItemNos_xls(data_df, dataItemNoList)
    print('当月数据更新完成')


def save_xls_1(append_rows):
    """Replace the last row of the first sheet of '沥青数据项.xlsx' with ``append_rows``.

    Rewritten on openpyxl.  The xlrd/xlwt version stored BIFF (.xls) content
    under the .xlsx name and also dropped the last row of every other sheet.
    Other sheets are now left untouched.

    Args:
        append_rows: replacement values, one per column.  Columns without a
            supplied value are cleared.
    """
    from openpyxl import load_workbook

    workbook = load_workbook('沥青数据项.xlsx')
    first_sheet = workbook[workbook.sheetnames[0]]
    last_row = first_sheet.max_row

    for col in range(first_sheet.max_column):
        value = append_rows[col] if col < len(append_rows) else None
        first_sheet.cell(row=last_row, column=col + 1, value=value)

    workbook.save("沥青数据项.xlsx")


def start(date=''):
    """Fetch one day's values for every data item and upsert them with ``save_xls_2``.

    Args:
        date: passed to ``getNow``, which returns the two date values used for
            the query and for the row's first cell.
    """
    read_xls_data()
    token = get_head_auth()
    if not token:
        return

    cur_time, cur_time2 = getNow(date)
    print(f"获取{cur_time}数据")
    datas = get_data_value(token, one_cols, date=cur_time)
    # Guard before len(): other callers treat a falsy result as "no data".
    if not datas:
        print(datas)
        return
    print(len(datas))
    print(datas)

    append_rows = _collect_row_values(cur_time2, one_cols, datas)
    print('添加的行:',len(append_rows),append_rows)
    save_xls_2(append_rows)


def save_xls_2(append_rows):
    """Insert or update one dated row in the first sheet of '沥青数据项.xlsx'.

    Args:
        append_rows (list): ``[date, item1, item2, ...]``.  The list is no
            longer modified in place.

    Raises:
        ValueError: if the row cannot be aligned with the sheet's columns.
        FileNotFoundError: propagated from pandas if the workbook is missing.

    The file is rewritten with ``DataFrame.to_excel``, which writes only this sheet.
    """
    df = pd.read_excel('沥青数据项.xlsx', sheet_name=0)
    print('文件中的数据列数:',len(df.columns),df.columns)

    row_values = list(append_rows)  # work on a copy; the caller's list stays intact
    if len(row_values) == len(df.columns) + 1:
        # Upstream sometimes supplies one surplus empty value in second
        # position (cause unknown); drop exactly that one.
        row_values.pop(1)
    elif len(row_values) != len(df.columns):
        raise ValueError(
            f"append_rows has {len(row_values)} values but the sheet has "
            f"{len(df.columns)} columns"
        )

    new_row = pd.DataFrame([row_values], columns=df.columns)
    new_date = new_row['日期'].values[0]

    if new_date in df['日期'].to_list():
        # The date already exists: overwrite the matching row(s).
        df.loc[df['日期'] == new_date] = new_row.values
        print(f"更新 {new_date} 数据")
    else:
        df = pd.concat([df, new_row], ignore_index=True)
        print(df.head())
        print(df.tail())
        print(f"插入 {new_date} 新数据")

    df.to_excel('沥青数据项.xlsx', index=False, engine='openpyxl')
def main(start_date=None, token=None, token_push=None):
    """Run the daily pipeline: refresh this month's data, retrain, predict and upload.

    Args:
        start_date: datetime the run is for; defaults to the current time.
        token: query token; fetched with ``get_head_auth()`` when omitted.
        token_push: upload token; fetched with ``get_head_push_auth()`` when omitted.
    """
    # Imported locally on purpose: optimize_Model() runs the `%pylab` magic,
    # which rebinds the notebook-global name `datetime` to the datetime module
    # (see the "pylab import has clobbered these variables" warning in the cell
    # output).  A second main() in the same kernel would otherwise fail on
    # `datetime.now()`.
    from datetime import datetime

    if start_date is None:
        start_date = datetime.now()
    if token is None:
        token = get_head_auth()
    if token_push is None:
        token_push = get_head_push_auth()

    date = start_date.strftime('%Y%m%d')
    print(date)
    # start(date) is intentionally disabled; the monthly refresh below covers it.

    # Refresh the current month's data.
    queryDataListItemNos(token)
    # Train the model.
    optimize_Model()
    # Predict and upload the prediction.
    upload_data_to_system(token_push, start_date)
You can safely remove this argument.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: inline\n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\EDY\\.conda\\envs\\jiageyuce\\Lib\\site-packages\\IPython\\core\\magics\\pylab.py:166: UserWarning:\n", + "\n", + "pylab import has clobbered these variables: ['random', 'datetime', 'plot', '__version__']\n", + "`%matplotlib` prevents importing * from pylab and numpy\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best score: 0.997\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 8\n", + "\tn_estimators: 90\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20588\\2014406737.py:238: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20588\\2014406737.py:272: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). 
To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "日期\n", + "2025-05-22 3600.097412\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n" + ] + } + ], + "source": [ + "\n", + "# if __name__ == \"__main__\":\n", + "# print(\"运行中ing...\")\n", + "# # 每天定时12点运行\n", + "# while True:\n", + "# # 获取当前时间\n", + "# current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n", + "# # print(current_time)\n", + "\n", + "# # 判断当前时间是否为执行任务的时间点\n", + "# try:\n", + "# if current_time == \"09:13:00\":\n", + "# print(\"执行定时任务\")\n", + "# main()\n", + "# elif current_time == \"09:18:00\":\n", + "# print(\"执行定时任务\")\n", + "# main()\n", + "# time.sleep(1)\n", + "# except :\n", + "# print(f\"{current_time}任务失败\")\n", + "main()\n", + " # 检测数据准确性, 需要检测放开\n", + " # check_data(\"100028098|LISTING_PRICE\")\n", + " # check_data(\"9137070016544622XB|DAY_Yield\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# start_date = datetime(2025, 5, 16)\n", + "# end_date = datetime(2025, 5, 19)\n", + "# token = get_head_auth()\n", + "\n", + "# token_push = get_head_push_auth()\n", + "\n", + "# while start_date < end_date:\n", + "# main(start_date,token,token_push)\n", + "# start_date += timedelta(days=1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "### 代码备份:\n", + "\n", + "\n", + "class codeBackup:\n", + "\n", + " \n", + "\n", + "\n", + " def write_xls(data,date):\n", + " # 创建一个Workbook对象\n", + " workbook = xlwt.Workbook()\n", + "\n", + " # 创建一个Sheet对象,可指定名称\n", + " sheet = workbook.load('Sheet1')\n", + "\n", + " # 写入数据行\n", + " for row_index, row_data in enumerate(data):\n", + " for col_index, cell_data in enumerate(row_data):\n", + " sheet.write(row_index, col_index, cell_data)\n", + "\n", + " # 保存Workbook到文件\n", + " 
workbook.save(get_cur_time(date)[0] + '.xls')\n", + "\n", + "\n", + " def start():\n", + " '''预测上传数据'''\n", + " read_xls_data()\n", + "\n", + " token = get_head_auth()\n", + " if not token:\n", + " return\n", + " token_push = get_head_push_auth()\n", + " if not token_push:\n", + " return\n", + "\n", + " datas = get_data_value(token, one_cols[1:])\n", + " if not datas:\n", + " return\n", + "\n", + " # data_list = [two_cols, one_cols]\n", + " append_rows = [get_cur_time()[1]]\n", + " dataItemNo_dataValue = {}\n", + " for data_value in datas:\n", + " if \"dataValue\" not in data_value:\n", + " print(data_value)\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", + " else:\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", + " \n", + " for value in one_cols[1:]:\n", + " if value in dataItemNo_dataValue:\n", + " append_rows.append(dataItemNo_dataValue[value])\n", + " else:\n", + " append_rows.append(\"\")\n", + " save_xls(append_rows)\n", + " \n", + " # 获取当月的数据写入到指定文件\n", + " queryDataListItemNos(token)\n", + " optimize_Model()\n", + " upload_data_to_system(token_push)\n", + " # data_list.append(three_cols)\n", + " # write_xls(data_list)\n", + "\n", + "\n", + "\n", + "\n", + " def start_1():\n", + " '''更新数据'''\n", + " print(\"更新当天数据\")\n", + " read_xls_data()\n", + "\n", + " token = get_head_auth()\n", + " if not token:\n", + " return\n", + " \n", + "\n", + " datas = get_data_value(token, one_cols[1:])\n", + " if not datas:\n", + " return\n", + "\n", + " # data_list = [two_cols, one_cols]\n", + " append_rows = [get_cur_time()[1]]\n", + " dataItemNo_dataValue = {}\n", + " for data_value in datas:\n", + " if \"dataValue\" not in data_value:\n", + " print(data_value)\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", + " else:\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", + " \n", + " for value in one_cols[1:]:\n", + " if value in dataItemNo_dataValue:\n", + " 
append_rows.append(dataItemNo_dataValue[value])\n", + " else:\n", + " append_rows.append(\"\")\n", + " print(\"当天数据为:\",append_rows)\n", + " save_xls_1(append_rows)\n", + "\n", + " \n", + " # data_list.append(three_cols)\n", + " # write_xls(data_list)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/aisenzhecode/沥青/沥青数据项.xlsx b/aisenzhecode/沥青/沥青数据项.xlsx index 60c9a12..e5d9da4 100644 Binary files a/aisenzhecode/沥青/沥青数据项.xlsx and b/aisenzhecode/沥青/沥青数据项.xlsx differ diff --git a/aisenzhecode/沥青/沥青数据项.xlsx.r87044 b/aisenzhecode/沥青/沥青数据项.xlsx.r87044 new file mode 100644 index 0000000..bd31931 Binary files /dev/null and b/aisenzhecode/沥青/沥青数据项.xlsx.r87044 differ diff --git a/aisenzhecode/沥青/沥青数据项.xlsx.r87201 b/aisenzhecode/沥青/沥青数据项.xlsx.r87201 new file mode 100644 index 0000000..cea746a Binary files /dev/null and b/aisenzhecode/沥青/沥青数据项.xlsx.r87201 differ diff --git a/aisenzhecode/液化石油气/日度价格预测_液化气最佳模型.pkl b/aisenzhecode/液化石油气/日度价格预测_液化气最佳模型.pkl index 47f7dce..50400c1 100644 Binary files a/aisenzhecode/液化石油气/日度价格预测_液化气最佳模型.pkl and b/aisenzhecode/液化石油气/日度价格预测_液化气最佳模型.pkl differ diff --git a/aisenzhecode/液化石油气/液化气价格预测ytj.ipynb b/aisenzhecode/液化石油气/液化气价格预测ytj.ipynb index 28e7bd2..a36ad46 100644 --- a/aisenzhecode/液化石油气/液化气价格预测ytj.ipynb +++ b/aisenzhecode/液化石油气/液化气价格预测ytj.ipynb @@ -2,28 +2,20 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:From 
C:\\Users\\EDY\\AppData\\Roaming\\Python\\Python311\\site-packages\\keras\\src\\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.\n", - "\n" - ] - }, { "data": { "text/html": [ "