diff --git a/aisenzhecode/沥青/定性模型数据项12-11.xlsx b/aisenzhecode/沥青/定性模型数据项12-11.xlsx index 5fb9d60..cf7ddad 100644 Binary files a/aisenzhecode/沥青/定性模型数据项12-11.xlsx and b/aisenzhecode/沥青/定性模型数据项12-11.xlsx differ diff --git a/aisenzhecode/沥青/日度价格预测_最佳模型.pkl b/aisenzhecode/沥青/日度价格预测_最佳模型.pkl index 3835903..86e2dff 100644 Binary files a/aisenzhecode/沥青/日度价格预测_最佳模型.pkl and b/aisenzhecode/沥青/日度价格预测_最佳模型.pkl differ diff --git a/aisenzhecode/沥青/日度价格预测_最佳模型.pkl.r87044 b/aisenzhecode/沥青/日度价格预测_最佳模型.pkl.r87044 deleted file mode 100644 index 47e6ce9..0000000 Binary files a/aisenzhecode/沥青/日度价格预测_最佳模型.pkl.r87044 and /dev/null differ diff --git a/aisenzhecode/沥青/日度价格预测_最佳模型.pkl.r87201 b/aisenzhecode/沥青/日度价格预测_最佳模型.pkl.r87201 deleted file mode 100644 index cd43d5c..0000000 Binary files a/aisenzhecode/沥青/日度价格预测_最佳模型.pkl.r87201 and /dev/null differ diff --git a/aisenzhecode/沥青/沥青定性模型每日推送-ytj.ipynb b/aisenzhecode/沥青/沥青定性模型每日推送-ytj.ipynb index 7e61e16..9482964 100644 --- a/aisenzhecode/沥青/沥青定性模型每日推送-ytj.ipynb +++ b/aisenzhecode/沥青/沥青定性模型每日推送-ytj.ipynb @@ -1636,7 +1636,585 @@ "1 3249.93 1 4876.81 6533.09 \n", "昨日计划提货偏差改之前 793.84\n", "昨日计划提货偏差改之后 1656.2806\n", - "**************************************************预测结果: 3622.23\n" + "**************************************************预测结果: 3622.23\n", + "更新前一天数据\n", + "更新数据前\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1441 2025-05-19 3600 34.8154 1 793.84 6000 110418 3630 3236.47 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1441 1 4876.81 6533.09 \n", + "日期存在,即将更新\n", + "新数据 [3620.0, '', '', 793.84, '', 110418.01, '', 3249.9268, '', '', 6533.09]\n", + "更新数据后\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1441 2025-05-19 3600 34.8154 1 793.84 6000 110418 3630 3236.47 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1441 1 4876.81 6533.09 \n", + "20250520\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1440 2025-05-18 3620 31.4597 1 793.84 6000 110418 3630 3249.93 \n", + "1441 2025-05-19 3600 31.4597 1 1246.01 6000 111150 3630 3236.47 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1440 1 4876.81 6533.09 \n", + "1441 1 4876.81 6817.39 \n", + "前一天的 3249.9268 \n", + "现在的 3236.4706 \n", + " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "0 1440 2025-05-18 3620 31.4597 1 793.84 6000 110418 3630 \n", + "1 1441 2025-05-19 3600 31.4597 1 1246.01 6000 111150 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "0 3249.93 1 4876.81 6533.09 \n", + "1 3236.47 1 4876.81 6817.39 \n", + "昨日计划提货偏差改之前 1246.01\n", + "昨日计划提货偏差改之后 1940.5766000000003\n", + "**************************************************预测结果: 3622.81\n", + "更新前一天数据\n", + "更新数据前\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1442 2025-05-20 3600 31.4597 1 1246.01 6000 111150 3630 3248.42 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1442 1 4876.81 6817.39 \n", + "日期存在,即将更新\n", + "新数据 [3600.0, 31.4597, '', 1246.01, '', 111149.96, '', 3236.4706, '', '', 6817.386]\n", + "更新数据后\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1442 2025-05-20 3600 31.4597 1 1246.01 6000 111150 3630 3248.42 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1442 1 4876.81 6817.39 \n", + "20250521\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1441 2025-05-19 3600 31.4597 1 1246.01 6000 111150 3630 3236.47 \n", + "1442 2025-05-20 3600 28.104 1 1406.68 6000 111294 3630 3248.42 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1441 1 4876.81 6817.39 \n", + "1442 1 4876.81 6772.02 \n", + "前一天的 3236.4706 
\n", + "现在的 3248.4206 \n", + " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "0 1441 2025-05-19 3600 31.4597 1 1246.01 6000 111150 3630 \n", + "1 1442 2025-05-20 3600 28.104 1 1406.68 6000 111294 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "0 3236.47 1 4876.81 6817.39 \n", + "1 3248.42 1 4876.81 6772.02 \n", + "昨日计划提货偏差改之前 1406.68\n", + "昨日计划提货偏差改之后 1895.2120999999997\n", + "**************************************************预测结果: 3603.2\n", + "更新前一天数据\n", + "更新数据前\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1443 2025-05-21 3600 28.104 1 1406.68 6000 111294 3630 3303.26 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1443 1 4876.81 6772.02 \n", + "日期存在,即将更新\n", + "新数据 [3600.0, 28.104, '', 1406.68, '', 111293.57, '', 3248.4206, '', '', 6772.0215]\n", + "更新数据后\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1443 2025-05-21 3600 28.104 1 1406.68 6000 111294 3630 3303.26 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1443 1 4876.81 6772.02 \n", + "20250522\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1442 2025-05-20 3600 28.104 1 1406.68 6000 111294 3630 3248.42 \n", + "1443 2025-05-21 3600 28.104 1 265.5 6000 110523 3630 3303.26 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1442 1 4876.81 6772.02 \n", + "1443 1 4876.81 7338.12 \n", + "前一天的 3248.4206 \n", + "现在的 3303.2591 \n", + " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "0 1442 2025-05-20 3600 28.104 1 1406.68 6000 111294 3630 \n", + "1 1443 2025-05-21 3600 28.104 1 265.5 6000 110523 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "0 3248.42 1 4876.81 6772.02 \n", + "1 3303.26 1 4876.81 7338.12 \n", + "昨日计划提货偏差改之前 265.5\n", + "昨日计划提货偏差改之后 2461.3066\n", + "**************************************************预测结果: 3604.08\n", + "更新前一天数据\n", + "更新数据前\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1444 2025-05-22 3600 28.104 1 265.5 6000 110523 3630 -61269.9 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1444 1 4876.81 7338.12 \n", + "日期存在,即将更新\n", + "新数据 [3600.0, 28.104, '', 265.5, '', 110522.98, '', 3303.2591, '', '', 7338.116]\n", + "更新数据后\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1444 2025-05-22 3600 28.104 1 265.5 6000 110523 3630 -61269.9 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1444 1 4876.81 7338.12 \n", + "20250523\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1443 2025-05-21 3600 28.104 1 265.5 6000 110523 3630 3303.26 \n", + "1444 2025-05-22 3600 24.7483 1 2446.95 6000 110929 3630 -61269.9 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1443 1 4876.81 7338.12 \n", + "1444 1 4876.81 7444.25 \n", + "前一天的 3303.2591 \n", + "现在的 -61269.9011 \n", + "修改了\n", + " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "0 1443 2025-05-21 3600 28.104 1 265.5 6000 110523 3630 \n", + "1 1444 2025-05-22 3600 24.7483 1 2446.95 6000 110929 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "0 3303.26 1 4876.81 7338.12 \n", + "1 3303.26 1 4876.81 7444.25 \n", + "昨日计划提货偏差改之前 2446.95\n", + "昨日计划提货偏差改之后 2567.4371\n", + "**************************************************预测结果: 3604.24\n", + "更新前一天数据\n", + "更新数据前\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1445 2025-05-23 3600 24.7483 1 2446.95 6000 110929 3630 -65021.4 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1445 1 4876.81 7444.25 \n", + "日期存在,即将更新\n", + "新数据 [3600.0, 24.7483, '', 2446.95, '', 110928.95, '', -61269.9011, '', '', 7444.2465]\n", + "更新数据后\n", + " 日期 京博指导价 
70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1445 2025-05-23 3600 24.7483 1 2446.95 6000 110929 3630 -65021.4 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1445 1 4876.81 7444.25 \n", + "20250524\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1444 2025-05-22 3600 24.7483 1 2446.95 6000 110929 3630 -61269.9 \n", + "1445 2025-05-23 3600 26.0067 1 318.59 6000 109961 3630 -65021.4 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1444 1 4876.81 7444.25 \n", + "1445 1 4876.81 7546.95 \n", + "前一天的 -61269.9011 \n", + "现在的 -65021.4299 \n", + "修改了\n", + " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "0 1444 2025-05-22 3600 24.7483 1 2446.95 6000 110929 3630 \n", + "1 1445 2025-05-23 3600 26.0067 1 318.59 6000 109961 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "0 -61269.9 1 4876.81 7444.25 \n", + "1 -61269.9 1 4876.81 7546.95 \n", + "昨日计划提货偏差改之前 318.59\n", + "昨日计划提货偏差改之后 2670.1436000000003\n", + "**************************************************预测结果: 3604.4\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "更新前一天数据\n", + "更新数据前\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1446 2025-05-24 3600 26.0067 1 318.59 6000 109961 3630 -70579.8 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1446 1 4876.81 7546.95 \n", + "日期存在,即将更新\n", + "新数据 [3600.0, 26.0067, '', 318.59, '', 109961.08, '', -65021.4299, '', '', 7546.953]\n", + "更新数据后\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1446 2025-05-24 3600 26.0067 1 318.59 6000 109961 3630 -70579.8 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1446 1 4876.81 7546.95 \n", + "20250525\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1445 2025-05-23 3600 26.0067 1 318.59 6000 109961 3630 -65021.4 \n", + "1446 2025-05-24 3600 26.0067 1 2949.09 6000 110078 3630 -70579.8 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1445 1 4876.81 7546.95 \n", + "1446 1 4876.81 6911.72 \n", + "前一天的 -65021.4299 \n", + "现在的 -70579.7584 \n", + "修改了\n", + " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "0 1445 2025-05-23 3600 26.0067 1 318.59 6000 109961 3630 \n", + "1 1446 2025-05-24 3600 26.0067 1 2949.09 6000 110078 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "0 -65021.4 1 4876.81 7546.95 \n", + "1 -65021.4 1 4876.81 6911.72 \n", + "昨日计划提货偏差改之前 2949.09\n", + "昨日计划提货偏差改之后 2034.9106000000002\n", + "**************************************************预测结果: 3603.41\n", + "更新前一天数据\n", + "更新数据前\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1447 2025-05-25 3600 26.0067 1 2949.09 6000 110078 3630 -65108.7 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1447 1 4876.81 6911.72 \n", + "日期存在,即将更新\n", + "新数据 [3600.0, '', '', 2949.09, '', 110078.22, '', -70579.7584, '', '', 6911.72]\n", + "更新数据后\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1447 2025-05-25 3600 26.0067 1 2949.09 6000 110078 3630 -65108.7 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1447 1 4876.81 6911.72 \n", + "20250526\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1446 2025-05-24 3600 26.0067 1 2949.09 6000 110078 3630 -70579.8 \n", + "1447 2025-05-25 3600 26.0067 1 3076.16 6000 110169 3630 -65108.7 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1446 1 4876.81 6911.72 \n", + "1447 1 4876.81 6912.92 \n", + "前一天的 -70579.7584 \n", + "现在的 -65108.6941 \n", + "修改了\n", + " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "0 1446 2025-05-24 3600 26.0067 1 2949.09 6000 110078 3630 
\n", + "1 1447 2025-05-25 3600 26.0067 1 3076.16 6000 110169 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "0 -70579.8 1 4876.81 6911.72 \n", + "1 -70579.8 1 4876.81 6912.92 \n", + "昨日计划提货偏差改之前 3076.16\n", + "昨日计划提货偏差改之后 2036.1106\n", + "**************************************************预测结果: 3603.42\n", + "更新前一天数据\n", + "更新数据前\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1448 2025-05-26 3600 26.0067 1 3076.16 6000 110169 3630 -62606.2 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1448 1 4876.81 6912.92 \n", + "日期存在,即将更新\n", + "新数据 [3600.0, '', '', 3076.16, '', 110168.92, '', -65108.6941, '', '', 6912.92]\n", + "更新数据后\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1448 2025-05-26 3600 26.0067 1 3076.16 6000 110169 3630 -62606.2 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1448 1 4876.81 6912.92 \n", + "20250527\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1447 2025-05-25 3600 28.943 1 3076.16 6000 110169 3630 -65108.7 \n", + "1448 2025-05-26 3600 28.104 1 -1096.24 6000 110398 3630 -62606.2 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1447 1 4876.81 6912.92 \n", + "1448 1 4876.81 7359.64 \n", + "前一天的 -65108.6941 \n", + "现在的 -62606.1562 \n", + "修改了\n", + " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "0 1447 2025-05-25 3600 28.943 1 3076.16 6000 110169 3630 \n", + "1 1448 2025-05-26 3600 28.104 1 -1096.24 6000 110398 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "0 -65108.7 1 4876.81 6912.92 \n", + "1 -65108.7 1 4876.81 7359.64 \n", + "昨日计划提货偏差改之前 -1096.24\n", + "昨日计划提货偏差改之后 2482.8306000000002\n", + "**************************************************预测结果: 3604.11\n", + "更新前一天数据\n", + "更新数据前\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1449 2025-05-27 3600 28.104 1 -1096.24 6000 110398 3630 -63415.4 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1449 1 4876.81 7359.64 \n", + "日期存在,即将更新\n", + "新数据 [3600.0, 28.104, '', -1096.24, '', 110398.08, '', -62606.1562, '', '', 7359.64]\n", + "更新数据后\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1449 2025-05-27 3600 28.104 1 -1096.24 6000 110398 3630 -63415.4 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1449 1 4876.81 7359.64 \n", + "20250528\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1448 2025-05-26 3600 28.104 1 -1096.24 6000 110398 3630 -62606.2 \n", + "1449 2025-05-27 3600 31.4597 1 1608.74 6000 109292 3630 -63415.4 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1448 1 4876.81 7359.64 \n", + "1449 1 4876.81 7709.12 \n", + "前一天的 -62606.1562 \n", + "现在的 -63415.4022 \n", + " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "0 1448 2025-05-26 3600 28.104 1 -1096.24 6000 110398 3630 \n", + "1 1449 2025-05-27 3600 31.4597 1 1608.74 6000 109292 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "0 -62606.2 1 4876.81 7359.64 \n", + "1 -63415.4 1 4876.81 7709.12 \n", + "昨日计划提货偏差改之前 1608.74\n", + "昨日计划提货偏差改之后 2832.3106\n", + "**************************************************预测结果: 3442.34\n", + "更新前一天数据\n", + "更新数据前\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1450 2025-05-28 3600 31.4597 1 1608.74 6000 109292 3630 3045.17 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1450 1 4876.81 7709.12 \n", + "日期存在,即将更新\n", + "新数据 [3600.0, 31.4597, '', 1608.74, '', 109291.7, '', -63415.4022, '', '', 7709.12]\n", + "更新数据后\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1450 2025-05-28 3600 31.4597 1 1608.74 6000 109292 3630 
3045.17 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1450 1 4876.81 7709.12 \n", + "20250529\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1449 2025-05-27 3600 31.4597 1 1608.74 6000 109292 3630 -63415.4 \n", + "1450 2025-05-28 3620 32.2987 1 -1155.78 6000 109867 3630 3045.17 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1449 1 4876.81 7709.12 \n", + "1450 1 4876.81 7515.76 \n", + "前一天的 -63415.4022 \n", + "现在的 3045.1686 \n", + "修改了\n", + " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "0 1449 2025-05-27 3600 31.4597 1 1608.74 6000 109292 3630 \n", + "1 1450 2025-05-28 3620 32.2987 1 -1155.78 6000 109867 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "0 -63415.4 1 4876.81 7709.12 \n", + "1 -63415.4 1 4876.81 7515.76 \n", + "昨日计划提货偏差改之前 -1155.78\n", + "昨日计划提货偏差改之后 2638.9506\n", + "**************************************************预测结果: 3603.86\n", + "更新前一天数据\n", + "更新数据前\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1451 2025-05-29 3620 32.2987 1 -1155.78 6000 109867 3630 3142.84 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1451 1 4876.81 7515.76 \n", + "日期存在,即将更新\n", + "新数据 [3620.0, 32.2987, '', -1155.78, '', 109866.97, '', 3045.1686, '', '', 7515.76]\n", + "更新数据后\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1451 2025-05-29 3620 32.2987 1 -1155.78 6000 109867 3630 3142.84 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1451 1 4876.81 7515.76 \n", + "20250530\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1450 2025-05-28 3620 32.2987 1 -1155.78 6000 109867 3630 3045.17 \n", + "1451 2025-05-29 3620 31.4597 1 1468.98 6000 110016 3630 3142.84 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1450 1 4876.81 7515.76 \n", + "1451 1 4876.81 7321.16 \n", + "前一天的 3045.1686 \n", + "现在的 3142.8424 \n", + " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "0 1450 2025-05-28 3620 32.2987 1 -1155.78 6000 109867 3630 \n", + "1 1451 2025-05-29 3620 31.4597 1 1468.98 6000 110016 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "0 3045.17 1 4876.81 7515.76 \n", + "1 3142.84 1 4876.81 7321.16 \n", + "昨日计划提货偏差改之前 1468.98\n", + "昨日计划提货偏差改之后 2444.3505999999998\n", + "**************************************************预测结果: 3623.59\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "更新前一天数据\n", + "更新数据前\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1452 2025-05-30 3650 31.4597 1 1468.98 6000 110016 3630 3002.11 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1452 1 4876.81 7321.16 \n", + "日期存在,即将更新\n", + "新数据 [3620.0, 31.4597, '', 1468.98, '', 110015.78, '', 3142.8424, '', '', 7321.16]\n", + "更新数据后\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1452 2025-05-30 3650 31.4597 1 1468.98 6000 110016 3630 3002.11 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1452 1 4876.81 7321.16 \n", + "20250531\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1451 2025-05-29 3620 31.4597 1 1468.98 6000 110016 3630 3142.84 \n", + "1452 2025-05-30 3650 31.4597 1 200.53 6000 109452 3630 3002.11 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1451 1 4876.81 7321.16 \n", + "1452 1 4876.81 7210.15 \n", + "前一天的 3142.8424 \n", + "现在的 3002.1107 \n", + " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "0 1451 2025-05-29 3620 31.4597 1 1468.98 6000 110016 3630 \n", + "1 1452 2025-05-30 3650 31.4597 1 200.53 6000 109452 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "0 3142.84 1 4876.81 7321.16 \n", + "1 
3002.11 1 4876.81 7210.15 \n", + "昨日计划提货偏差改之前 200.53\n", + "昨日计划提货偏差改之后 2333.3405999999995\n", + "**************************************************预测结果: 3595.27\n", + "更新前一天数据\n", + "更新数据前\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1453 2025-05-31 3650 31.4597 1 200.53 6000 109452 3630 2931.21 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1453 1 4876.81 7210.15 \n", + "日期存在,即将更新\n", + "新数据 [3650.0, 31.4597, '', 200.53, '', 109452.46, '', 3002.1107, '', '', 7210.15]\n", + "更新数据后\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1453 2025-05-31 3650 31.4597 1 200.53 6000 109452 3630 2931.21 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1453 1 4876.81 7210.15 \n", + "20250601\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1452 2025-05-30 3650 31.4597 1 200.53 6000 109452 3630 3002.11 \n", + "1453 2025-05-31 3650 31.4597 1 200.53 6000 109452 3630 2931.21 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1452 1 4876.81 7210.15 \n", + "1453 1 4876.81 7210.15 \n", + "前一天的 3002.1107 \n", + "现在的 2931.2118 \n", + " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "0 1452 2025-05-30 3650 31.4597 1 200.53 6000 109452 3630 \n", + "1 1453 2025-05-31 3650 31.4597 1 200.53 6000 109452 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "0 3002.11 1 4876.81 7210.15 \n", + "1 2931.21 1 4876.81 7210.15 \n", + "昨日计划提货偏差改之前 200.53\n", + "昨日计划提货偏差改之后 2333.3405999999995\n", + "**************************************************预测结果: 3653.42\n", + "更新前一天数据\n", + "更新数据前\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1454 2025-06-01 3650 31.4597 1 200.53 6000 109452 3630 2940.43 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1454 1 4876.81 7210.15 \n", + "日期存在,即将更新\n", + "新数据 [3650.0, '', '', 4837.68, '', 109979.05, '', 2931.2118, '', '', 6263.17]\n", + "更新数据后\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1454 2025-06-01 3650 31.4597 1 200.53 6000 109452 3630 2940.43 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1454 1 4876.81 7210.15 \n", + "20250602\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1453 2025-05-31 3650 31.4597 1 4837.68 6000 109979 3630 2931.21 \n", + "1454 2025-06-01 3620 31.4597 1 2998.04 6000 112404 3630 2940.43 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1453 1 4876.81 6263.17 \n", + "1454 1 4876.81 8453.8 \n", + "前一天的 2931.2118 \n", + "现在的 2940.4318 \n", + " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "0 1453 2025-05-31 3650 31.4597 1 4837.68 6000 109979 3630 \n", + "1 1454 2025-06-01 3620 31.4597 1 2998.04 6000 112404 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "0 2931.21 1 4876.81 6263.17 \n", + "1 2940.43 1 4876.81 8453.8 \n", + "昨日计划提货偏差改之前 2998.04\n", + "昨日计划提货偏差改之后 3576.9925999999996\n", + "**************************************************预测结果: 3655.35\n", + "更新前一天数据\n", + "更新数据前\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1455 2025-06-02 3620 31.4597 1 2998.04 6000 112404 3630 3088.17 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1455 1 4876.81 8453.8 \n", + "日期存在,即将更新\n", + "新数据 [3620.0, '', '', 2998.04, '', 112404.0, '', 2940.4318, '', '', 8453.802]\n", + "更新数据后\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1455 2025-06-02 3620 31.4597 1 2998.04 6000 112404 3630 3088.17 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1455 1 4876.81 8453.8 \n", + "20250603\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1454 2025-06-01 3620 31.4597 
1 2998.04 6000 112404 3630 2940.43 \n", + "1455 2025-06-02 3650 31.4597 1 -361.55 6000 113137 3630 3088.17 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1454 1 4876.81 8453.8 \n", + "1455 1 4876.81 7274.36 \n", + "前一天的 2940.4318 \n", + "现在的 3088.1711 \n", + " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "0 1454 2025-06-01 3620 31.4597 1 2998.04 6000 112404 3630 \n", + "1 1455 2025-06-02 3650 31.4597 1 -361.55 6000 113137 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "0 2940.43 1 4876.81 8453.8 \n", + "1 3088.17 1 4876.81 7274.36 \n", + "昨日计划提货偏差改之前 -361.55\n", + "昨日计划提货偏差改之后 2397.5505999999996\n", + "**************************************************预测结果: 3653.07\n", + "更新前一天数据\n", + "更新数据前\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1456 2025-06-03 3650 31.4597 1 -361.55 6000 113137 3630 3144.69 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1456 1 4876.81 7274.36 \n", + "日期存在,即将更新\n", + "新数据 [3650.0, '', '', -361.55, '', 113137.24, '', 3088.1711, '', '', 7274.36]\n", + "更新数据后\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1456 2025-06-03 3650 31.4597 1 -361.55 6000 113137 3630 3144.69 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1456 1 4876.81 7274.36 \n", + "20250604\n", + " 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 即期成本 \\\n", + "1455 2025-06-02 3650 33.557 1 -361.55 6000 113137 3630 3088.17 \n", + "1456 2025-06-03 3650 34.8154 1 -676.29 6000 113920 3630 3144.69 \n", + "\n", + " 订单结构 计划产量 京博产量 \n", + "1455 1 4876.81 7274.36 \n", + "1456 1 4876.81 7404.7 \n", + "前一天的 3088.1711 \n", + "现在的 3144.6905 \n", + " index 日期 京博指导价 70号沥青开工率 资金因素 昨日计划提货偏差 生产情况 基质沥青库存 下游客户价格预期 \\\n", + "0 1455 2025-06-02 3650 33.557 1 -361.55 6000 113137 3630 \n", + "1 1456 2025-06-03 3650 34.8154 1 -676.29 6000 113920 3630 \n", + "\n", + " 即期成本 订单结构 计划产量 京博产量 \n", + "0 3088.17 1 4876.81 7274.36 \n", + "1 3144.69 1 4876.81 7404.7 \n", + "昨日计划提货偏差改之前 -676.29\n", + "昨日计划提货偏差改之后 2527.8866\n", + "**************************************************预测结果: 3653.59\n" ] } ], diff --git a/aisenzhecode/沥青/沥青定性每日执行.py b/aisenzhecode/沥青/沥青定性每日执行.py new file mode 100644 index 0000000..0656f30 --- /dev/null +++ b/aisenzhecode/沥青/沥青定性每日执行.py @@ -0,0 +1,1117 @@ +import requests +import json +import xlrd +import xlwt +from datetime import datetime, timedelta +import time +import pandas as pd +pd.set_option('display.max_columns', None) + +import numpy as np +# 变量定义 +login_url = "http://10.200.32.39/jingbo-api/api/server/login" +login_push_url = "http://10.200.32.39/jingbo-api/api/server/login" +# query_data_list_item_nos_url +search_url = "http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos" #jingbo-dev/api/warehouse/dwDataItem/queryDataListItemNos +upload_url = "http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList" +queryDataListItemNos_url = "http://10.200.32.39/jingbo-api//api/warehouse/dwDataItem/queryDataListItemNos" + + +query_data_list_item_nos_data = { + "funcModule": "数据项", + "funcOperation": "查询", + "data": { + "dateStart": "20200101", + "dateEnd": "20241231", + "dataItemNoList": ["Brentzdj", "Brentzgj"] # 数据项编码,代表 brent最低价和最高价 + } +} + + +login_data = { + "data": { + "account": "api_dev", + "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=", + "tenantHashCode": "8a4577dbd919675758d57999a1e891fe", + "terminal": "API" + }, + "funcModule": "API", + "funcOperation": "获取token" +} + +login_push_data = { + "data": { + "account": "api_dev", + "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=", + 
"tenantHashCode": "8a4577dbd919675758d57999a1e891fe", + "terminal": "API" + }, + "funcModule": "API", + "funcOperation": "获取token" +} + + +read_file_path_name = "定性模型数据项12-11.xlsx" +one_cols = [] +two_cols = [] + +def get_head_auth(): + login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5)) + text = json.loads(login_res.text) + if text["status"]: + token = text["data"]["accessToken"] + return token + else: + print("获取认证失败") + return None + + +def get_head_push_auth(): + login_res = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5)) + text = json.loads(login_res.text) + if text["status"]: + token = text["data"]["accessToken"] + return token + else: + print("获取认证失败") + return None + + +def update_e_value(file_path, column_index, threshold): + """ + 数据修正需求:2025年1月8日 + 如果如果今天的成本即期价跟昨天的成本价差正负1000以上,就按照昨天的成本价计算 + + 更新Excel文件中指定列的值,如果新值与前一天的值变化大于阈值,则将新值改为前一天的值。 + + :param file_path: Excel文件路径 + :param column_index: 需要更新的列索引 + :param threshold: 变化阈值 + """ + # 读取Excel文件 + # try: + # df = pd.read_excel(file_path, engine='openpyxl') + # except: + # df = pd.read_excel(file_path, engine='xlrd') + + df = pd.read_excel(file_path) + # 所有列列统一数据格式为float + df = df.applymap(lambda x: float(x) if isinstance(x, (int, float)) else x) + + # print(df.tail()) + # 填充缺失值 + df = df.fillna(method='ffill') + + # 获取昨天,前天数据 + df1 = df[-3:-1] + print(df1) + # 获取前一天的指定列值 + previous_value = df1.iloc[0, column_index] + print('前一天的',previous_value,type(previous_value)) + # 获取当前的指定列值 + current_value = df1.iloc[1, column_index] + print('现在的',current_value,type(current_value)) + # 判断指定列值的变化是否大于阈值 + if abs(current_value - previous_value) > threshold: + # 如果变化大于阈值,将当前的指定列值改为前一天的值 + df.iloc[-2, column_index] = previous_value + print('修改了') + # print(df.tail()) + # 将修改后的数据写回Excel文件 + df.to_excel(file_path, index=False,engine='openpyxl') + +def getLogToken(): + login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5)) + text = json.loads(login_res.text) + if text["status"]: + token = text["data"]["accessToken"] + else: + print("获取认证失败") + token = None + return token + +def updateExcelDatabak(date='',token=None): + workbook = xlrd.open_workbook(read_file_path_name) + + # 选择第一个表格 + sheet = workbook.sheet_by_index(0) + + row_data = sheet.row_values(1) + one_cols = row_data + + cur_time,cur_time2 = getNow(date) + search_data = { + "data": { + "date": cur_time, + "dataItemNoList": one_cols[1:] + }, + "funcModule": "数据项", + "funcOperation": "查询" + } + headers = {"Authorization": token} + search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5)) + search_value = json.loads(search_res.text)["data"] +# datas = search_value + if search_value: + datas = search_value + else : + datas = None + + + append_rows = [cur_time2] + dataItemNo_dataValue = {} +# for data_value in datas: +# dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] + for data_value in datas: + if "dataValue" not in data_value: + print(data_value) + dataItemNo_dataValue[data_value["dataItemNo"]] = "" + else: + dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] + for value in one_cols[1:]: + if value in dataItemNo_dataValue: + append_rows.append(dataItemNo_dataValue[value]) + else: + append_rows.append("") + + workbook = xlrd.open_workbook('定性模型数据项12-11.xlsx') + + # 获取所有sheet的个数 + sheet_count = len(workbook.sheet_names()) + + # 获取所有sheet的名称 + sheet_names = workbook.sheet_names() + + new_workbook = xlwt.Workbook() + for i in range(sheet_count): + # 
获取当前sheet + sheet = workbook.sheet_by_index(i) + + # 获取sheet的行数和列数 + row_count = sheet.nrows + col_count = sheet.ncols + # 获取原有数据 + data = [] + for row in range(row_count): + row_data = [] + for col in range(col_count): + row_data.append(sheet.cell_value(row, col)) + data.append(row_data) + # 创建xlwt的Workbook对象 + # 创建sheet + new_sheet = new_workbook.add_sheet(sheet_names[i]) + + # 将原有的数据写入新的sheet + for row in range(row_count): + for col in range(col_count): + new_sheet.write(row, col, data[row][col]) + + if i == 0: + + # 在新的sheet中添加数据 + for col in range(col_count): + new_sheet.write(row_count, col, append_rows[col]) + + # 保存新的xls文件 + new_workbook.save("定性模型数据项12-11.xlsx") + +def updateYesterdayExcelData(date='', token=None): + # 使用pandas读取Excel文件 + df = pd.read_excel(read_file_path_name, engine='openpyxl') + + # 获取第二行的数据作为列名 + one_cols = df.iloc[0,:].tolist() + + # 获取当前日期的前一天 + if date == '': + previous_date = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d') + else: + # 字符串转日期 + previous_date = (datetime.strptime(date, "%Y-%m-%d")-timedelta(days=1)).strftime('%Y-%m-%d') + + + cur_time, cur_time2 = getNow(previous_date) + search_data = { + "data": { + "date": cur_time, + "dataItemNoList": one_cols[1:] + }, + "funcModule": "数据项", + "funcOperation": "查询" + } + headers = {"Authorization": token} + search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5)) + search_value = json.loads(search_res.text)["data"] + if search_value: + datas = search_value + else: + datas = None + + append_rows = [cur_time2] + dataItemNo_dataValue = {} + for data_value in datas: + if "dataValue" not in data_value: + print(data_value) + dataItemNo_dataValue[data_value["dataItemNo"]] = "" + else: + dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] + for value in one_cols[1:]: + if value in dataItemNo_dataValue: + append_rows.append(dataItemNo_dataValue[value]) + else: + append_rows.append("") + + print('更新数据前') + print(df.tail(1)) + # 检查日期是否已存在于数据中 + if previous_date not in df['日期'].values: + # 将新的数据添加到DataFrame中 + new_row = pd.DataFrame([append_rows], columns=df.columns.tolist()) + df = pd.concat([df, new_row], ignore_index=True) + else: + # 更新现有数据 + print('日期存在,即将更新') + print('新数据',append_rows[1:]) + df.loc[df['日期'] == previous_date, df.columns.tolist()[1:]] = append_rows[1:] + + print('更新数据后') + print(df.tail(1)) + # 使用pandas保存Excel文件 + df.to_excel("定性模型数据项12-11.xlsx", index=False, engine='openpyxl') + + +def updateExcelData(date='', token=None): + # 使用pandas读取Excel文件 + df = pd.read_excel(read_file_path_name, engine='openpyxl') + + # 获取第一行的数据作为列名 + # one_cols = df.columns.tolist() + + # 获取第二行的数据作为列名 + one_cols = df.iloc[0,:].tolist() + + cur_time, cur_time2 = getNow(date) + search_data = { + "data": { + "date": cur_time, + "dataItemNoList": one_cols[1:] + }, + "funcModule": "数据项", + "funcOperation": "查询" + } + headers = {"Authorization": token} + search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5)) + search_value = json.loads(search_res.text)["data"] + if search_value: + datas = search_value + else: + datas = None + + append_rows = [cur_time2] + dataItemNo_dataValue = {} + for data_value in datas: + if "dataValue" not in data_value: + print(data_value) + dataItemNo_dataValue[data_value["dataItemNo"]] = "" + else: + dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] + for value in one_cols[1:]: + if value in dataItemNo_dataValue: + append_rows.append(dataItemNo_dataValue[value]) + else: + 
append_rows.append("") + + # 将新的数据添加到DataFrame中 + new_row = pd.DataFrame([append_rows], columns=df.columns.tolist()) + df = pd.concat([df, new_row], ignore_index=True) + # df = df.append(pd.Series(append_rows), ignore_index=True) + + # 使用pandas保存Excel文件 + df.to_excel("定性模型数据项12-11.xlsx", index=False, engine='openpyxl') + + +def qualitativeModel(): + df = pd.read_excel('定性模型数据项12-11.xlsx') + + df=df.fillna(df.ffill()) + df1 = df[-3:-1].reset_index() + print(df1) + ''' + # if df1.loc[1,'70号沥青开工率'] > 0.3: + 2025年1月8日 修改: + 复盘分析后发现2024-7月开始,开工率数据从0.28 变为了28 ,改为下面的判断规则 + ''' + if df1.loc[1,'70号沥青开工率'] / 100 > 0.3: + a = -(df1.loc[1,'70号沥青开工率'] / 100 -0.2)*5/0.1 + else : + a = 0 + b = df1.loc[1,'资金因素'] + + print('昨日计划提货偏差改之前',df1.loc[1,'昨日计划提货偏差']) + # 昨日计划提货偏差 = 京博产量 - 计划产量 + df1.loc[1,'昨日计划提货偏差'] = df1.loc[1,'京博产量'] - df1.loc[1,'计划产量'] + + print('昨日计划提货偏差改之后',df1.loc[1,'昨日计划提货偏差']) + if df1.loc[1,'昨日计划提货偏差']>0: + c = df1.loc[1,'昨日计划提货偏差']*10/2000 + else : + c = df1.loc[1,'昨日计划提货偏差']*10/3000 + + # 生产情况 = (京博产量 - 计划产量)/500*5 + d = (df1.loc[1,'京博产量'] - df1.loc[1,'计划产量']) / 500 * 5 + + if df1.loc[1,'基质沥青库存']/265007 >0.8: + e = (df1.loc[1,'基质沥青库存'] - df1.loc[0,'基质沥青库存'])*10/-5000 + else : + e = 0 +# f = df1.loc[1,'下游客户价格预期'] + f = 1 # 2025年1月23日修改:价格预期都按1计算 + if abs(df1.loc[1,'即期成本'] - df1.loc[0,'即期成本'])>=100: + g = (df1.loc[1,'即期成本'] - df1.loc[0,'即期成本'])*50/100 + else : + g = 0 + h = df1.loc[1,'订单结构'] + x = round(0.08*a+0*b+0.15*c+0.08*d +0.03*e +0.08*f +0.4*g+0.18*h+df1.loc[0,'京博指导价'],2) + return x + + +def getNow(date='',offset=0): + if date == '': + now = datetime.now() - timedelta(days=offset) + else: + try: + date = datetime.strptime(date, "%Y-%m-%d") + except: + date = datetime.strptime(date, "%Y%m%d") + now = date + + year = now.year + month = now.month + day = now.day + + if month < 10: + month = "0" + str(month) + if day < 10: + day = "0" + str(day) + cur_time = str(year) + str(month) + str(day) + cur_time2 = str(year) + "-" + str(month) + "-" + str(day) + return cur_time,cur_time2 + +def pushData(cur_time,x,token_push): + data1 = { + "funcModule": "数据表信息列表", + "funcOperation": "新增", + "data": [ + {"dataItemNo": "C01100036|Forecast_Price|DX|ACN", + "dataDate": cur_time, + "dataStatus": "add", + "dataValue": x + } + ] + } + headers1 = {"Authorization": token_push} + res = requests.post(url=upload_url, headers=headers1, json=data1, timeout=(3, 5)) + +def start_2(date='',token=None): + workbook = xlrd.open_workbook(read_file_path_name) + + # 选择第一个表格 + sheet = workbook.sheet_by_index(0) + + # 获取行数和列数 + num_rows = sheet.nrows + + row_data = sheet.row_values(1) + one_cols = row_data + + cur_time,cur_time2 = getNow(date) + + + + search_data = { + "data": { + "date": cur_time, + "dataItemNoList": one_cols[1:] + }, + "funcModule": "数据项", + "funcOperation": "查询" + } + headers = {"Authorization": token} + search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5)) + search_value = json.loads(search_res.text)["data"] +# datas = search_value + if search_value: + datas = search_value + else : + datas = None + + + append_rows = [cur_time2] + dataItemNo_dataValue = {} +# for data_value in datas: +# dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] + for data_value in datas: + if "dataValue" not in data_value: + print(data_value) + dataItemNo_dataValue[data_value["dataItemNo"]] = "" + else: + dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] + for value in one_cols[1:]: + if value in dataItemNo_dataValue: + 
append_rows.append(dataItemNo_dataValue[value]) + else: + append_rows.append("") + + workbook = xlrd.open_workbook('定性模型数据项12-11.xlsx') + + # 获取所有sheet的个数 + sheet_count = len(workbook.sheet_names()) + + # 获取所有sheet的名称 + sheet_names = workbook.sheet_names() + + new_workbook = xlwt.Workbook() + for i in range(sheet_count): + # 获取当前sheet + sheet = workbook.sheet_by_index(i) + + # 获取sheet的行数和列数 + row_count = sheet.nrows + col_count = sheet.ncols + # 获取原有数据 + data = [] + for row in range(row_count): + row_data = [] + for col in range(col_count): + row_data.append(sheet.cell_value(row, col)) + data.append(row_data) + # 创建xlwt的Workbook对象 + # 创建sheet + new_sheet = new_workbook.add_sheet(sheet_names[i]) + + # 将原有的数据写入新的sheet + for row in range(row_count): + for col in range(col_count): + new_sheet.write(row, col, data[row][col]) + + if i == 0: + + # 在新的sheet中添加数据 + for col in range(col_count): + new_sheet.write(row_count, col, append_rows[col]) + + # 保存新的xls文件 + new_workbook.save("定性模型数据项12-11.xlsx") + + update_e_value('定性模型数据项12-11.xlsx', 8, 1000) + + df = pd.read_excel('定性模型数据项12-11.xlsx') + + df=df.fillna(df.ffill()) + df1 = df[-2:].reset_index() + ''' + # if df1.loc[1,'70号沥青开工率'] > 0.3: + 2025年1月8日 修改: + 复盘分析后发现2024-7月开始,开工率数据从0.28 变为了28 ,改为下面的判断规则 + ''' + if df1.loc[1,'70号沥青开工率'] > 30: + a = (df1.loc[1,'70号沥青开工率']-0.2)*5/0.1 + else : + a = 0 + b = df1.loc[1,'资金因素'] + if df1.loc[1,'昨日计划提货偏差']>0: + c = df1.loc[1,'昨日计划提货偏差']*10/2000 + else : + c = df1.loc[1,'昨日计划提货偏差']*10/3000 + d = df1.loc[1,'生产情况'] + if df1.loc[1,'基质沥青库存']/265007 >0.8: + e = (df1.loc[1,'基质沥青库存'] - df1.loc[0,'基质沥青库存'])*10/-5000 + else : + e = 0 +# f = df1.loc[1,'下游客户价格预期'] + f = 1 # 2025年1月23日修改:价格预期都按1计算 + if abs(df1.loc[1,'即期成本'] - df1.loc[0,'即期成本'])>=100: + g = (df1.loc[1,'即期成本'] - df1.loc[0,'即期成本'])*50/100 + else : + g = 0 + h = df1.loc[1,'订单结构'] + x = round(0.08*a+0*b+0.15*c+0.08*d +0.03*e +0.08*f +0.4*g+0.18*h+df1.loc[0,'京博指导价'],2) + + login_res1 = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5)) + text1 = json.loads(login_res1.text) + token_push = text1["data"]["accessToken"] + + + data1 = { + "funcModule": "数据表信息列表", + "funcOperation": "新增", + "data": [ + {"dataItemNo": "C01100036|Forecast_Price|DX|ACN", + "dataDate": cur_time, + "dataStatus": "add", + "dataValue": x + } + + ] + } + headers1 = {"Authorization": token_push} + # res = requests.post(url=upload_url, headers=headers1, json=data1, timeout=(3, 5)) + + +def start(now=None): + workbook = xlrd.open_workbook(read_file_path_name) + + + + # 选择第一个表格 + sheet = workbook.sheet_by_index(0) + + # 获取行数和列数 + num_rows = sheet.nrows + + + + row_data = sheet.row_values(1) + one_cols = row_data + + + login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5)) + text = json.loads(login_res.text) + if text["status"]: + token = text["data"]["accessToken"] + else: + print("获取认证失败") + token = None + + if now is None: + now = datetime.now() + year = now.year + month = now.month + day = now.day + + if month < 10: + month = "0" + str(month) + if day < 10: + day = "0" + str(day) + cur_time = str(year) + str(month) + str(day) + cur_time2 = str(year) + "-" + str(month) + "-" + str(day) + search_data = { + "data": { + "date": cur_time, + "dataItemNoList": one_cols[1:] + }, + "funcModule": "数据项", + "funcOperation": "查询" + } + headers = {"Authorization": token} + search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5)) + search_value = json.loads(search_res.text)["data"] +# datas = search_value + if search_value: + 
datas = search_value + else : + datas = None + + + append_rows = [cur_time2] + dataItemNo_dataValue = {} +# for data_value in datas: +# dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] + for data_value in datas: + if "dataValue" not in data_value: + print(data_value) + dataItemNo_dataValue[data_value["dataItemNo"]] = "" + else: + dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] + for value in one_cols[1:]: + if value in dataItemNo_dataValue: + append_rows.append(dataItemNo_dataValue[value]) + else: + append_rows.append("") + + workbook = xlrd.open_workbook('定性模型数据项12-11.xlsx') + + # 获取所有sheet的个数 + sheet_count = len(workbook.sheet_names()) + + # 获取所有sheet的名称 + sheet_names = workbook.sheet_names() + + new_workbook = xlwt.Workbook() + for i in range(sheet_count): + # 获取当前sheet + sheet = workbook.sheet_by_index(i) + + # 获取sheet的行数和列数 + row_count = sheet.nrows + col_count = sheet.ncols + # 获取原有数据 + data = [] + for row in range(row_count): + row_data = [] + for col in range(col_count): + row_data.append(sheet.cell_value(row, col)) + data.append(row_data) + # 创建xlwt的Workbook对象 + # 创建sheet + new_sheet = new_workbook.add_sheet(sheet_names[i]) + + # 将原有的数据写入新的sheet + for row in range(row_count): + for col in range(col_count): + new_sheet.write(row, col, data[row][col]) + + if i == 0: + # 在新的sheet中添加数据 + for col in range(col_count): + new_sheet.write(row_count, col, append_rows[col]) + + # 保存新的xls文件 + new_workbook.save("定性模型数据项12-11.xlsx") + + + update_e_value('定性模型数据项12-11.xlsx', 8, 1000) + + df = pd.read_excel('定性模型数据项12-11.xlsx') + df=df.fillna(df.ffill()) + df1 = df[-2:].reset_index() + print(df1) + # if df1.loc[1,'70号沥青开工率'] > 0.3: -- 2025年1月9日 发版更改 + if df1.loc[1,'70号沥青开工率'] / 100 > 0.3: + a = (df1.loc[1,'70号沥青开工率'] / 100 -0.2)*5/0.1 + else : + a = 0 + b = df1.loc[1,'资金因素'] + if df1.loc[1,'昨日计划提货偏差']>0: + c = df1.loc[1,'昨日计划提货偏差']*10/2000 + else : + c = df1.loc[1,'昨日计划提货偏差']*10/3000 + d = df1.loc[1,'生产情况'] + if df1.loc[1,'基质沥青库存']/265007 >0.8: + e = (df1.loc[1,'基质沥青库存'] - df1.loc[0,'基质沥青库存'])*10/-5000 + else : + e = 0 +# f = df1.loc[1,'下游客户价格预期'] + f = 1 # 2025年1月23日修改:价格预期都按1计算 + if abs(df1.loc[1,'即期成本'] - df1.loc[0,'即期成本'])>=100: + g = (df1.loc[1,'即期成本'] - df1.loc[0,'即期成本'])*50/100 + else : + g = 0 + h = df1.loc[1,'订单结构'] + x = round(0.08*a+0*b+0.15*c+0.08*d +0.03*e +0.08*f +0.4*g+0.18*h+df1.loc[0,'京博指导价'],2) + + + # login_res1 = requests.post(url=login_url, json=login_data, timeout=(3, 30)) + # text1 = json.loads(login_res1.text) + # token_push = text1["data"]["accessToken"] + + + # data1 = { + # "funcModule": "数据表信息列表", + # "funcOperation": "新增", + # "data": [ + # {"dataItemNo": "C01100036|Forecast_Price|DX|ACN", + # "dataDate": cur_time, + # "dataStatus": "add", + # "dataValue": x + # } + + # ] + # } + # headers1 = {"Authorization": token_push} + # res = requests.post(url=upload_url, headers=headers1, json=data1, timeout=(3, 5)) + + + +def start_test(): + workbook = xlrd.open_workbook(read_file_path_name) + + + + # 选择第一个表格 + sheet = workbook.sheet_by_index(0) + + # 获取行数和列数 + num_rows = sheet.nrows + + + + row_data = sheet.row_values(1) + one_cols = row_data + + + login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5)) + text = json.loads(login_res.text) + if text["status"]: + token = text["data"]["accessToken"] + else: + print("获取认证失败") + token = None + + + now = datetime.now() + year = now.year + month = now.month + day = now.day + + if month < 10: + month = "0" + str(month) + if day < 10: + day = "0" + str(day) + cur_time 
= str(year) + str(month) + str(day) + cur_time2 = str(year) + "-" + str(month) + "-" + str(day) + search_data = { + "data": { + "date": cur_time, + "dataItemNoList": one_cols[1:] + }, + "funcModule": "数据项", + "funcOperation": "查询" + } + headers = {"Authorization": token} + search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5)) + search_value = json.loads(search_res.text)["data"] +# datas = search_value + if search_value: + datas = search_value + else : + datas = None + + + append_rows = [cur_time2] + dataItemNo_dataValue = {} +# for data_value in datas: +# dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] + for data_value in datas: + if "dataValue" not in data_value: + print(data_value) + dataItemNo_dataValue[data_value["dataItemNo"]] = "" + else: + dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] + for value in one_cols[1:]: + if value in dataItemNo_dataValue: + append_rows.append(dataItemNo_dataValue[value]) + else: + append_rows.append("") + + workbook = xlrd.open_workbook('定性模型数据项12-11.xlsx') + + # 获取所有sheet的个数 + sheet_count = len(workbook.sheet_names()) + + # 获取所有sheet的名称 + sheet_names = workbook.sheet_names() + + new_workbook = xlwt.Workbook() + for i in range(sheet_count): + # 获取当前sheet + sheet = workbook.sheet_by_index(i) + + # 获取sheet的行数和列数 + row_count = sheet.nrows + col_count = sheet.ncols + # 获取原有数据 + data = [] + for row in range(row_count): + row_data = [] + for col in range(col_count): + row_data.append(sheet.cell_value(row, col)) + data.append(row_data) + # 创建xlwt的Workbook对象 + # 创建sheet + new_sheet = new_workbook.add_sheet(sheet_names[i]) + + # 将原有的数据写入新的sheet + for row in range(row_count): + for col in range(col_count): + new_sheet.write(row, col, data[row][col]) + + if i == 0: + # 在新的sheet中添加数据 + for col in range(col_count): + new_sheet.write(row_count, col, append_rows[col]) + + # 保存新的xls文件 + new_workbook.save("定性模型数据项12-11.xlsx") + + update_e_value('定性模型数据项12-11.xlsx', 8, 1000) + + df = pd.read_excel('定性模型数据项12-11.xlsx') + df=df.fillna(df.ffill()) + df1 = df[-2:].reset_index() + # if df1.loc[1,'70号沥青开工率'] > 0.3: -- 2025年1月9日 发版更改 + if df1.loc[1,'70号沥青开工率'] / 100 > 0.3: + a = (df1.loc[1,'70号沥青开工率'] / 100 -0.2)*5/0.1 + else : + a = 0 + b = df1.loc[1,'资金因素'] + if df1.loc[1,'昨日计划提货偏差']>0: + c = df1.loc[1,'昨日计划提货偏差']*10/2000 + else : + c = df1.loc[1,'昨日计划提货偏差']*10/3000 + d = df1.loc[1,'生产情况'] + if df1.loc[1,'基质沥青库存']/265007 >0.8: + e = (df1.loc[1,'基质沥青库存'] - df1.loc[0,'基质沥青库存'])*10/-5000 + else : + e = 0 +# f = df1.loc[1,'下游客户价格预期'] + f = 1 # 2025年1月23日修改:价格预期都按1计算 + if abs(df1.loc[1,'即期成本'] - df1.loc[0,'即期成本'])>=100: + g = (df1.loc[1,'即期成本'] - df1.loc[0,'即期成本'])*50/100 + else : + g = 0 + h = df1.loc[1,'订单结构'] + x = round(0.08*a+0*b+0.15*c+0.08*d +0.03*e +0.08*f +0.4*g+0.18*h+df1.loc[0,'京博指导价'],2) + + + # login_res1 = requests.post(url=login_url, json=login_data, timeout=(3, 30)) + # text1 = json.loads(login_res1.text) + # token_push = text1["data"]["accessToken"] + + + # data1 = { + # "funcModule": "数据表信息列表", + # "funcOperation": "新增", + # "data": [ + # {"dataItemNo": "C01100036|Forecast_Price|DX|ACN", + # "dataDate": cur_time, + # "dataStatus": "add", + # "dataValue": x + # } + + # ] + # } + # headers1 = {"Authorization": token_push} + # res = requests.post(url=upload_url, headers=headers1, json=data1, timeout=(3, 5)) + + + + + +def start_1(): + workbook = xlrd.open_workbook(read_file_path_name) + + + + # 选择第一个表格 + sheet = workbook.sheet_by_index(0) + + # 获取行数和列数 + num_rows = sheet.nrows + 
+ + + row_data = sheet.row_values(1) + one_cols = row_data + + + login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5)) + text = json.loads(login_res.text) + if text["status"]: + token = text["data"]["accessToken"] + else: + print("获取认证失败") + token = None + + + now = datetime.now() - timedelta(days=1) + year = now.year + month = now.month + day = now.day + + if month < 10: + month = "0" + str(month) + if day < 10: + day = "0" + str(day) + cur_time = str(year) + str(month) + str(day) + cur_time2 = str(year) + "-" + str(month) + "-" + str(day) + search_data = { + "data": { + "date": cur_time, + "dataItemNoList": one_cols[1:] + }, + "funcModule": "数据项", + "funcOperation": "查询" + } + headers = {"Authorization": token} + search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5)) + search_value = json.loads(search_res.text)["data"] +# datas = search_value + if search_value: + datas = search_value + else : + datas = None + + + + append_rows = [cur_time2] + dataItemNo_dataValue = {} +# for data_value in datas: +# dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] + for data_value in datas: + if "dataValue" not in data_value: + print(data_value) + dataItemNo_dataValue[data_value["dataItemNo"]] = "" + else: + dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] + for value in one_cols[1:]: + if value in dataItemNo_dataValue: + append_rows.append(dataItemNo_dataValue[value]) + else: + append_rows.append("") + + workbook = xlrd.open_workbook('定性模型数据项12-11.xlsx') + + # 获取所有sheet的个数 + sheet_count = len(workbook.sheet_names()) + + # 获取所有sheet的名称 + sheet_names = workbook.sheet_names() + + new_workbook = xlwt.Workbook() + for i in range(sheet_count): + # 获取当前sheet + sheet = workbook.sheet_by_index(i) + + # 获取sheet的行数和列数 + row_count = sheet.nrows - 1 + col_count = sheet.ncols + # 获取原有数据 + data = [] + for row in range(row_count): + row_data = [] + for col in range(col_count): + row_data.append(sheet.cell_value(row, col)) + data.append(row_data) + # 创建xlwt的Workbook对象 + # 创建sheet + new_sheet = new_workbook.add_sheet(sheet_names[i]) + + # 将原有的数据写入新的sheet + for row in range(row_count): + for col in range(col_count): + new_sheet.write(row, col, data[row][col]) + + if i == 0: + # 在新的sheet中添加数据 + for col in range(col_count): + new_sheet.write(row_count, col, append_rows[col]) + + # 保存新的xls文件 + new_workbook.save("定性模型数据项12-11.xlsx") + + + + + + + +def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEnd): + + search_data = { + "funcModule": "数据项", + "funcOperation": "查询", + "data": { + "dateStart": dateStart, + "dateEnd": dateEnd, + "dataItemNoList": dataItemNoList # 数据项编码,代表 brent最低价和最高价 + } + } + + headers = {"Authorization": token} + search_res = requests.post(url=url, headers=headers, json=search_data, timeout=(3, 5)) + search_value = json.loads(search_res.text)["data"] + if search_value: + return search_value + else: + return None + + + +def save_queryDataListItemNos_xls(data_df,dataItemNoList): + + current_year_month = datetime.now().strftime('%Y-%m') + grouped = data_df.groupby("dataDate") + + + df_old = pd.read_excel('定性模型数据项12-11.xlsx') + df_old0 = df_old[:1] + result_dict = {df_old0.iloc[0][col] : col for col in df_old0.columns} + df_old1 = df_old[1:].copy() + + df_old1["日期"] = pd.to_datetime(df_old1["日期"]) + # 删除日期列为本月的数据 + df_old1 = df_old1[~df_old1["日期"].dt.strftime('%Y-%m').eq(current_year_month)] + df_old1["日期"] = df_old1["日期"].dt.strftime('%Y-%m-%d') + + + list_data = [] + for 
date, group in grouped: + dict_data = {"日期": date} + for index, row in group.iterrows(): + dict_data[result_dict[row['dataItemNo']]] = row['dataValue'] + list_data.append(dict_data) + + df_current_year_month = pd.DataFrame(list_data) + df_current_year_month + + df_merged = pd.concat([df_old0, df_old1, df_current_year_month], ignore_index=True) + + df_merged.to_excel('定性模型数据项12-11.xlsx', index=False) + + + +def queryDataListItemNos(date=None,token=None): + df = pd.read_excel('定性模型数据项12-11.xlsx') + dataItemNoList = df.iloc[0].tolist()[1:] + + if token is None: + token = getLogToken() + if token is None: + print("获取token失败") + return + + # 获取当前日期 + if date is None: + date = datetime.now() + current_date = date + + # 获取当月1日 + first_day_of_month = current_date.replace(day=1) + + # 格式化为 YYYYMMDD 格式 + dateEnd = current_date.strftime('%Y%m%d') + dateStart = first_day_of_month.strftime('%Y%m%d') + + search_value = get_queryDataListItemNos_value(token, queryDataListItemNos_url, dataItemNoList, dateStart, dateEnd) + # print("search_value",search_value) + + + data_df = pd.DataFrame(search_value) + + data_df["dataDate"] = pd.to_datetime(data_df["dataDate"]) + data_df["dataDate"] = data_df["dataDate"].dt.strftime('%Y-%m-%d') + save_queryDataListItemNos_xls(data_df,dataItemNoList) + +def main(start_date=None,token=None,token_push=None): + if start_date is None: + start_date = datetime.now() + if token is None: + token = get_head_auth() + if token_push is None: + token_push = get_head_push_auth() + date = start_date.strftime('%Y%m%d') + print(date) + updateExcelData(date,token) + queryDataListItemNos(token=token) + update_e_value('定性模型数据项12-11.xlsx', 8, 1000) + x = qualitativeModel() + print('**************************************************预测结果:',x) + cur_time,cur_time2 = getNow(date) + pushData(cur_time,x,token) + + +if __name__ == "__main__": + print("运行中...") + main() + diff --git a/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb b/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb index bcc2226..ad9c10a 100644 --- a/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb +++ b/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb @@ -2,56 +2,14 @@ "cells": [ { "cell_type": "code", -<<<<<<< .mine - "execution_count": null, -||||||| .r87044 - "execution_count": 1, -======= - "execution_count": 5, ->>>>>>> .r87201 + "execution_count": 18, "metadata": {}, "outputs": [ { -<<<<<<< .mine - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:From C:\\Users\\EDY\\AppData\\Roaming\\Python\\Python311\\site-packages\\keras\\src\\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.\n", - "\n" - ] - }, - { -||||||| .r87044 - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\statsmodels\\compat\\pandas.py:49: FutureWarning:\n", - "\n", - "The Panel class is removed from pandas. 
Accessing it from the top-level namespace will also be removed in the next version\n", - "\n" - ] - }, - { -======= ->>>>>>> .r87201 "data": { "text/html": [ " \n", - " \n", " " ] }, @@ -79,6 +33,8 @@ "source": [ "import requests\n", "import json\n", + "import xlrd\n", + "import xlwt\n", "from datetime import datetime, timedelta\n", "import time\n", "import pandas as pd\n", @@ -665,6 +621,7 @@ "\n", "\n", "def save_queryDataListItemNos_xls(data_df,dataItemNoList):\n", + " from datetime import datetime,timedelta\n", " current_year_month = datetime.now().strftime('%Y-%m')\n", " grouped = data_df.groupby(\"dataDate\")\n", "\n", @@ -767,7 +724,6 @@ "# new_workbook.save(\"沥青数据项.xlsx\")\n", "\n", "def queryDataListItemNos(token=None):\n", - " from datetime import datetime, timedelta\n", " df = pd.read_excel('沥青数据项.xlsx')\n", " dataItemNoList = df.iloc[0].tolist()[1:]\n", " \n", @@ -779,6 +735,7 @@ " return\n", "\n", " # 获取当前日期\n", + " from datetime import datetime, timedelta\n", " current_date = datetime.now()\n", "\n", " # 获取当月1日\n", @@ -921,7 +878,7 @@ "\n", "\n", "def main(start_date=None,token=None,token_push=None):\n", - " from datatime import datetime\n", + " from datetime import datetime, timedelta\n", " if start_date is None:\n", " start_date = datetime.now()\n", " if token is None:\n", @@ -941,38 +898,23 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [ { -<<<<<<< .mine - "ename": "AttributeError", - "evalue": "module 'datetime' has no attribute 'now'", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[5], line 20\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# if __name__ == \"__main__\":\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m# print(\"运行中ing...\")\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;66;03m# # 每天定时12点运行\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 18\u001b[0m \u001b[38;5;66;03m# except :\u001b[39;00m\n\u001b[0;32m 19\u001b[0m \u001b[38;5;66;03m# print(f\"{current_time}任务失败\")\u001b[39;00m\n\u001b[1;32m---> 20\u001b[0m main()\n", - "Cell \u001b[1;32mIn[1], line 848\u001b[0m, in \u001b[0;36mmain\u001b[1;34m(start_date, token, token_push)\u001b[0m\n\u001b[0;32m 846\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mmain\u001b[39m(start_date\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,token\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,token_push\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m 847\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m start_date \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m--> 848\u001b[0m start_date \u001b[38;5;241m=\u001b[39m datetime\u001b[38;5;241m.\u001b[39mnow()\n\u001b[0;32m 849\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m token \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 850\u001b[0m token \u001b[38;5;241m=\u001b[39m get_head_auth()\n", - "\u001b[1;31mAttributeError\u001b[0m: module 'datetime' has no attribute 'now'" -||||||| .r87044 - "name": "stdout", - "output_type": "stream", - "text": [ - "运行中ing...\n" -======= "name": "stdout", "output_type": "stream", "text": [ + "运行中ing...\n", + "执行定时任务\n", "20250522\n" ->>>>>>> .r87201 ] }, { "name": "stderr", "output_type": "stream", "text": [ - 
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20588\\2014406737.py:615: DeprecationWarning:\n", + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:618: DeprecationWarning:\n", "\n", "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", "\n" @@ -982,25 +924,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "当月数据更新完成\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20588\\2014406737.py:298: UserWarning:\n", - "\n", - "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", - "\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using matplotlib backend: inline\n", - "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", "Populating the interactive namespace from numpy and matplotlib\n", "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" ] @@ -1009,9 +934,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "c:\\Users\\EDY\\.conda\\envs\\jiageyuce\\Lib\\site-packages\\IPython\\core\\magics\\pylab.py:166: UserWarning:\n", + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\IPython\\core\\magics\\pylab.py:160: UserWarning:\n", "\n", - "pylab import has clobbered these variables: ['random', 'datetime', 'plot', '__version__']\n", + "pylab import has clobbered these variables: ['datetime', 'random', 'plot', '__version__']\n", "`%matplotlib` prevents importing * from pylab and numpy\n", "\n" ] @@ -1020,24 +945,26 @@ "name": "stdout", "output_type": "stream", "text": [ - "Best score: 0.997\n", + "Best score: 0.996\n", "Best parameters set:\n", "\tlearning_rate: 0.1\n", - "\tmax_depth: 8\n", - "\tn_estimators: 90\n" + "\tmax_depth: 10\n", + "\tn_estimators: 100\n", + "日期\n", + "2025-05-22 3596.835693\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "执行定时任务\n", + "20250522\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20588\\2014406737.py:238: UserWarning:\n", + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:618: DeprecationWarning:\n", "\n", - "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", - "\n", - "C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20588\\2014406737.py:272: FutureWarning:\n", - "\n", - "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. 
Use `array.size > 0` to check that an array is not empty.\n", "\n" ] }, @@ -1045,8 +972,823 @@ "name": "stdout", "output_type": "stream", "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Best score: 0.996\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 10\n", + "\tn_estimators: 100\n", "日期\n", - "2025-05-22 3600.097412\n", + "2025-05-22 3596.835693\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "执行定时任务\n", + "20250523\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:618: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Best score: 0.997\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 10\n", + "\tn_estimators: 100\n", + "日期\n", + "2025-05-23 3599.072754\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "执行定时任务\n", + "20250523\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:618: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Best score: 0.997\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 10\n", + "\tn_estimators: 100\n", + "日期\n", + "2025-05-23 3599.072754\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "执行定时任务\n", + "20250524\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:618: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. 
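The DeprecationWarning repeated through these runs comes from using a NumPy array directly as a boolean; that pattern appears in this notebook in save_queryDataListItemNos_xls, where group[...]["dataValue"].values is used as an if condition. The warning text itself names the fix. A small before/after sketch with a hypothetical empty lookup result:

    import numpy as np

    values = np.array([])        # e.g. no dataValue matched a dataItemNo for that date
    # if values:                 # current pattern: DeprecationWarning now, an error in future NumPy
    if values.size > 0:          # the explicit emptiness check the warning recommends
        print(values[0])
    else:
        print("no value for this item on this date")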
Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Best score: 0.997\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 10\n", + "\tn_estimators: 100\n", + "日期\n", + "2025-05-24 3599.731201\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "执行定时任务\n", + "20250524\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:618: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Best score: 0.997\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 10\n", + "\tn_estimators: 100\n", + "日期\n", + "2025-05-24 3599.731201\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "执行定时任务\n", + "20250525\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:618: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Best score: 0.997\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 10\n", + "\tn_estimators: 100\n", + "日期\n", + "2025-05-25 3595.706055\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "执行定时任务\n", + "20250525\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:618: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. 
Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Best score: 0.997\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 10\n", + "\tn_estimators: 100\n", + "日期\n", + "2025-05-25 3595.706055\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "执行定时任务\n", + "20250526\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:618: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Best score: 0.997\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 8\n", + "\tn_estimators: 100\n", + "日期\n", + "2025-05-26 3599.215576\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "执行定时任务\n", + "20250526\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:618: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Best score: 0.997\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 8\n", + "\tn_estimators: 100\n", + "日期\n", + "2025-05-26 3599.215576\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "执行定时任务\n", + "20250527\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:618: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. 
Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Best score: 0.997\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 10\n", + "\tn_estimators: 100\n", + "日期\n", + "2025-05-27 3598.600586\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "执行定时任务\n", + "20250527\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:618: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Best score: 0.997\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 10\n", + "\tn_estimators: 100\n", + "日期\n", + "2025-05-27 3598.600586\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "执行定时任务\n", + "20250528\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:618: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Best score: 0.997\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 10\n", + "\tn_estimators: 100\n", + "日期\n", + "2025-05-28 3599.99585\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "执行定时任务\n", + "20250528\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:618: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. 
Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Best score: 0.997\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 10\n", + "\tn_estimators: 100\n", + "日期\n", + "2025-05-28 3599.99585\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "执行定时任务\n", + "20250529\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:618: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Best score: 0.997\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 8\n", + "\tn_estimators: 100\n", + "日期\n", + "2025-05-29 3616.390869\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "执行定时任务\n", + "20250529\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:618: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Best score: 0.997\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 8\n", + "\tn_estimators: 100\n", + "日期\n", + "2025-05-29 3616.390869\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "执行定时任务\n", + "20250530\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:618: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. 
Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Best score: 0.997\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 10\n", + "\tn_estimators: 100\n", + "日期\n", + "2025-05-30 3611.802246\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "执行定时任务\n", + "20250530\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:618: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Best score: 0.997\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 10\n", + "\tn_estimators: 100\n", + "日期\n", + "2025-05-30 3611.802246\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "执行定时任务\n", + "20250531\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:618: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Best score: 0.997\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 10\n", + "\tn_estimators: 100\n", + "日期\n", + "2025-05-31 3640.084229\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "执行定时任务\n", + "20250531\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:618: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. 
Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Best score: 0.997\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 10\n", + "\tn_estimators: 100\n", + "日期\n", + "2025-05-31 3640.084229\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "执行定时任务\n", + "20250601\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:618: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Best score: 0.997\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 10\n", + "\tn_estimators: 100\n", + "日期\n", + "2025-06-01 3646.292236\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "执行定时任务\n", + "20250601\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:618: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Best score: 0.997\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 10\n", + "\tn_estimators: 100\n", + "日期\n", + "2025-06-01 3646.292236\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "执行定时任务\n", + "20250602\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:618: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. 
Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Best score: 0.996\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 10\n", + "\tn_estimators: 100\n", + "日期\n", + "2025-06-02 3619.931885\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "执行定时任务\n", + "20250602\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:618: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Best score: 0.996\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 10\n", + "\tn_estimators: 100\n", + "日期\n", + "2025-06-02 3619.931885\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "执行定时任务\n", + "20250603\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:618: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Best score: 0.996\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 10\n", + "\tn_estimators: 100\n", + "日期\n", + "2025-06-03 3650.379883\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "执行定时任务\n", + "20250603\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:618: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. 
Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Best score: 0.996\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 10\n", + "\tn_estimators: 100\n", + "日期\n", + "2025-06-03 3650.379883\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "执行定时任务\n", + "20250604\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:618: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Best score: 0.996\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 10\n", + "\tn_estimators: 100\n", + "日期\n", + "2025-06-04 3649.349121\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "执行定时任务\n", + "20250604\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:618: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. 
Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Best score: 0.996\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 10\n", + "\tn_estimators: 100\n", + "日期\n", + "2025-06-04 3649.349121\n", "Name: 日度预测价格, dtype: float32\n", "{\"confirmFlg\":false,\"status\":true}\n" ] @@ -1054,29 +1796,14 @@ ], "source": [ "\n", - "# if __name__ == \"__main__\":\n", - "# print(\"运行中ing...\")\n", - "# # 每天定时12点运行\n", - "# while True:\n", - "# # 获取当前时间\n", - "# current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n", - "# # print(current_time)\n", + "if __name__ == \"__main__\":\n", + " print(\"运行中ing...\")\n", + " # 每天定时12点运行\n", + " while True:\n", + " # 获取当前时间\n", + " current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n", + " # print(current_time)\n", "\n", -<<<<<<< .mine - "# # 判断当前时间是否为执行任务的时间点\n", - "# try:\n", - "# if current_time == \"09:13:00\":\n", - "# print(\"执行定时任务\")\n", - "# main()\n", - "# elif current_time == \"09:18:00\":\n", - "# print(\"执行定时任务\")\n", - "# main()\n", - "# time.sleep(1)\n", - "# except :\n", - "# print(f\"{current_time}任务失败\")\n", - "main()\n", - "main()\n", -||||||| .r87044 " # 判断当前时间是否为执行任务的时间点\n", " try:\n", " if current_time == \"09:13:00\":\n", @@ -1089,28 +1816,15 @@ " except :\n", " print(f\"{current_time}任务失败\")\n", "# main()\n", -======= - "# # 判断当前时间是否为执行任务的时间点\n", - "# try:\n", - "# if current_time == \"09:13:00\":\n", - "# print(\"执行定时任务\")\n", - "# main()\n", - "# elif current_time == \"09:18:00\":\n", - "# print(\"执行定时任务\")\n", - "# main()\n", - "# time.sleep(1)\n", - "# except :\n", - "# print(f\"{current_time}任务失败\")\n", - "main()\n", ->>>>>>> .r87201 + "# main()\n", " # 检测数据准确性, 需要检测放开\n", " # check_data(\"100028098|LISTING_PRICE\")\n", - " # check_data(\"9137070016544622XB|DAY_Yield\")" + " # check_data(\"9137070016544622XB|DAY_Yield\")\n" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1127,7 +1841,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1237,7 +1951,7 @@ ], "metadata": { "kernelspec": { - "display_name": "base", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -1251,13 +1965,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", -<<<<<<< .mine - "version": "3.11.7" -||||||| .r87044 "version": "3.7.0" -======= - "version": "3.11.11" ->>>>>>> .r87201 } }, "nbformat": 4, diff --git a/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb.mine b/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb.mine deleted file mode 100644 index 5e7fb95..0000000 --- a/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb.mine +++ /dev/null @@ -1,1100 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:From C:\\Users\\EDY\\AppData\\Roaming\\Python\\Python311\\site-packages\\keras\\src\\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. 
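The scheduling cell restored above polls once per second and fires main() only when the wall-clock string equals 09:13:00 or 09:18:00 exactly, with a bare except: around the call. A sketch of the same polling idea written a little more forgivingly, matching on the minute and catching only Exception; the two target times are taken from the cell, everything else here is an assumption rather than part of the committed change:

    import time

    RUN_AT = {"09:13", "09:18"}           # same schedule as the cell above
    fired = set()                         # (date, time) pairs already executed

    while True:
        today = time.strftime("%Y-%m-%d")
        now = time.strftime("%H:%M")
        if now in RUN_AT and (today, now) not in fired:
            print("执行定时任务")
            try:
                main()                    # main() as defined earlier in the notebook
            except Exception as exc:      # narrower than a bare except:, keeps Ctrl-C usable
                print(f"{today} {now} 任务失败: {exc}")
            fired.add((today, now))
        time.sleep(1)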
Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.\n", - "\n" - ] - }, - { - "data": { - "text/html": [ - " \n", - " " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import requests\n", - "import json\n", - "import xlrd\n", - "import xlwt\n", - "from datetime import datetime, timedelta\n", - "import time\n", - "import pandas as pd\n", - "\n", - "# 变量定义\n", - "login_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n", - "search_url = \"http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos\"\n", - "queryDataListItemNos_url = \"http://10.200.32.39/jingbo-api//api/warehouse/dwDataItem/queryDataListItemNos\"\n", - "\n", - "login_push_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n", - "upload_url = \"http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList\"\n", - "\n", - "login_data = {\n", - " \"data\": {\n", - " \"account\": \"api_dev\",\n", - " \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n", - " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n", - " \"terminal\": \"API\"\n", - " },\n", - " \"funcModule\": \"API\",\n", - " \"funcOperation\": \"获取token\"\n", - "}\n", - "\n", - "login_push_data = {\n", - " \"data\": {\n", - " \"account\": \"api_dev\",\n", - " \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n", - " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n", - " \"terminal\": \"API\"\n", - " },\n", - " \"funcModule\": \"API\",\n", - " \"funcOperation\": \"获取token\"\n", - "}\n", - "\n", - "read_file_path_name = \"沥青数据项.xlsx\"\n", - "one_cols = []\n", - "two_cols = []\n", - "\n", - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sn\n", - "import random\n", - "import time\n", - "\n", - "\n", - "\n", - "\n", - "from plotly import __version__\n", - "from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot\n", - "\n", - "from sklearn import preprocessing\n", - "\n", - "from pandas import Series,DataFrame\n", - "\n", - "import matplotlib.pyplot as plt\n", - "\n", - "import sklearn.datasets as datasets\n", - "\n", - "#导入机器学习算法模型\n", - "from sklearn.linear_model import Lasso\n", - "from xgboost import XGBRegressor\n", - "\n", - "import statsmodels.api as sm\n", - "# from keras.preprocessing.sequence import TimeseriesGenerator\n", - "from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator\n", - "\n", - "import plotly.express as px\n", - "import plotly.graph_objects as go\n", - "\n", - "import xgboost as xgb\n", - "from xgboost import plot_importance, plot_tree\n", - "from sklearn.metrics import mean_absolute_error\n", - "from statsmodels.tools.eval_measures import mse,rmse\n", - "from sklearn.model_selection import GridSearchCV\n", - "from xgboost import XGBRegressor\n", - "import warnings\n", - "import pickle\n", - "\n", - "from sklearn.metrics import mean_squared_error\n", - "\n", - "#切割训练数据和样本数据\n", - "from sklearn.model_selection import train_test_split\n", - "\n", - "#用于模型评分\n", - "from sklearn.metrics import r2_score\n", - "\n", - "le = preprocessing.LabelEncoder()\n", - "\n", - "# print(__version__) # requires version >= 1.9.0\n", - "\n", - "\n", - "import cufflinks as cf\n", - "cf.go_offline()\n", - "\n", - "random.seed(100)\n", - "\n", - "%matplotlib inline\n", - "\n", - "# 数据获取\n", - "\n", - "def get_head_auth():\n", - " login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5))\n", - " text = 
json.loads(login_res.text)\n", - " if text[\"status\"]:\n", - " token = text[\"data\"][\"accessToken\"]\n", - " return token\n", - " else:\n", - " print(\"获取认证失败\")\n", - " return None\n", - "\n", - "\n", - "def get_data_value(token, dataItemNoList,date=''):\n", - "\n", - " search_data = {\n", - " \"data\": {\n", - " \"date\": getNow(date)[0],\n", - " \"dataItemNoList\": dataItemNoList\n", - " },\n", - " \"funcModule\": \"数据项\",\n", - " \"funcOperation\": \"查询\"\n", - " }\n", - " headers = {\"Authorization\": token}\n", - " search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5))\n", - " search_value = json.loads(search_res.text)[\"data\"]\n", - " if search_value:\n", - " return search_value\n", - " else:\n", - " print(\"今天没有新数据\")\n", - " return None\n", - "\n", - "\n", - "# xls文件处理\n", - "\n", - "\n", - "\n", - "def getNow(date='', offset=0):\n", - " \"\"\"生成指定日期的两种格式字符串\n", - " Args:\n", - " date: 支持多种输入类型:\n", - " - datetime对象\n", - " - 字符串格式(支持'%Y-%m-%d'和'%Y%m%d')\n", - " - 空字符串表示当前日期\n", - " offset: 日期偏移天数\n", - " Returns:\n", - " tuple: (紧凑日期字符串, 标准日期字符串)\n", - " \"\"\"\n", - " # 日期解析逻辑\n", - " from datetime import datetime,timedelta\n", - " if isinstance(date, datetime):\n", - " now = date\n", - " else:\n", - " now = datetime.now()\n", - " if date:\n", - " # 尝试多种日期格式解析\n", - " for fmt in ('%Y-%m-%d', '%Y%m%d', '%Y/%m/%d'):\n", - " try:\n", - " now = datetime.strptime(str(date), fmt)\n", - " break\n", - " except ValueError:\n", - " continue\n", - " else:\n", - " raise ValueError(f\"无法解析的日期格式: {date}\")\n", - "\n", - " # 应用日期偏移\n", - " now = now - timedelta(days=offset)\n", - " \n", - " # 统一格式化输出\n", - " date_str = now.strftime(\"%Y-%m-%d\")\n", - " compact_date = date_str.replace(\"-\", \"\")\n", - " return compact_date, date_str\n", - "\n", - "\n", - "\n", - "# def get_cur_time(date = ''):\n", - "# if date == '':\n", - "# now = datetime.now()\n", - "# else:\n", - "# now = date\n", - "# year = now.year\n", - "# month = now.month\n", - "# day = now.day\n", - "\n", - "# if month < 10:\n", - "# month = \"0\" + str(month)\n", - "# if day < 10:\n", - "# day = \"0\" + str(day)\n", - "# cur_time = str(year) + str(month) + str(day)\n", - "# cur_time2 = str(year) + \"-\" + str(month) + \"-\" + str(day)\n", - "# # cur_time = '20231007'\n", - "# # cur_time2 = '2023-10-07'\n", - "# return cur_time, cur_time2\n", - "\n", - "\n", - "def get_head_push_auth():\n", - " login_res = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5))\n", - " text = json.loads(login_res.text)\n", - " if text[\"status\"]:\n", - " token = text[\"data\"][\"accessToken\"]\n", - " return token\n", - " else:\n", - " print(\"获取认证失败\")\n", - " return None\n", - "\n", - "def upload_data_to_system(token_push,date):\n", - " data = {\n", - " \"funcModule\": \"数据表信息列表\",\n", - " \"funcOperation\": \"新增\",\n", - " \"data\": [\n", - " {\"dataItemNo\": \"C01100036|Forecast_Price|ACN\",\n", - " \"dataDate\": getNow(date)[0],\n", - " \"dataStatus\": \"add\",\n", - " \"dataValue\": forecast_price()\n", - " }\n", - "\n", - " ]\n", - " }\n", - " headers = {\"Authorization\": token_push}\n", - " res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))\n", - " print(res.text)\n", - "\n", - "def forecast_price():\n", - " df_test = pd.read_excel('沥青数据项.xlsx')\n", - " df_test.drop([0],inplace=True)\n", - " df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", - " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", - " 
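The getNow() helper above normalises several date spellings plus an optional day offset into the two formats the rest of the notebook needs (compact for the API, dashed for the spreadsheet). Expected behaviour, assuming the definition above is in scope:

    print(getNow('2025-05-22'))            # ('20250522', '2025-05-22')
    print(getNow('20250522', offset=1))    # ('20250521', '2025-05-21'), one day earlier
    print(getNow('2025/05/22'))            # ('20250522', '2025-05-22'), slash form also accepted
    print(getNow(''))                      # today's date in both formats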
'布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", - " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", - " '京博签单量','京博库存量','京博产量','加权平均成交价']] = df_test[['汽油执行价','柴油执行价','齐鲁石化销量',\n", - " '齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", - " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", - " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", - " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", - " '京博签单量','京博库存量','京博产量','加权平均成交价']].astype('float')\n", - " # df_test['日期']=pd.to_datetime(df_test['日期'], format='%d/%m/%Y',infer_datetime_format=True)\n", - " df_test['日期']=pd.to_datetime(df_test['日期'], format='%Y-%m-%d',infer_datetime_format=True)\n", - "\n", - " #查看每个特征缺失值数量\n", - " MisVal_Check=df_test.isnull().sum().sort_values(ascending=False)\n", - " #去掉缺失值百分比>0.4的特征,去掉这些特征后的新表格命名为df_test_1\n", - " df_MisVal_Check = pd.DataFrame(MisVal_Check,)#\n", - " df_MisVal_Check_1=df_MisVal_Check.reset_index()\n", - " df_MisVal_Check_1.columns=['Variable_Name','Missing_Number'] \n", - " df_MisVal_Check_1['Missing_Number']=df_MisVal_Check_1['Missing_Number']/len(df_test)\n", - " df_test_1=df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number']>0.4].Variable_Name,axis = 1)\n", - "\n", - " #将缺失值补为前一个或者后一个数值\n", - " df_test_1=df_test_1.fillna(df_test.ffill())\n", - " df_test_1=df_test_1.fillna(df_test_1.bfill())\n", - "\n", - " # 选择用于模型训练的列名称\n", - " col_for_training = df_test_1.columns\n", - " import joblib\n", - " Best_model_DalyLGPrice = joblib.load(\"日度价格预测_最佳模型.pkl\")\n", - " # 最新的一天为最后一行的数据\n", - " df_test_1_Day = df_test_1.tail(1)\n", - " # 移除不需要的列\n", - " df_test_1_Day.index = df_test_1_Day[\"日期\"]\n", - " df_test_1_Day = df_test_1_Day.drop([\"日期\"], axis= 1)\n", - " df_test_1_Day=df_test_1_Day.drop('京博指导价',axis=1)\n", - " df_test_1_Day=df_test_1_Day.dropna()\n", - "\n", - " # df_test_1_Day\n", - " #预测今日价格,显示至小数点后两位\n", - " Ypredict_Today=Best_model_DalyLGPrice.predict(df_test_1_Day)\n", - "\n", - " df_test_1_Day['日度预测价格']=Ypredict_Today\n", - " print(df_test_1_Day['日度预测价格'])\n", - " a = df_test_1_Day['日度预测价格']\n", - " a = a[0]\n", - " a = float(a)\n", - " a = round(a,2)\n", - " return a\n", - "def optimize_Model():\n", - " from sklearn.model_selection import train_test_split\n", - " from sklearn.impute import SimpleImputer\n", - " from sklearn.preprocessing import OrdinalEncoder\n", - " from sklearn.feature_selection import SelectFromModel\n", - " from sklearn.metrics import mean_squared_error, r2_score\n", - "\n", - " pd.set_option('display.max_rows',40) \n", - " pd.set_option('display.max_columns',40) \n", - " df_test = pd.read_excel('沥青数据项.xlsx')\n", - " df_test.drop([0],inplace=True)\n", - " df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", - " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", - " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", - " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", - " '京博签单量','京博库存量','京博产量','加权平均成交价']] = df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", - " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", - " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", - " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", - " '京博签单量','京博库存量','京博产量','加权平均成交价']].astype('float')\n", - " # df_test = pd.read_csv('定价模型数据收集20190901-20230615.csv',encoding = 
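Both forecast_price and optimize_Model apply the same cleaning rule before modelling: drop any feature whose missing-value ratio exceeds 0.4, then forward- and backward-fill whatever gaps remain. A compact equivalent of that rule on a toy frame (the column names here are made up):

    import pandas as pd

    df = pd.DataFrame({"a": [1.0, None, 3.0, 4.0],
                       "b": [None, None, None, 4.0]})
    missing_ratio = df.isnull().sum() / len(df)                        # a: 0.25, b: 0.75
    kept = df.drop(columns=missing_ratio[missing_ratio > 0.4].index)   # "b" is dropped
    kept = kept.ffill().bfill()                                        # fill the remaining gap in "a"
    print(kept)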
'gbk',engine = 'python')\n", - " # df_test['日期']=pd.to_datetime(df_test['日期'], format='%m/%d/%Y',infer_datetime_format=True)\n", - " df_test['日期']=pd.to_datetime(df_test['日期'], format='%Y-%m-%d',infer_datetime_format=True)\n", - " # df_test.tail(3)\n", - " MisVal_Check=df_test.isnull().sum().sort_values(ascending=False)\n", - " #去掉缺失值百分比>0.4的特征,去掉这些特征后的新表格命名为df_test_1\n", - " df_MisVal_Check = pd.DataFrame(MisVal_Check,)#\n", - " df_MisVal_Check_1=df_MisVal_Check.reset_index()\n", - " df_MisVal_Check_1.columns=['Variable_Name','Missing_Number'] \n", - " df_MisVal_Check_1['Missing_Number']=df_MisVal_Check_1['Missing_Number']/len(df_test)\n", - " df_test_1=df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number']>0.4].Variable_Name,axis = 1)\n", - " #将缺失值补为前一个或者后一个数值\n", - " df_test_1=df_test_1.fillna(df_test.ffill())\n", - " df_test_1=df_test_1.fillna(df_test_1.bfill())\n", - " df_test_1[\"日期\"] = pd.to_datetime(df_test_1[\"日期\"])\n", - " df_test_1.index = df_test_1[\"日期\"]\n", - " df_test_1 = df_test_1.drop([\"日期\"], axis= 1)\n", - " dataset1=df_test_1.drop('京博指导价',axis=1)#.astype(float)\n", - "\n", - " y=df_test_1['京博指导价']\n", - "\n", - " x=dataset1 \n", - "\n", - " train = x\n", - " target = y\n", - "\n", - " #切割数据样本集合测试集\n", - " X_train,x_test,y_train,y_true = train_test_split(train,target,test_size=0.2,random_state=0)\n", - " \n", - " \n", - " from sklearn.linear_model import Lasso\n", - " from xgboost import XGBRegressor\n", - "\n", - " import statsmodels.api as sm\n", - " # from keras.preprocessing.sequence import TimeseriesGenerator\n", - " from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator\n", - "\n", - " import plotly.express as px\n", - " import plotly.graph_objects as go\n", - "\n", - " import xgboost as xgb\n", - " from xgboost import plot_importance, plot_tree\n", - " from sklearn.metrics import mean_absolute_error\n", - " from statsmodels.tools.eval_measures import mse,rmse\n", - " from sklearn.model_selection import GridSearchCV\n", - " from xgboost import XGBRegressor\n", - " import warnings\n", - " import pickle\n", - "\n", - " from sklearn.metrics import mean_squared_error\n", - "\n", - " #切割训练数据和样本数据\n", - " from sklearn.model_selection import train_test_split\n", - "\n", - " #用于模型评分\n", - " from sklearn.metrics import r2_score\n", - "\n", - " #模型缩写\n", - " Lasso = Lasso(random_state=0)\n", - " XGBR = XGBRegressor(random_state=0)\n", - " Lasso.fit(X_train,y_train)\n", - " XGBR.fit(X_train,y_train)\n", - " y_pre_Lasso = Lasso.predict(x_test)\n", - " y_pre_XGBR = XGBR.predict(x_test)\n", - "\n", - " #计算Lasso、XGBR、RandomForestR、AdaBoostR、GradientBoostingR、BaggingRegressor各模型的R²\n", - " Lasso_score = r2_score(y_true,y_pre_Lasso)\n", - " XGBR_score=r2_score(y_true,y_pre_XGBR)\n", - "\n", - " #计算Lasso、XGBR的MSE和RMSE\n", - " Lasso_MSE=mean_squared_error(y_true, y_pre_Lasso)\n", - " XGBR_MSE=mean_squared_error(y_true, y_pre_XGBR)\n", - "\n", - " Lasso_RMSE=np.sqrt(Lasso_MSE)\n", - " XGBR_RMSE=np.sqrt(XGBR_MSE)\n", - " model_results = pd.DataFrame([['Lasso', Lasso_RMSE, Lasso_score],\n", - " ['XgBoost', XGBR_RMSE, XGBR_score]],\n", - " columns = ['模型(Model)','均方根误差(RMSE)', 'R^2 score'])\n", - " model_results1=model_results.set_index('模型(Model)')\n", - "\n", - " def plot_feature_importance(importance,names,model_type):\n", - " feature_importance = np.array(importance)\n", - " feature_names = np.array(names)\n", - "\n", - " data={'feature_names':feature_names,'feature_importance':feature_importance}\n", - " fi_df = pd.DataFrame(data)\n", - "\n", - " 
fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True)\n", - "\n", - " plt.figure(figsize=(10,8))\n", - " sn.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names'])\n", - "\n", - " plt.title(model_type + \" \"+'FEATURE IMPORTANCE')\n", - " plt.xlabel('FEATURE IMPORTANCE')\n", - " plt.ylabel('FEATURE NAMES')\n", - " from pylab import mpl\n", - " %pylab\n", - " mpl.rcParams['font.sans-serif'] = ['SimHei']\n", - " from xgboost import XGBRegressor\n", - " from sklearn.model_selection import GridSearchCV\n", - "\n", - " estimator = XGBRegressor(random_state=0,\n", - " nthread=4,\n", - " seed=0\n", - " )\n", - " parameters = {\n", - " 'max_depth': range (2, 11, 2), # 树的最大深度\n", - " 'n_estimators': range (50, 101, 10), # 迭代次数\n", - " 'learning_rate': [0.01, 0.03, 0.1, 0.3, 0.5, 1]\n", - " }\n", - "\n", - " grid_search_XGB = GridSearchCV(\n", - " estimator=estimator,\n", - " param_grid=parameters,\n", - " # n_jobs = 10,\n", - " cv = 3,\n", - " verbose=True\n", - " )\n", - "\n", - " grid_search_XGB.fit(X_train, y_train)\n", - " print(\"Best score: %0.3f\" % grid_search_XGB.best_score_)\n", - " print(\"Best parameters set:\")\n", - " best_parameters = grid_search_XGB.best_estimator_.get_params()\n", - " for param_name in sorted(parameters.keys()):\n", - " print(\"\\t%s: %r\" % (param_name, best_parameters[param_name]))\n", - " y_pred = grid_search_XGB.predict(x_test)\n", - "\n", - " op_XGBR_score = r2_score(y_true,y_pred)\n", - " op_XGBR_MSE= mean_squared_error(y_true, y_pred)\n", - " op_XGBR_RMSE= np.sqrt(op_XGBR_MSE)\n", - "\n", - " model_results2 = pd.DataFrame([['Optimized_Xgboost', op_XGBR_RMSE, op_XGBR_score]],\n", - " columns = ['模型(Model)', '均方根误差(RMSE)', 'R^2 score'])\n", - " model_results2=model_results2.set_index('模型(Model)')\n", - "\n", - " # results = model_results1.append(model_results2, ignore_index = False)\n", - " results = pd.concat([model_results1,model_results2],ignore_index=True)\n", - " results\n", - " import pickle\n", - "\n", - " Pkl_Filename = \"日度价格预测_最佳模型.pkl\" \n", - "\n", - " with open(Pkl_Filename, 'wb') as file: \n", - " pickle.dump(grid_search_XGB, file)\n", - "\n", - "def read_xls_data():\n", - " \"\"\"获取特征项ID\"\"\"\n", - " global one_cols, two_cols\n", - " # 使用pandas读取Excel文件\n", - " df = pd.read_excel(read_file_path_name, header=None) # 不自动识别列名\n", - " # 获取第二行数据(索引为1)\n", - " one_cols = df.iloc[1].tolist()[1:]\n", - " print(f'获取到的数据项ID{one_cols}')\n", - "\n", - "# def read_xls_data():\n", - "# global one_cols, two_cols\n", - "# # 打开 XLS 文件\n", - "# workbook = xlrd.open_workbook(read_file_path_name)\n", - "\n", - "# # 获取所有表格名称\n", - "# # sheet_names = workbook.sheet_names()\n", - "\n", - "# # 选择第一个表格\n", - "# sheet = workbook.sheet_by_index(0)\n", - "\n", - "# # 获取行数和列数\n", - "# num_rows = sheet.nrows\n", - "# # num_cols = sheet.ncols\n", - "\n", - "# # 遍历每一行,获取单元格数据\n", - "# # for i in range(num_rows):\n", - "# # row_data = sheet.row_values(i)\n", - "# # one_cols.append(row_data)\n", - "# # two_cols.append(row_data[1])\n", - "\n", - "# row_data = sheet.row_values(1)\n", - "# one_cols = row_data\n", - "\n", - "# # 关闭 XLS 文件\n", - "# # workbook.close()\n", - "\n", - "def start_3(date=None,token=None,token_push=None):\n", - " '''预测上传数据'''\n", - " read_xls_data()\n", - "\n", - " if date==None:\n", - " date = datetime.now()\n", - " if token==None:\n", - " token = get_head_auth()\n", - " if token_push==None:\n", - " token = get_head_auth()\n", - "\n", - " datas = get_data_value(token, one_cols[1:],date)\n", - " if not datas:\n", - " 
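The grid defined above is also where the repeated log line "Fitting 3 folds for each of 180 candidates, totalling 540 fits" comes from: 5 depths x 6 estimator counts x 6 learning rates gives 180 parameter combinations, each fitted on 3 cross-validation folds.

    max_depth     = list(range(2, 11, 2))            # [2, 4, 6, 8, 10] -> 5 values
    n_estimators  = list(range(50, 101, 10))         # [50, 60, ..., 100] -> 6 values
    learning_rate = [0.01, 0.03, 0.1, 0.3, 0.5, 1]   # 6 values

    candidates = len(max_depth) * len(n_estimators) * len(learning_rate)
    print(candidates, candidates * 3)                # 180 candidates, 540 fits with cv=3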
return\n", - "\n", - " # data_list = [two_cols, one_cols]\n", - " append_rows = [get_cur_time(date)[1]]\n", - " dataItemNo_dataValue = {}\n", - " for data_value in datas:\n", - " if \"dataValue\" not in data_value:\n", - " print(data_value)\n", - " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", - " else:\n", - " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", - " \n", - " for value in one_cols[1:]:\n", - " if value in dataItemNo_dataValue:\n", - " append_rows.append(dataItemNo_dataValue[value])\n", - " else:\n", - " append_rows.append(\"\")\n", - " save_xls(append_rows)\n", - "\n", - " # 获取当月的数据写入到指定文件\n", - " # optimize_Model()\n", - " # upload_data_to_system(token_push,date)\n", - " # data_list.append(three_cols)\n", - " # write_xls(data_list)\n", - "\n", - "def check_data(dataItemNo):\n", - " token = get_head_auth()\n", - " if not token:\n", - " return\n", - "\n", - " datas = get_data_value(token, dataItemNo)\n", - " if not datas:\n", - " return\n", - "\n", - "def save_xls(append_rows):\n", - "\n", - " # 打开xls文件\n", - " workbook = xlrd.open_workbook('沥青数据项.xls')\n", - "\n", - " # 获取所有sheet的个数\n", - " sheet_count = len(workbook.sheet_names())\n", - "\n", - " # 获取所有sheet的名称\n", - " sheet_names = workbook.sheet_names()\n", - "\n", - " new_workbook = xlwt.Workbook()\n", - " for i in range(sheet_count):\n", - " # 获取当前sheet\n", - " sheet = workbook.sheet_by_index(i)\n", - "\n", - " # 获取sheet的行数和列数\n", - " row_count = sheet.nrows\n", - " col_count = sheet.ncols\n", - " # 获取原有数据\n", - " data = []\n", - " for row in range(row_count):\n", - " row_data = []\n", - " for col in range(col_count):\n", - " row_data.append(sheet.cell_value(row, col))\n", - " data.append(row_data)\n", - " # 创建xlwt的Workbook对象\n", - " # 创建sheet\n", - " new_sheet = new_workbook.add_sheet(sheet_names[i])\n", - "\n", - " # 将原有的数据写入新的sheet\n", - " for row in range(row_count):\n", - " for col in range(col_count):\n", - " new_sheet.write(row, col, data[row][col])\n", - "\n", - " if i == 0:\n", - " # 在新的sheet中添加数据\n", - " for col in range(col_count):\n", - " new_sheet.write(row_count, col, append_rows[col])\n", - "\n", - " # 保存新的xls文件\n", - " new_workbook.save(\"沥青数据项.xlsx\")\n", - "\n", - "def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEnd):\n", - "\n", - " search_data = {\n", - " \"funcModule\": \"数据项\",\n", - " \"funcOperation\": \"查询\",\n", - " \"data\": {\n", - " \"dateStart\": dateStart,\n", - " \"dateEnd\": dateEnd,\n", - " \"dataItemNoList\": dataItemNoList # 数据项编码,代表 brent最低价和最高价\n", - " }\n", - " }\n", - "\n", - " headers = {\"Authorization\": token}\n", - " search_res = requests.post(url=url, headers=headers, json=search_data, timeout=(3, 5))\n", - " search_value = json.loads(search_res.text)[\"data\"]\n", - " if search_value:\n", - " return search_value\n", - " else:\n", - " return None\n", - "\n", - "\n", - "def save_queryDataListItemNos_xls(data_df,dataItemNoList):\n", - " current_year_month = datetime.now().strftime('%Y-%m')\n", - " grouped = data_df.groupby(\"dataDate\")\n", - "\n", - " # 使用openpyxl打开xlsx文件\n", - " from openpyxl import load_workbook\n", - " workbook = load_workbook('沥青数据项.xlsx')\n", - "\n", - " # 创建新工作簿\n", - " new_workbook = load_workbook('沥青数据项.xlsx')\n", - " \n", - " for sheetname in workbook.sheetnames:\n", - " sheet = workbook[sheetname]\n", - " new_sheet = new_workbook[sheetname]\n", - " \n", - " current_year_month_row = 0\n", - " # 查找当前月份数据起始行\n", - " for row_idx, row in enumerate(sheet.iter_rows(values_only=True), 
1):\n", - " if str(row[0]).startswith(current_year_month):\n", - " current_year_month_row += 1\n", - "\n", - " # 追加新数据\n", - " if sheetname == workbook.sheetnames[0]:\n", - " start_row = sheet.max_row - current_year_month_row + 1\n", - " for row_idx, (date, group) in enumerate(grouped, start=start_row):\n", - " new_sheet.cell(row=row_idx, column=1, value=date)\n", - " for j, dataItemNo in enumerate(dataItemNoList, start=2):\n", - " if group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values:\n", - " new_sheet.cell(row=row_idx, column=j, \n", - " value=group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values[0])\n", - "\n", - " # 保存修改后的xlsx文件\n", - " new_workbook.save(\"沥青数据项.xlsx\")\n", - "\n", - "\n", - "# def save_queryDataListItemNos_xls(data_df,dataItemNoList):\n", - "# from datetime import datetime, timedelta\n", - "# current_year_month = datetime.now().strftime('%Y-%m')\n", - "# grouped = data_df.groupby(\"dataDate\")\n", - "\n", - "# # 打开xls文件\n", - "# workbook = xlrd.open_workbook('沥青数据项.xlsx')\n", - "\n", - "# # 获取所有sheet的个数\n", - "# sheet_count = len(workbook.sheet_names())\n", - "\n", - "# # 获取所有sheet的名称\n", - "# sheet_names = workbook.sheet_names()\n", - "\n", - "# new_workbook = xlwt.Workbook()\n", - "# for i in range(sheet_count):\n", - "# # 获取当前sheet\n", - "# sheet = workbook.sheet_by_index(i)\n", - "\n", - "# # 获取sheet的行数和列数\n", - "# row_count = sheet.nrows\n", - "# col_count = sheet.ncols\n", - "# # 获取原有数据\n", - "# data = []\n", - "# for row in range(row_count):\n", - "# row_data = []\n", - "# for col in range(col_count):\n", - "# row_data.append(sheet.cell_value(row, col))\n", - "# data.append(row_data)\n", - "\n", - "# # 创建xlwt的Workbook对象\n", - "# # 创建sheet\n", - "# new_sheet = new_workbook.add_sheet(sheet_names[i])\n", - "\n", - "\n", - "# current_year_month_row = 0\n", - "# # 将原有的数据写入新的sheet\n", - "# for row in range(row_count):\n", - "# for col in range(col_count):\n", - "# col0 = data[row][0]\n", - "# # print(\"col0\",col0[:7])\n", - "# if col0[:7] == current_year_month:\n", - "# current_year_month_row += 1\n", - "# break\n", - "# new_sheet.write(row, col, data[row][col])\n", - "\n", - "\n", - "# # print(\"current_year_month_row\",current_year_month_row)\n", - "# if i == 0:\n", - "# rowFlag = 0\n", - "# # 查看每组数据\n", - "# for date, group in grouped:\n", - "# new_sheet.write(row_count + rowFlag - current_year_month_row, 0, date)\n", - "# for j in range(len(dataItemNoList)):\n", - "# dataItemNo = dataItemNoList[j]\n", - "\n", - "# # for dataItemNo in dataItemNoList:\n", - "# if group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values:\n", - "\n", - "# new_sheet.write(row_count + rowFlag - current_year_month_row, j + 1, group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values[0])\n", - "\n", - "# rowFlag += 1\n", - "\n", - "\n", - "# # 保存新的xls文件\n", - "# new_workbook.save(\"沥青数据项.xlsx\")\n", - "\n", - "def queryDataListItemNos(token=None):\n", - " from datetime import datetime, timedelta\n", - " df = pd.read_excel('沥青数据项.xlsx')\n", - " dataItemNoList = df.iloc[0].tolist()[1:]\n", - " \n", - " if token is None:\n", - " token = get_head_auth()\n", - "\n", - " if not token:\n", - " print('token获取失败')\n", - " return\n", - "\n", - " # 获取当前日期\n", - " current_date = datetime.now()\n", - "\n", - " # 获取当月1日\n", - " first_day_of_month = current_date.replace(day=1)\n", - "\n", - " # 格式化为 YYYYMMDD 格式\n", - " dateEnd = current_date.strftime('%Y%m%d')\n", - " dateStart = first_day_of_month.strftime('%Y%m%d')\n", - "\n", - " search_value = 
get_queryDataListItemNos_value(token, queryDataListItemNos_url, dataItemNoList, dateStart, dateEnd)\n", - "\n", - "\n", - " data_df = pd.DataFrame(search_value)\n", - "\n", - " data_df[\"dataDate\"] = pd.to_datetime(data_df[\"dataDate\"])\n", - " data_df[\"dataDate\"] = data_df[\"dataDate\"].dt.strftime('%Y-%m-%d')\n", - " save_queryDataListItemNos_xls(data_df,dataItemNoList)\n", - " print('当月数据更新完成')\n", - "\n", - "\n", - "def save_xls_1(append_rows):\n", - "\n", - " # 打开xls文件\n", - " workbook = xlrd.open_workbook('沥青数据项.xlsx')\n", - "\n", - " # 获取所有sheet的个数\n", - " sheet_count = len(workbook.sheet_names())\n", - "\n", - " # 获取所有sheet的名称\n", - " sheet_names = workbook.sheet_names()\n", - "\n", - " new_workbook = xlwt.Workbook()\n", - " for i in range(sheet_count):\n", - " # 获取当前sheet\n", - " sheet = workbook.sheet_by_index(i)\n", - "\n", - " # 获取sheet的行数和列数\n", - " row_count = sheet.nrows - 1\n", - " col_count = sheet.ncols\n", - " # 获取原有数据\n", - " data = []\n", - " for row in range(row_count):\n", - " row_data = []\n", - " for col in range(col_count):\n", - " row_data.append(sheet.cell_value(row, col))\n", - " data.append(row_data)\n", - " # 创建xlwt的Workbook对象\n", - " # 创建sheet\n", - " new_sheet = new_workbook.add_sheet(sheet_names[i])\n", - "\n", - " # 将原有的数据写入新的sheet\n", - " for row in range(row_count):\n", - " for col in range(col_count):\n", - " new_sheet.write(row, col, data[row][col])\n", - "\n", - " if i == 0:\n", - " # 在新的sheet中添加数据\n", - " for col in range(col_count):\n", - " new_sheet.write(row_count, col, append_rows[col])\n", - "\n", - " # 保存新的xls文件\n", - " new_workbook.save(\"沥青数据项.xlsx\") \n", - "\n", - "\n", - "\n", - "def start(date=''):\n", - " \"\"\"获取当日数据\"\"\"\n", - " read_xls_data()\n", - " token = get_head_auth()\n", - " if not token:\n", - " return\n", - " \n", - " cur_time,cur_time2 = getNow(date)\n", - " print(f\"获取{cur_time}数据\")\n", - " datas = get_data_value(token, one_cols,date=cur_time)\n", - " print(len(datas))\n", - " print(datas)\n", - " if not datas:\n", - " return\n", - "\n", - " append_rows = [cur_time2]\n", - " dataItemNo_dataValue = {}\n", - " for data_value in datas:\n", - " if \"dataValue\" not in data_value:\n", - " print(data_value)\n", - " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", - " else:\n", - " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", - " \n", - " for value in one_cols:\n", - " if value in dataItemNo_dataValue:\n", - " append_rows.append(dataItemNo_dataValue[value])\n", - " else:\n", - " append_rows.append(\"\")\n", - " \n", - " print('添加的行:',len(append_rows),append_rows)\n", - " save_xls_2(append_rows)\n", - "\n", - "\n", - "def save_xls_2(append_rows):\n", - " \"\"\"保存或更新数据到Excel文件\n", - " 参数:\n", - " append_rows (list): 需要追加/更新的数据行,格式为[日期, 数据项1, 数据项2,...]\n", - " \"\"\"\n", - " # try:\n", - " # 读取现有数据(假设第一行为列名)\n", - " df = pd.read_excel('沥青数据项.xlsx', sheet_name=0)\n", - " print('文件中的数据列数:',len(df.columns),df.columns)\n", - " # 转换append_rows为DataFrame\n", - " if len(append_rows) != len(df.columns):\n", - " # 去除第二个元素 ,不知道什么原因多一个空数据\n", - " append_rows.pop(1)\n", - " append_rows = pd.DataFrame([append_rows],columns=df.columns)\n", - " # 创建新数据行\n", - " new_date = append_rows['日期'].values[0]\n", - " \n", - " dates = df['日期'].to_list()\n", - " # 判断日期是否存在\n", - " if new_date in dates:\n", - " # 找到日期所在行的索引\n", - " date_mask = df['日期'] == new_date\n", - " # 存在则更新数据\n", - " df.loc[date_mask] = append_rows.values\n", - " print(f\"更新 {new_date} 数据\")\n", - " else:\n", - " # 不存在则追加数据\n", - " df 
= pd.concat([df, append_rows], ignore_index=True)\n", - " print(df.head())\n", - " print(df.tail())\n", - " print(f\"插入 {new_date} 新数据\")\n", - " \n", - " # 保存更新后的数据\n", - " df.to_excel('沥青数据项.xlsx', index=False, engine='openpyxl')\n", - " \n", - " # except FileNotFoundError:\n", - " # # 如果文件不存在则创建新文件\n", - " # pd.DataFrame([append_rows]).to_excel('沥青数据项.xlsx', index=False, engine='openpyxl')\n", - " # except Exception as e:\n", - " # print(f\"保存数据时发生错误: {str(e)}\")\n", - "\n", - "\n", - "def main(start_date=None,token=None,token_push=None):\n", - " from datatime import datetime\n", - " if start_date is None:\n", - " start_date = datetime.now()\n", - " if token is None:\n", - " token = get_head_auth()\n", - " if token_push is None:\n", - " token_push = get_head_push_auth()\n", - " date = start_date.strftime('%Y%m%d')\n", - " print(date)\n", - "# start(date)\n", - " # 更新当月数据\n", - " queryDataListItemNos(token)\n", - " # 训练模型\n", - " optimize_Model()\n", - " # # 预测&上传预测结果\n", - " upload_data_to_system(token_push,start_date)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "ename": "AttributeError", - "evalue": "module 'datetime' has no attribute 'now'", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[5], line 20\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# if __name__ == \"__main__\":\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m# print(\"运行中ing...\")\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;66;03m# # 每天定时12点运行\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 18\u001b[0m \u001b[38;5;66;03m# except :\u001b[39;00m\n\u001b[0;32m 19\u001b[0m \u001b[38;5;66;03m# print(f\"{current_time}任务失败\")\u001b[39;00m\n\u001b[1;32m---> 20\u001b[0m main()\n", - "Cell \u001b[1;32mIn[1], line 848\u001b[0m, in \u001b[0;36mmain\u001b[1;34m(start_date, token, token_push)\u001b[0m\n\u001b[0;32m 846\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mmain\u001b[39m(start_date\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,token\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,token_push\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m 847\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m start_date \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m--> 848\u001b[0m start_date \u001b[38;5;241m=\u001b[39m datetime\u001b[38;5;241m.\u001b[39mnow()\n\u001b[0;32m 849\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m token \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 850\u001b[0m token \u001b[38;5;241m=\u001b[39m get_head_auth()\n", - "\u001b[1;31mAttributeError\u001b[0m: module 'datetime' has no attribute 'now'" - ] - } - ], - "source": [ - "\n", - "# if __name__ == \"__main__\":\n", - "# print(\"运行中ing...\")\n", - "# # 每天定时12点运行\n", - "# while True:\n", - "# # 获取当前时间\n", - "# current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n", - "# # print(current_time)\n", - "\n", - "# # 判断当前时间是否为执行任务的时间点\n", - "# try:\n", - "# if current_time == \"09:13:00\":\n", - "# print(\"执行定时任务\")\n", - "# main()\n", - "# elif current_time == \"09:18:00\":\n", - "# print(\"执行定时任务\")\n", - "# main()\n", - "# time.sleep(1)\n", - "# except :\n", - "# print(f\"{current_time}任务失败\")\n", - "main()\n", - "main()\n", - " # 检测数据准确性, 
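save_xls_2 earlier in this file keys every write on the 日期 column: an existing date is overwritten in place, a new date is appended. The same decision in isolation, on a hypothetical two-column frame:

    import pandas as pd

    df = pd.DataFrame({"日期": ["2025-05-21"], "京博指导价": [3600.0]})
    new_row = pd.DataFrame([["2025-05-22", 3600.0]], columns=df.columns)

    date = new_row["日期"].iloc[0]
    if date in df["日期"].tolist():
        df.loc[df["日期"] == date] = new_row.values       # same date: update the row in place
    else:
        df = pd.concat([df, new_row], ignore_index=True)  # new date: append a row
    print(df)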
需要检测放开\n", - " # check_data(\"100028098|LISTING_PRICE\")\n", - " # check_data(\"9137070016544622XB|DAY_Yield\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# start_date = datetime(2025, 5, 16)\n", - "# end_date = datetime(2025, 5, 19)\n", - "# token = get_head_auth()\n", - "\n", - "# token_push = get_head_push_auth()\n", - "\n", - "# while start_date < end_date:\n", - "# main(start_date,token,token_push)\n", - "# start_date += timedelta(days=1)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "### 代码备份:\n", - "\n", - "\n", - "class codeBackup:\n", - "\n", - " \n", - "\n", - "\n", - " def write_xls(data,date):\n", - " # 创建一个Workbook对象\n", - " workbook = xlwt.Workbook()\n", - "\n", - " # 创建一个Sheet对象,可指定名称\n", - " sheet = workbook.load('Sheet1')\n", - "\n", - " # 写入数据行\n", - " for row_index, row_data in enumerate(data):\n", - " for col_index, cell_data in enumerate(row_data):\n", - " sheet.write(row_index, col_index, cell_data)\n", - "\n", - " # 保存Workbook到文件\n", - " workbook.save(get_cur_time(date)[0] + '.xls')\n", - "\n", - "\n", - " def start():\n", - " '''预测上传数据'''\n", - " read_xls_data()\n", - "\n", - " token = get_head_auth()\n", - " if not token:\n", - " return\n", - " token_push = get_head_push_auth()\n", - " if not token_push:\n", - " return\n", - "\n", - " datas = get_data_value(token, one_cols[1:])\n", - " if not datas:\n", - " return\n", - "\n", - " # data_list = [two_cols, one_cols]\n", - " append_rows = [get_cur_time()[1]]\n", - " dataItemNo_dataValue = {}\n", - " for data_value in datas:\n", - " if \"dataValue\" not in data_value:\n", - " print(data_value)\n", - " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", - " else:\n", - " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", - " \n", - " for value in one_cols[1:]:\n", - " if value in dataItemNo_dataValue:\n", - " append_rows.append(dataItemNo_dataValue[value])\n", - " else:\n", - " append_rows.append(\"\")\n", - " save_xls(append_rows)\n", - " \n", - " # 获取当月的数据写入到指定文件\n", - " queryDataListItemNos(token)\n", - " optimize_Model()\n", - " upload_data_to_system(token_push)\n", - " # data_list.append(three_cols)\n", - " # write_xls(data_list)\n", - "\n", - "\n", - "\n", - "\n", - " def start_1():\n", - " '''更新数据'''\n", - " print(\"更新当天数据\")\n", - " read_xls_data()\n", - "\n", - " token = get_head_auth()\n", - " if not token:\n", - " return\n", - " \n", - "\n", - " datas = get_data_value(token, one_cols[1:])\n", - " if not datas:\n", - " return\n", - "\n", - " # data_list = [two_cols, one_cols]\n", - " append_rows = [get_cur_time()[1]]\n", - " dataItemNo_dataValue = {}\n", - " for data_value in datas:\n", - " if \"dataValue\" not in data_value:\n", - " print(data_value)\n", - " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", - " else:\n", - " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", - " \n", - " for value in one_cols[1:]:\n", - " if value in dataItemNo_dataValue:\n", - " append_rows.append(dataItemNo_dataValue[value])\n", - " else:\n", - " append_rows.append(\"\")\n", - " print(\"当天数据为:\",append_rows)\n", - " save_xls_1(append_rows)\n", - "\n", - " \n", - " # data_list.append(three_cols)\n", - " # write_xls(data_list)\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": 
"ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.7" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb.r87044 b/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb.r87044 deleted file mode 100644 index 2f999bd..0000000 --- a/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb.r87044 +++ /dev/null @@ -1,1095 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\statsmodels\\compat\\pandas.py:49: FutureWarning:\n", - "\n", - "The Panel class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next version\n", - "\n" - ] - }, - { - "data": { - "text/html": [ - " \n", - " " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import requests\n", - "import json\n", - "import xlrd\n", - "import xlwt\n", - "from datetime import datetime, timedelta\n", - "import time\n", - "import pandas as pd\n", - "\n", - "# 变量定义\n", - "login_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n", - "search_url = \"http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos\"\n", - "queryDataListItemNos_url = \"http://10.200.32.39/jingbo-api//api/warehouse/dwDataItem/queryDataListItemNos\"\n", - "\n", - "login_push_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n", - "upload_url = \"http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList\"\n", - "\n", - "login_data = {\n", - " \"data\": {\n", - " \"account\": \"api_dev\",\n", - " \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n", - " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n", - " \"terminal\": \"API\"\n", - " },\n", - " \"funcModule\": \"API\",\n", - " \"funcOperation\": \"获取token\"\n", - "}\n", - "\n", - "login_push_data = {\n", - " \"data\": {\n", - " \"account\": \"api_dev\",\n", - " \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n", - " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n", - " \"terminal\": \"API\"\n", - " },\n", - " \"funcModule\": \"API\",\n", - " \"funcOperation\": \"获取token\"\n", - "}\n", - "\n", - "read_file_path_name = \"沥青数据项.xlsx\"\n", - "one_cols = []\n", - "two_cols = []\n", - "\n", - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sn\n", - "import random\n", - "import time\n", - "\n", - "\n", - "\n", - "\n", - "from plotly import __version__\n", - "from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot\n", - "\n", - "from sklearn import preprocessing\n", - "\n", - "from pandas import Series,DataFrame\n", - "\n", - "import matplotlib.pyplot as plt\n", - "\n", - "import sklearn.datasets as datasets\n", - "\n", - "#导入机器学习算法模型\n", - "from sklearn.linear_model import Lasso\n", - "from xgboost import XGBRegressor\n", - "\n", - "import statsmodels.api as sm\n", - "# from keras.preprocessing.sequence import TimeseriesGenerator\n", - "from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator\n", - "\n", - "import plotly.express as px\n", - "import plotly.graph_objects as go\n", - "\n", - "import xgboost as xgb\n", - "from xgboost import plot_importance, plot_tree\n", - "from sklearn.metrics import mean_absolute_error\n", - "from 
statsmodels.tools.eval_measures import mse,rmse\n", - "from sklearn.model_selection import GridSearchCV\n", - "from xgboost import XGBRegressor\n", - "import warnings\n", - "import pickle\n", - "\n", - "from sklearn.metrics import mean_squared_error\n", - "\n", - "#切割训练数据和样本数据\n", - "from sklearn.model_selection import train_test_split\n", - "\n", - "#用于模型评分\n", - "from sklearn.metrics import r2_score\n", - "\n", - "le = preprocessing.LabelEncoder()\n", - "\n", - "# print(__version__) # requires version >= 1.9.0\n", - "\n", - "\n", - "import cufflinks as cf\n", - "cf.go_offline()\n", - "\n", - "random.seed(100)\n", - "\n", - "%matplotlib inline\n", - "\n", - "# 数据获取\n", - "\n", - "def get_head_auth():\n", - " login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5))\n", - " text = json.loads(login_res.text)\n", - " if text[\"status\"]:\n", - " token = text[\"data\"][\"accessToken\"]\n", - " return token\n", - " else:\n", - " print(\"获取认证失败\")\n", - " return None\n", - "\n", - "\n", - "def get_data_value(token, dataItemNoList,date=''):\n", - "\n", - " search_data = {\n", - " \"data\": {\n", - " \"date\": getNow(date)[0],\n", - " \"dataItemNoList\": dataItemNoList\n", - " },\n", - " \"funcModule\": \"数据项\",\n", - " \"funcOperation\": \"查询\"\n", - " }\n", - " headers = {\"Authorization\": token}\n", - " search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5))\n", - " search_value = json.loads(search_res.text)[\"data\"]\n", - " if search_value:\n", - " return search_value\n", - " else:\n", - " print(\"今天没有新数据\")\n", - " return None\n", - "\n", - "\n", - "# xls文件处理\n", - "\n", - "\n", - "\n", - "def getNow(date='', offset=0):\n", - " \"\"\"生成指定日期的两种格式字符串\n", - " Args:\n", - " date: 支持多种输入类型:\n", - " - datetime对象\n", - " - 字符串格式(支持'%Y-%m-%d'和'%Y%m%d')\n", - " - 空字符串表示当前日期\n", - " offset: 日期偏移天数\n", - " Returns:\n", - " tuple: (紧凑日期字符串, 标准日期字符串)\n", - " \"\"\"\n", - " # 日期解析逻辑\n", - " from datetime import datetime,timedelta\n", - " if isinstance(date, datetime):\n", - " now = date\n", - " else:\n", - " now = datetime.now()\n", - " if date:\n", - " # 尝试多种日期格式解析\n", - " for fmt in ('%Y-%m-%d', '%Y%m%d', '%Y/%m/%d'):\n", - " try:\n", - " now = datetime.strptime(str(date), fmt)\n", - " break\n", - " except ValueError:\n", - " continue\n", - " else:\n", - " raise ValueError(f\"无法解析的日期格式: {date}\")\n", - "\n", - " # 应用日期偏移\n", - " now = now - timedelta(days=offset)\n", - " \n", - " # 统一格式化输出\n", - " date_str = now.strftime(\"%Y-%m-%d\")\n", - " compact_date = date_str.replace(\"-\", \"\")\n", - " return compact_date, date_str\n", - "\n", - "\n", - "\n", - "# def get_cur_time(date = ''):\n", - "# if date == '':\n", - "# now = datetime.now()\n", - "# else:\n", - "# now = date\n", - "# year = now.year\n", - "# month = now.month\n", - "# day = now.day\n", - "\n", - "# if month < 10:\n", - "# month = \"0\" + str(month)\n", - "# if day < 10:\n", - "# day = \"0\" + str(day)\n", - "# cur_time = str(year) + str(month) + str(day)\n", - "# cur_time2 = str(year) + \"-\" + str(month) + \"-\" + str(day)\n", - "# # cur_time = '20231007'\n", - "# # cur_time2 = '2023-10-07'\n", - "# return cur_time, cur_time2\n", - "\n", - "\n", - "def get_head_push_auth():\n", - " login_res = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5))\n", - " text = json.loads(login_res.text)\n", - " if text[\"status\"]:\n", - " token = text[\"data\"][\"accessToken\"]\n", - " return token\n", - " else:\n", - " print(\"获取认证失败\")\n", - " return None\n", - "\n", - "def 
upload_data_to_system(token_push,date):\n", - " data = {\n", - " \"funcModule\": \"数据表信息列表\",\n", - " \"funcOperation\": \"新增\",\n", - " \"data\": [\n", - " {\"dataItemNo\": \"C01100036|Forecast_Price|ACN\",\n", - " \"dataDate\": getNow(date)[0],\n", - " \"dataStatus\": \"add\",\n", - " \"dataValue\": forecast_price()\n", - " }\n", - "\n", - " ]\n", - " }\n", - " headers = {\"Authorization\": token_push}\n", - " res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))\n", - " print(res.text)\n", - "\n", - "def forecast_price():\n", - " df_test = pd.read_excel('沥青数据项.xlsx')\n", - " df_test.drop([0],inplace=True)\n", - " df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", - " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", - " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", - " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", - " '京博签单量','京博库存量','京博产量','加权平均成交价']] = df_test[['汽油执行价','柴油执行价','齐鲁石化销量',\n", - " '齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", - " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", - " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", - " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", - " '京博签单量','京博库存量','京博产量','加权平均成交价']].astype('float')\n", - " # df_test['日期']=pd.to_datetime(df_test['日期'], format='%d/%m/%Y',infer_datetime_format=True)\n", - " df_test['日期']=pd.to_datetime(df_test['日期'], format='%Y-%m-%d',infer_datetime_format=True)\n", - "\n", - " #查看每个特征缺失值数量\n", - " MisVal_Check=df_test.isnull().sum().sort_values(ascending=False)\n", - " #去掉缺失值百分比>0.4的特征,去掉这些特征后的新表格命名为df_test_1\n", - " df_MisVal_Check = pd.DataFrame(MisVal_Check,)#\n", - " df_MisVal_Check_1=df_MisVal_Check.reset_index()\n", - " df_MisVal_Check_1.columns=['Variable_Name','Missing_Number'] \n", - " df_MisVal_Check_1['Missing_Number']=df_MisVal_Check_1['Missing_Number']/len(df_test)\n", - " df_test_1=df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number']>0.4].Variable_Name,axis = 1)\n", - "\n", - " #将缺失值补为前一个或者后一个数值\n", - " df_test_1=df_test_1.fillna(df_test.ffill())\n", - " df_test_1=df_test_1.fillna(df_test_1.bfill())\n", - "\n", - " # 选择用于模型训练的列名称\n", - " col_for_training = df_test_1.columns\n", - " import joblib\n", - " Best_model_DalyLGPrice = joblib.load(\"日度价格预测_最佳模型.pkl\")\n", - " # 最新的一天为最后一行的数据\n", - " df_test_1_Day = df_test_1.tail(1)\n", - " # 移除不需要的列\n", - " df_test_1_Day.index = df_test_1_Day[\"日期\"]\n", - " df_test_1_Day = df_test_1_Day.drop([\"日期\"], axis= 1)\n", - " df_test_1_Day=df_test_1_Day.drop('京博指导价',axis=1)\n", - " df_test_1_Day=df_test_1_Day.dropna()\n", - "\n", - " # df_test_1_Day\n", - " #预测今日价格,显示至小数点后两位\n", - " Ypredict_Today=Best_model_DalyLGPrice.predict(df_test_1_Day)\n", - "\n", - " df_test_1_Day['日度预测价格']=Ypredict_Today\n", - " print(df_test_1_Day['日度预测价格'])\n", - " a = df_test_1_Day['日度预测价格']\n", - " a = a[0]\n", - " a = float(a)\n", - " a = round(a,2)\n", - " return a\n", - "def optimize_Model():\n", - " from sklearn.model_selection import train_test_split\n", - " from sklearn.impute import SimpleImputer\n", - " from sklearn.preprocessing import OrdinalEncoder\n", - " from sklearn.feature_selection import SelectFromModel\n", - " from sklearn.metrics import mean_squared_error, r2_score\n", - "\n", - " pd.set_option('display.max_rows',40) \n", - " pd.set_option('display.max_columns',40) \n", - " df_test = pd.read_excel('沥青数据项.xlsx')\n", - " 
df_test.drop([0],inplace=True)\n", - " df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", - " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", - " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", - " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", - " '京博签单量','京博库存量','京博产量','加权平均成交价']] = df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", - " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", - " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", - " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", - " '京博签单量','京博库存量','京博产量','加权平均成交价']].astype('float')\n", - " # df_test = pd.read_csv('定价模型数据收集20190901-20230615.csv',encoding = 'gbk',engine = 'python')\n", - " # df_test['日期']=pd.to_datetime(df_test['日期'], format='%m/%d/%Y',infer_datetime_format=True)\n", - " df_test['日期']=pd.to_datetime(df_test['日期'], format='%Y-%m-%d',infer_datetime_format=True)\n", - " # df_test.tail(3)\n", - " MisVal_Check=df_test.isnull().sum().sort_values(ascending=False)\n", - " #去掉缺失值百分比>0.4的特征,去掉这些特征后的新表格命名为df_test_1\n", - " df_MisVal_Check = pd.DataFrame(MisVal_Check,)#\n", - " df_MisVal_Check_1=df_MisVal_Check.reset_index()\n", - " df_MisVal_Check_1.columns=['Variable_Name','Missing_Number'] \n", - " df_MisVal_Check_1['Missing_Number']=df_MisVal_Check_1['Missing_Number']/len(df_test)\n", - " df_test_1=df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number']>0.4].Variable_Name,axis = 1)\n", - " #将缺失值补为前一个或者后一个数值\n", - " df_test_1=df_test_1.fillna(df_test.ffill())\n", - " df_test_1=df_test_1.fillna(df_test_1.bfill())\n", - " df_test_1[\"日期\"] = pd.to_datetime(df_test_1[\"日期\"])\n", - " df_test_1.index = df_test_1[\"日期\"]\n", - " df_test_1 = df_test_1.drop([\"日期\"], axis= 1)\n", - " dataset1=df_test_1.drop('京博指导价',axis=1)#.astype(float)\n", - "\n", - " y=df_test_1['京博指导价']\n", - "\n", - " x=dataset1 \n", - "\n", - " train = x\n", - " target = y\n", - "\n", - " #切割数据样本集合测试集\n", - " X_train,x_test,y_train,y_true = train_test_split(train,target,test_size=0.2,random_state=0)\n", - " \n", - " \n", - " from sklearn.linear_model import Lasso\n", - " from xgboost import XGBRegressor\n", - "\n", - " import statsmodels.api as sm\n", - " # from keras.preprocessing.sequence import TimeseriesGenerator\n", - " from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator\n", - "\n", - " import plotly.express as px\n", - " import plotly.graph_objects as go\n", - "\n", - " import xgboost as xgb\n", - " from xgboost import plot_importance, plot_tree\n", - " from sklearn.metrics import mean_absolute_error\n", - " from statsmodels.tools.eval_measures import mse,rmse\n", - " from sklearn.model_selection import GridSearchCV\n", - " from xgboost import XGBRegressor\n", - " import warnings\n", - " import pickle\n", - "\n", - " from sklearn.metrics import mean_squared_error\n", - "\n", - " #切割训练数据和样本数据\n", - " from sklearn.model_selection import train_test_split\n", - "\n", - " #用于模型评分\n", - " from sklearn.metrics import r2_score\n", - "\n", - " #模型缩写\n", - " Lasso = Lasso(random_state=0)\n", - " XGBR = XGBRegressor(random_state=0)\n", - " Lasso.fit(X_train,y_train)\n", - " XGBR.fit(X_train,y_train)\n", - " y_pre_Lasso = Lasso.predict(x_test)\n", - " y_pre_XGBR = XGBR.predict(x_test)\n", - "\n", - " #计算Lasso、XGBR、RandomForestR、AdaBoostR、GradientBoostingR、BaggingRegressor各模型的R²\n", - " Lasso_score = r2_score(y_true,y_pre_Lasso)\n", - 
" XGBR_score=r2_score(y_true,y_pre_XGBR)\n", - "\n", - " #计算Lasso、XGBR的MSE和RMSE\n", - " Lasso_MSE=mean_squared_error(y_true, y_pre_Lasso)\n", - " XGBR_MSE=mean_squared_error(y_true, y_pre_XGBR)\n", - "\n", - " Lasso_RMSE=np.sqrt(Lasso_MSE)\n", - " XGBR_RMSE=np.sqrt(XGBR_MSE)\n", - " model_results = pd.DataFrame([['Lasso', Lasso_RMSE, Lasso_score],\n", - " ['XgBoost', XGBR_RMSE, XGBR_score]],\n", - " columns = ['模型(Model)','均方根误差(RMSE)', 'R^2 score'])\n", - " model_results1=model_results.set_index('模型(Model)')\n", - "\n", - " def plot_feature_importance(importance,names,model_type):\n", - " feature_importance = np.array(importance)\n", - " feature_names = np.array(names)\n", - "\n", - " data={'feature_names':feature_names,'feature_importance':feature_importance}\n", - " fi_df = pd.DataFrame(data)\n", - "\n", - " fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True)\n", - "\n", - " plt.figure(figsize=(10,8))\n", - " sn.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names'])\n", - "\n", - " plt.title(model_type + \" \"+'FEATURE IMPORTANCE')\n", - " plt.xlabel('FEATURE IMPORTANCE')\n", - " plt.ylabel('FEATURE NAMES')\n", - " from pylab import mpl\n", - " %pylab\n", - " mpl.rcParams['font.sans-serif'] = ['SimHei']\n", - " from xgboost import XGBRegressor\n", - " from sklearn.model_selection import GridSearchCV\n", - "\n", - " estimator = XGBRegressor(random_state=0,\n", - " nthread=4,\n", - " seed=0\n", - " )\n", - " parameters = {\n", - " 'max_depth': range (2, 11, 2), # 树的最大深度\n", - " 'n_estimators': range (50, 101, 10), # 迭代次数\n", - " 'learning_rate': [0.01, 0.03, 0.1, 0.3, 0.5, 1]\n", - " }\n", - "\n", - " grid_search_XGB = GridSearchCV(\n", - " estimator=estimator,\n", - " param_grid=parameters,\n", - " # n_jobs = 10,\n", - " cv = 3,\n", - " verbose=True\n", - " )\n", - "\n", - " grid_search_XGB.fit(X_train, y_train)\n", - " print(\"Best score: %0.3f\" % grid_search_XGB.best_score_)\n", - " print(\"Best parameters set:\")\n", - " best_parameters = grid_search_XGB.best_estimator_.get_params()\n", - " for param_name in sorted(parameters.keys()):\n", - " print(\"\\t%s: %r\" % (param_name, best_parameters[param_name]))\n", - " y_pred = grid_search_XGB.predict(x_test)\n", - "\n", - " op_XGBR_score = r2_score(y_true,y_pred)\n", - " op_XGBR_MSE= mean_squared_error(y_true, y_pred)\n", - " op_XGBR_RMSE= np.sqrt(op_XGBR_MSE)\n", - "\n", - " model_results2 = pd.DataFrame([['Optimized_Xgboost', op_XGBR_RMSE, op_XGBR_score]],\n", - " columns = ['模型(Model)', '均方根误差(RMSE)', 'R^2 score'])\n", - " model_results2=model_results2.set_index('模型(Model)')\n", - "\n", - " # results = model_results1.append(model_results2, ignore_index = False)\n", - " results = pd.concat([model_results1,model_results2],ignore_index=True)\n", - " results\n", - " import pickle\n", - "\n", - " Pkl_Filename = \"日度价格预测_最佳模型.pkl\" \n", - "\n", - " with open(Pkl_Filename, 'wb') as file: \n", - " pickle.dump(grid_search_XGB, file)\n", - "\n", - "def read_xls_data():\n", - " \"\"\"获取特征项ID\"\"\"\n", - " global one_cols, two_cols\n", - " # 使用pandas读取Excel文件\n", - " df = pd.read_excel(read_file_path_name, header=None) # 不自动识别列名\n", - " # 获取第二行数据(索引为1)\n", - " one_cols = df.iloc[1].tolist()[1:]\n", - " print(f'获取到的数据项ID{one_cols}')\n", - "\n", - "# def read_xls_data():\n", - "# global one_cols, two_cols\n", - "# # 打开 XLS 文件\n", - "# workbook = xlrd.open_workbook(read_file_path_name)\n", - "\n", - "# # 获取所有表格名称\n", - "# # sheet_names = workbook.sheet_names()\n", - "\n", - "# # 选择第一个表格\n", - "# sheet = 
workbook.sheet_by_index(0)\n", - "\n", - "# # 获取行数和列数\n", - "# num_rows = sheet.nrows\n", - "# # num_cols = sheet.ncols\n", - "\n", - "# # 遍历每一行,获取单元格数据\n", - "# # for i in range(num_rows):\n", - "# # row_data = sheet.row_values(i)\n", - "# # one_cols.append(row_data)\n", - "# # two_cols.append(row_data[1])\n", - "\n", - "# row_data = sheet.row_values(1)\n", - "# one_cols = row_data\n", - "\n", - "# # 关闭 XLS 文件\n", - "# # workbook.close()\n", - "\n", - "def start_3(date=None,token=None,token_push=None):\n", - " '''预测上传数据'''\n", - " read_xls_data()\n", - "\n", - " if date==None:\n", - " date = datetime.now()\n", - " if token==None:\n", - " token = get_head_auth()\n", - " if token_push==None:\n", - " token = get_head_auth()\n", - "\n", - " datas = get_data_value(token, one_cols[1:],date)\n", - " if not datas:\n", - " return\n", - "\n", - " # data_list = [two_cols, one_cols]\n", - " append_rows = [get_cur_time(date)[1]]\n", - " dataItemNo_dataValue = {}\n", - " for data_value in datas:\n", - " if \"dataValue\" not in data_value:\n", - " print(data_value)\n", - " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", - " else:\n", - " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", - " \n", - " for value in one_cols[1:]:\n", - " if value in dataItemNo_dataValue:\n", - " append_rows.append(dataItemNo_dataValue[value])\n", - " else:\n", - " append_rows.append(\"\")\n", - " save_xls(append_rows)\n", - "\n", - " # 获取当月的数据写入到指定文件\n", - " # optimize_Model()\n", - " # upload_data_to_system(token_push,date)\n", - " # data_list.append(three_cols)\n", - " # write_xls(data_list)\n", - "\n", - "def check_data(dataItemNo):\n", - " token = get_head_auth()\n", - " if not token:\n", - " return\n", - "\n", - " datas = get_data_value(token, dataItemNo)\n", - " if not datas:\n", - " return\n", - "\n", - "def save_xls(append_rows):\n", - "\n", - " # 打开xls文件\n", - " workbook = xlrd.open_workbook('沥青数据项.xls')\n", - "\n", - " # 获取所有sheet的个数\n", - " sheet_count = len(workbook.sheet_names())\n", - "\n", - " # 获取所有sheet的名称\n", - " sheet_names = workbook.sheet_names()\n", - "\n", - " new_workbook = xlwt.Workbook()\n", - " for i in range(sheet_count):\n", - " # 获取当前sheet\n", - " sheet = workbook.sheet_by_index(i)\n", - "\n", - " # 获取sheet的行数和列数\n", - " row_count = sheet.nrows\n", - " col_count = sheet.ncols\n", - " # 获取原有数据\n", - " data = []\n", - " for row in range(row_count):\n", - " row_data = []\n", - " for col in range(col_count):\n", - " row_data.append(sheet.cell_value(row, col))\n", - " data.append(row_data)\n", - " # 创建xlwt的Workbook对象\n", - " # 创建sheet\n", - " new_sheet = new_workbook.add_sheet(sheet_names[i])\n", - "\n", - " # 将原有的数据写入新的sheet\n", - " for row in range(row_count):\n", - " for col in range(col_count):\n", - " new_sheet.write(row, col, data[row][col])\n", - "\n", - " if i == 0:\n", - " # 在新的sheet中添加数据\n", - " for col in range(col_count):\n", - " new_sheet.write(row_count, col, append_rows[col])\n", - "\n", - " # 保存新的xls文件\n", - " new_workbook.save(\"沥青数据项.xlsx\")\n", - "\n", - "def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEnd):\n", - "\n", - " search_data = {\n", - " \"funcModule\": \"数据项\",\n", - " \"funcOperation\": \"查询\",\n", - " \"data\": {\n", - " \"dateStart\": dateStart,\n", - " \"dateEnd\": dateEnd,\n", - " \"dataItemNoList\": dataItemNoList # 数据项编码,代表 brent最低价和最高价\n", - " }\n", - " }\n", - "\n", - " headers = {\"Authorization\": token}\n", - " search_res = requests.post(url=url, headers=headers, json=search_data, 
timeout=(3, 5))\n", - " search_value = json.loads(search_res.text)[\"data\"]\n", - " if search_value:\n", - " return search_value\n", - " else:\n", - " return None\n", - "\n", - "\n", - "def save_queryDataListItemNos_xls(data_df,dataItemNoList):\n", - " current_year_month = datetime.now().strftime('%Y-%m')\n", - " grouped = data_df.groupby(\"dataDate\")\n", - "\n", - " # 使用openpyxl打开xlsx文件\n", - " from openpyxl import load_workbook\n", - " workbook = load_workbook('沥青数据项.xlsx')\n", - "\n", - " # 创建新工作簿\n", - " new_workbook = load_workbook('沥青数据项.xlsx')\n", - " \n", - " for sheetname in workbook.sheetnames:\n", - " sheet = workbook[sheetname]\n", - " new_sheet = new_workbook[sheetname]\n", - " \n", - " current_year_month_row = 0\n", - " # 查找当前月份数据起始行\n", - " for row_idx, row in enumerate(sheet.iter_rows(values_only=True), 1):\n", - " if str(row[0]).startswith(current_year_month):\n", - " current_year_month_row += 1\n", - "\n", - " # 追加新数据\n", - " if sheetname == workbook.sheetnames[0]:\n", - " start_row = sheet.max_row - current_year_month_row + 1\n", - " for row_idx, (date, group) in enumerate(grouped, start=start_row):\n", - " new_sheet.cell(row=row_idx, column=1, value=date)\n", - " for j, dataItemNo in enumerate(dataItemNoList, start=2):\n", - " if group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values:\n", - " new_sheet.cell(row=row_idx, column=j, \n", - " value=group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values[0])\n", - "\n", - " # 保存修改后的xlsx文件\n", - " new_workbook.save(\"沥青数据项.xlsx\")\n", - "\n", - "\n", - "# def save_queryDataListItemNos_xls(data_df,dataItemNoList):\n", - "# from datetime import datetime, timedelta\n", - "# current_year_month = datetime.now().strftime('%Y-%m')\n", - "# grouped = data_df.groupby(\"dataDate\")\n", - "\n", - "# # 打开xls文件\n", - "# workbook = xlrd.open_workbook('沥青数据项.xlsx')\n", - "\n", - "# # 获取所有sheet的个数\n", - "# sheet_count = len(workbook.sheet_names())\n", - "\n", - "# # 获取所有sheet的名称\n", - "# sheet_names = workbook.sheet_names()\n", - "\n", - "# new_workbook = xlwt.Workbook()\n", - "# for i in range(sheet_count):\n", - "# # 获取当前sheet\n", - "# sheet = workbook.sheet_by_index(i)\n", - "\n", - "# # 获取sheet的行数和列数\n", - "# row_count = sheet.nrows\n", - "# col_count = sheet.ncols\n", - "# # 获取原有数据\n", - "# data = []\n", - "# for row in range(row_count):\n", - "# row_data = []\n", - "# for col in range(col_count):\n", - "# row_data.append(sheet.cell_value(row, col))\n", - "# data.append(row_data)\n", - "\n", - "# # 创建xlwt的Workbook对象\n", - "# # 创建sheet\n", - "# new_sheet = new_workbook.add_sheet(sheet_names[i])\n", - "\n", - "\n", - "# current_year_month_row = 0\n", - "# # 将原有的数据写入新的sheet\n", - "# for row in range(row_count):\n", - "# for col in range(col_count):\n", - "# col0 = data[row][0]\n", - "# # print(\"col0\",col0[:7])\n", - "# if col0[:7] == current_year_month:\n", - "# current_year_month_row += 1\n", - "# break\n", - "# new_sheet.write(row, col, data[row][col])\n", - "\n", - "\n", - "# # print(\"current_year_month_row\",current_year_month_row)\n", - "# if i == 0:\n", - "# rowFlag = 0\n", - "# # 查看每组数据\n", - "# for date, group in grouped:\n", - "# new_sheet.write(row_count + rowFlag - current_year_month_row, 0, date)\n", - "# for j in range(len(dataItemNoList)):\n", - "# dataItemNo = dataItemNoList[j]\n", - "\n", - "# # for dataItemNo in dataItemNoList:\n", - "# if group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values:\n", - "\n", - "# new_sheet.write(row_count + rowFlag - current_year_month_row, j + 1, 
group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values[0])\n", - "\n", - "# rowFlag += 1\n", - "\n", - "\n", - "# # 保存新的xls文件\n", - "# new_workbook.save(\"沥青数据项.xlsx\")\n", - "\n", - "def queryDataListItemNos(token=None):\n", - " from datetime import datetime, timedelta\n", - " df = pd.read_excel('沥青数据项.xlsx')\n", - " dataItemNoList = df.iloc[0].tolist()[1:]\n", - " \n", - " if token is None:\n", - " token = get_head_auth()\n", - "\n", - " if not token:\n", - " print('token获取失败')\n", - " return\n", - "\n", - " # 获取当前日期\n", - " current_date = datetime.now()\n", - "\n", - " # 获取当月1日\n", - " first_day_of_month = current_date.replace(day=1)\n", - "\n", - " # 格式化为 YYYYMMDD 格式\n", - " dateEnd = current_date.strftime('%Y%m%d')\n", - " dateStart = first_day_of_month.strftime('%Y%m%d')\n", - "\n", - " search_value = get_queryDataListItemNos_value(token, queryDataListItemNos_url, dataItemNoList, dateStart, dateEnd)\n", - "\n", - "\n", - " data_df = pd.DataFrame(search_value)\n", - "\n", - " data_df[\"dataDate\"] = pd.to_datetime(data_df[\"dataDate\"])\n", - " data_df[\"dataDate\"] = data_df[\"dataDate\"].dt.strftime('%Y-%m-%d')\n", - " save_queryDataListItemNos_xls(data_df,dataItemNoList)\n", - " print('当月数据更新完成')\n", - "\n", - "\n", - "def save_xls_1(append_rows):\n", - "\n", - " # 打开xls文件\n", - " workbook = xlrd.open_workbook('沥青数据项.xlsx')\n", - "\n", - " # 获取所有sheet的个数\n", - " sheet_count = len(workbook.sheet_names())\n", - "\n", - " # 获取所有sheet的名称\n", - " sheet_names = workbook.sheet_names()\n", - "\n", - " new_workbook = xlwt.Workbook()\n", - " for i in range(sheet_count):\n", - " # 获取当前sheet\n", - " sheet = workbook.sheet_by_index(i)\n", - "\n", - " # 获取sheet的行数和列数\n", - " row_count = sheet.nrows - 1\n", - " col_count = sheet.ncols\n", - " # 获取原有数据\n", - " data = []\n", - " for row in range(row_count):\n", - " row_data = []\n", - " for col in range(col_count):\n", - " row_data.append(sheet.cell_value(row, col))\n", - " data.append(row_data)\n", - " # 创建xlwt的Workbook对象\n", - " # 创建sheet\n", - " new_sheet = new_workbook.add_sheet(sheet_names[i])\n", - "\n", - " # 将原有的数据写入新的sheet\n", - " for row in range(row_count):\n", - " for col in range(col_count):\n", - " new_sheet.write(row, col, data[row][col])\n", - "\n", - " if i == 0:\n", - " # 在新的sheet中添加数据\n", - " for col in range(col_count):\n", - " new_sheet.write(row_count, col, append_rows[col])\n", - "\n", - " # 保存新的xls文件\n", - " new_workbook.save(\"沥青数据项.xlsx\") \n", - "\n", - "\n", - "\n", - "def start(date=''):\n", - " \"\"\"获取当日数据\"\"\"\n", - " read_xls_data()\n", - " token = get_head_auth()\n", - " if not token:\n", - " return\n", - " \n", - " cur_time,cur_time2 = getNow(date)\n", - " print(f\"获取{cur_time}数据\")\n", - " datas = get_data_value(token, one_cols,date=cur_time)\n", - " print(len(datas))\n", - " print(datas)\n", - " if not datas:\n", - " return\n", - "\n", - " append_rows = [cur_time2]\n", - " dataItemNo_dataValue = {}\n", - " for data_value in datas:\n", - " if \"dataValue\" not in data_value:\n", - " print(data_value)\n", - " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", - " else:\n", - " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", - " \n", - " for value in one_cols:\n", - " if value in dataItemNo_dataValue:\n", - " append_rows.append(dataItemNo_dataValue[value])\n", - " else:\n", - " append_rows.append(\"\")\n", - " \n", - " print('添加的行:',len(append_rows),append_rows)\n", - " save_xls_2(append_rows)\n", - "\n", - "\n", - "def save_xls_2(append_rows):\n", - " 
\"\"\"保存或更新数据到Excel文件\n", - " 参数:\n", - " append_rows (list): 需要追加/更新的数据行,格式为[日期, 数据项1, 数据项2,...]\n", - " \"\"\"\n", - " # try:\n", - " # 读取现有数据(假设第一行为列名)\n", - " df = pd.read_excel('沥青数据项.xlsx', sheet_name=0)\n", - " print('文件中的数据列数:',len(df.columns),df.columns)\n", - " # 转换append_rows为DataFrame\n", - " if len(append_rows) != len(df.columns):\n", - " # 去除第二个元素 ,不知道什么原因多一个空数据\n", - " append_rows.pop(1)\n", - " append_rows = pd.DataFrame([append_rows],columns=df.columns)\n", - " # 创建新数据行\n", - " new_date = append_rows['日期'].values[0]\n", - " \n", - " dates = df['日期'].to_list()\n", - " # 判断日期是否存在\n", - " if new_date in dates:\n", - " # 找到日期所在行的索引\n", - " date_mask = df['日期'] == new_date\n", - " # 存在则更新数据\n", - " df.loc[date_mask] = append_rows.values\n", - " print(f\"更新 {new_date} 数据\")\n", - " else:\n", - " # 不存在则追加数据\n", - " df = pd.concat([df, append_rows], ignore_index=True)\n", - " print(df.head())\n", - " print(df.tail())\n", - " print(f\"插入 {new_date} 新数据\")\n", - " \n", - " # 保存更新后的数据\n", - " df.to_excel('沥青数据项.xlsx', index=False, engine='openpyxl')\n", - " \n", - " # except FileNotFoundError:\n", - " # # 如果文件不存在则创建新文件\n", - " # pd.DataFrame([append_rows]).to_excel('沥青数据项.xlsx', index=False, engine='openpyxl')\n", - " # except Exception as e:\n", - " # print(f\"保存数据时发生错误: {str(e)}\")\n", - "\n", - "\n", - "def main(start_date=None,token=None,token_push=None):\n", - " if start_date is None:\n", - " start_date = datetime.now()\n", - " if token is None:\n", - " token = get_head_auth()\n", - " if token_push is None:\n", - " token_push = get_head_push_auth()\n", - " date = start_date.strftime('%Y%m%d')\n", - " print(date)\n", - "# start(date)\n", - " # 更新当月数据\n", - " queryDataListItemNos(token)\n", - " # 训练模型\n", - " optimize_Model()\n", - " # # 预测&上传预测结果\n", - " upload_data_to_system(token_push,start_date)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "运行中ing...\n" - ] - } - ], - "source": [ - "\n", - "if __name__ == \"__main__\":\n", - " print(\"运行中ing...\")\n", - " # 每天定时12点运行\n", - " while True:\n", - " # 获取当前时间\n", - " current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n", - " # print(current_time)\n", - "\n", - " # 判断当前时间是否为执行任务的时间点\n", - " try:\n", - " if current_time == \"09:13:00\":\n", - " print(\"执行定时任务\")\n", - " main()\n", - " elif current_time == \"09:18:00\":\n", - " print(\"执行定时任务\")\n", - " main()\n", - " time.sleep(1)\n", - " except :\n", - " print(f\"{current_time}任务失败\")\n", - "# main()\n", - " # 检测数据准确性, 需要检测放开\n", - " # check_data(\"100028098|LISTING_PRICE\")\n", - " # check_data(\"9137070016544622XB|DAY_Yield\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# start_date = datetime(2025, 5, 16)\n", - "# end_date = datetime(2025, 5, 19)\n", - "# token = get_head_auth()\n", - "\n", - "# token_push = get_head_push_auth()\n", - "\n", - "# while start_date < end_date:\n", - "# main(start_date,token,token_push)\n", - "# start_date += timedelta(days=1)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "### 代码备份:\n", - "\n", - "\n", - "class codeBackup:\n", - "\n", - " \n", - "\n", - "\n", - " def write_xls(data,date):\n", - " # 创建一个Workbook对象\n", - " workbook = xlwt.Workbook()\n", - "\n", - " # 创建一个Sheet对象,可指定名称\n", - " sheet = workbook.load('Sheet1')\n", - "\n", - " # 写入数据行\n", - " for row_index, row_data in 
enumerate(data):\n", - " for col_index, cell_data in enumerate(row_data):\n", - " sheet.write(row_index, col_index, cell_data)\n", - "\n", - " # 保存Workbook到文件\n", - " workbook.save(get_cur_time(date)[0] + '.xls')\n", - "\n", - "\n", - " def start():\n", - " '''预测上传数据'''\n", - " read_xls_data()\n", - "\n", - " token = get_head_auth()\n", - " if not token:\n", - " return\n", - " token_push = get_head_push_auth()\n", - " if not token_push:\n", - " return\n", - "\n", - " datas = get_data_value(token, one_cols[1:])\n", - " if not datas:\n", - " return\n", - "\n", - " # data_list = [two_cols, one_cols]\n", - " append_rows = [get_cur_time()[1]]\n", - " dataItemNo_dataValue = {}\n", - " for data_value in datas:\n", - " if \"dataValue\" not in data_value:\n", - " print(data_value)\n", - " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", - " else:\n", - " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", - " \n", - " for value in one_cols[1:]:\n", - " if value in dataItemNo_dataValue:\n", - " append_rows.append(dataItemNo_dataValue[value])\n", - " else:\n", - " append_rows.append(\"\")\n", - " save_xls(append_rows)\n", - " \n", - " # 获取当月的数据写入到指定文件\n", - " queryDataListItemNos(token)\n", - " optimize_Model()\n", - " upload_data_to_system(token_push)\n", - " # data_list.append(three_cols)\n", - " # write_xls(data_list)\n", - "\n", - "\n", - "\n", - "\n", - " def start_1():\n", - " '''更新数据'''\n", - " print(\"更新当天数据\")\n", - " read_xls_data()\n", - "\n", - " token = get_head_auth()\n", - " if not token:\n", - " return\n", - " \n", - "\n", - " datas = get_data_value(token, one_cols[1:])\n", - " if not datas:\n", - " return\n", - "\n", - " # data_list = [two_cols, one_cols]\n", - " append_rows = [get_cur_time()[1]]\n", - " dataItemNo_dataValue = {}\n", - " for data_value in datas:\n", - " if \"dataValue\" not in data_value:\n", - " print(data_value)\n", - " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", - " else:\n", - " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", - " \n", - " for value in one_cols[1:]:\n", - " if value in dataItemNo_dataValue:\n", - " append_rows.append(dataItemNo_dataValue[value])\n", - " else:\n", - " append_rows.append(\"\")\n", - " print(\"当天数据为:\",append_rows)\n", - " save_xls_1(append_rows)\n", - "\n", - " \n", - " # data_list.append(three_cols)\n", - " # write_xls(data_list)\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb.r87201 b/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb.r87201 deleted file mode 100644 index 44fa65f..0000000 --- a/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb.r87201 +++ /dev/null @@ -1,1156 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - " \n", - " \n", - " " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import requests\n", - "import json\n", - "from datetime import datetime, timedelta\n", - "import time\n", - "import pandas as pd\n", - "\n", - "# 变量定义\n", - "login_url = 
\"http://10.200.32.39/jingbo-api/api/server/login\"\n", - "search_url = \"http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos\"\n", - "queryDataListItemNos_url = \"http://10.200.32.39/jingbo-api//api/warehouse/dwDataItem/queryDataListItemNos\"\n", - "\n", - "login_push_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n", - "upload_url = \"http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList\"\n", - "\n", - "login_data = {\n", - " \"data\": {\n", - " \"account\": \"api_dev\",\n", - " \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n", - " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n", - " \"terminal\": \"API\"\n", - " },\n", - " \"funcModule\": \"API\",\n", - " \"funcOperation\": \"获取token\"\n", - "}\n", - "\n", - "login_push_data = {\n", - " \"data\": {\n", - " \"account\": \"api_dev\",\n", - " \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n", - " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n", - " \"terminal\": \"API\"\n", - " },\n", - " \"funcModule\": \"API\",\n", - " \"funcOperation\": \"获取token\"\n", - "}\n", - "\n", - "read_file_path_name = \"沥青数据项.xlsx\"\n", - "one_cols = []\n", - "two_cols = []\n", - "\n", - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sn\n", - "import random\n", - "import time\n", - "\n", - "\n", - "\n", - "\n", - "from plotly import __version__\n", - "from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot\n", - "\n", - "from sklearn import preprocessing\n", - "\n", - "from pandas import Series,DataFrame\n", - "\n", - "import matplotlib.pyplot as plt\n", - "\n", - "import sklearn.datasets as datasets\n", - "\n", - "#导入机器学习算法模型\n", - "from sklearn.linear_model import Lasso\n", - "from xgboost import XGBRegressor\n", - "\n", - "import statsmodels.api as sm\n", - "# from keras.preprocessing.sequence import TimeseriesGenerator\n", - "from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator\n", - "\n", - "import plotly.express as px\n", - "import plotly.graph_objects as go\n", - "\n", - "import xgboost as xgb\n", - "from xgboost import plot_importance, plot_tree\n", - "from sklearn.metrics import mean_absolute_error\n", - "from statsmodels.tools.eval_measures import mse,rmse\n", - "from sklearn.model_selection import GridSearchCV\n", - "from xgboost import XGBRegressor\n", - "import warnings\n", - "import pickle\n", - "\n", - "from sklearn.metrics import mean_squared_error\n", - "\n", - "#切割训练数据和样本数据\n", - "from sklearn.model_selection import train_test_split\n", - "\n", - "#用于模型评分\n", - "from sklearn.metrics import r2_score\n", - "\n", - "le = preprocessing.LabelEncoder()\n", - "\n", - "# print(__version__) # requires version >= 1.9.0\n", - "\n", - "\n", - "import cufflinks as cf\n", - "cf.go_offline()\n", - "\n", - "random.seed(100)\n", - "\n", - "%matplotlib inline\n", - "\n", - "# 数据获取\n", - "\n", - "def get_head_auth():\n", - " login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5))\n", - " text = json.loads(login_res.text)\n", - " if text[\"status\"]:\n", - " token = text[\"data\"][\"accessToken\"]\n", - " return token\n", - " else:\n", - " print(\"获取认证失败\")\n", - " return None\n", - "\n", - "\n", - "def get_data_value(token, dataItemNoList,date=''):\n", - "\n", - " search_data = {\n", - " \"data\": {\n", - " \"date\": getNow(date)[0],\n", - " \"dataItemNoList\": dataItemNoList\n", - " },\n", - " \"funcModule\": \"数据项\",\n", - " 
\"funcOperation\": \"查询\"\n", - " }\n", - " headers = {\"Authorization\": token}\n", - " search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5))\n", - " search_value = json.loads(search_res.text)[\"data\"]\n", - " if search_value:\n", - " return search_value\n", - " else:\n", - " print(\"今天没有新数据\")\n", - " return None\n", - "\n", - "\n", - "# xls文件处理\n", - "\n", - "\n", - "\n", - "def getNow(date='', offset=0):\n", - " \"\"\"生成指定日期的两种格式字符串\n", - " Args:\n", - " date: 支持多种输入类型:\n", - " - datetime对象\n", - " - 字符串格式(支持'%Y-%m-%d'和'%Y%m%d')\n", - " - 空字符串表示当前日期\n", - " offset: 日期偏移天数\n", - " Returns:\n", - " tuple: (紧凑日期字符串, 标准日期字符串)\n", - " \"\"\"\n", - " # 日期解析逻辑\n", - " from datetime import datetime,timedelta\n", - " if isinstance(date, datetime):\n", - " now = date\n", - " else:\n", - " now = datetime.now()\n", - " if date:\n", - " # 尝试多种日期格式解析\n", - " for fmt in ('%Y-%m-%d', '%Y%m%d', '%Y/%m/%d'):\n", - " try:\n", - " now = datetime.strptime(str(date), fmt)\n", - " break\n", - " except ValueError:\n", - " continue\n", - " else:\n", - " raise ValueError(f\"无法解析的日期格式: {date}\")\n", - "\n", - " # 应用日期偏移\n", - " now = now - timedelta(days=offset)\n", - " \n", - " # 统一格式化输出\n", - " date_str = now.strftime(\"%Y-%m-%d\")\n", - " compact_date = date_str.replace(\"-\", \"\")\n", - " return compact_date, date_str\n", - "\n", - "\n", - "\n", - "# def get_cur_time(date = ''):\n", - "# if date == '':\n", - "# now = datetime.now()\n", - "# else:\n", - "# now = date\n", - "# year = now.year\n", - "# month = now.month\n", - "# day = now.day\n", - "\n", - "# if month < 10:\n", - "# month = \"0\" + str(month)\n", - "# if day < 10:\n", - "# day = \"0\" + str(day)\n", - "# cur_time = str(year) + str(month) + str(day)\n", - "# cur_time2 = str(year) + \"-\" + str(month) + \"-\" + str(day)\n", - "# # cur_time = '20231007'\n", - "# # cur_time2 = '2023-10-07'\n", - "# return cur_time, cur_time2\n", - "\n", - "\n", - "def get_head_push_auth():\n", - " login_res = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5))\n", - " text = json.loads(login_res.text)\n", - " if text[\"status\"]:\n", - " token = text[\"data\"][\"accessToken\"]\n", - " return token\n", - " else:\n", - " print(\"获取认证失败\")\n", - " return None\n", - "\n", - "def upload_data_to_system(token_push,date):\n", - " data = {\n", - " \"funcModule\": \"数据表信息列表\",\n", - " \"funcOperation\": \"新增\",\n", - " \"data\": [\n", - " {\"dataItemNo\": \"C01100036|Forecast_Price|ACN\",\n", - " \"dataDate\": getNow(date)[0],\n", - " \"dataStatus\": \"add\",\n", - " \"dataValue\": forecast_price()\n", - " }\n", - "\n", - " ]\n", - " }\n", - " headers = {\"Authorization\": token_push}\n", - " res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))\n", - " print(res.text)\n", - "\n", - "def forecast_price():\n", - " df_test = pd.read_excel('沥青数据项.xlsx')\n", - " df_test.drop([0],inplace=True)\n", - " df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", - " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", - " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", - " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", - " '京博签单量','京博库存量','京博产量','加权平均成交价']] = df_test[['汽油执行价','柴油执行价','齐鲁石化销量',\n", - " '齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", - " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", - " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", - " 
'隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", - " '京博签单量','京博库存量','京博产量','加权平均成交价']].astype('float')\n", - " # df_test['日期']=pd.to_datetime(df_test['日期'], format='%d/%m/%Y',infer_datetime_format=True)\n", - " df_test['日期']=pd.to_datetime(df_test['日期'], format='%Y-%m-%d',infer_datetime_format=True)\n", - "\n", - " #查看每个特征缺失值数量\n", - " MisVal_Check=df_test.isnull().sum().sort_values(ascending=False)\n", - " #去掉缺失值百分比>0.4的特征,去掉这些特征后的新表格命名为df_test_1\n", - " df_MisVal_Check = pd.DataFrame(MisVal_Check,)#\n", - " df_MisVal_Check_1=df_MisVal_Check.reset_index()\n", - " df_MisVal_Check_1.columns=['Variable_Name','Missing_Number'] \n", - " df_MisVal_Check_1['Missing_Number']=df_MisVal_Check_1['Missing_Number']/len(df_test)\n", - " df_test_1=df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number']>0.4].Variable_Name,axis = 1)\n", - "\n", - " #将缺失值补为前一个或者后一个数值\n", - " df_test_1=df_test_1.fillna(df_test.ffill())\n", - " df_test_1=df_test_1.fillna(df_test_1.bfill())\n", - "\n", - " # 选择用于模型训练的列名称\n", - " col_for_training = df_test_1.columns\n", - " import joblib\n", - " Best_model_DalyLGPrice = joblib.load(\"日度价格预测_最佳模型.pkl\")\n", - " # 最新的一天为最后一行的数据\n", - " df_test_1_Day = df_test_1.tail(1)\n", - " # 移除不需要的列\n", - " df_test_1_Day.index = df_test_1_Day[\"日期\"]\n", - " df_test_1_Day = df_test_1_Day.drop([\"日期\"], axis= 1)\n", - " df_test_1_Day=df_test_1_Day.drop('京博指导价',axis=1)\n", - " df_test_1_Day=df_test_1_Day.dropna()\n", - "\n", - " # df_test_1_Day\n", - " #预测今日价格,显示至小数点后两位\n", - " Ypredict_Today=Best_model_DalyLGPrice.predict(df_test_1_Day)\n", - "\n", - " df_test_1_Day['日度预测价格']=Ypredict_Today\n", - " print(df_test_1_Day['日度预测价格'])\n", - " a = df_test_1_Day['日度预测价格']\n", - " a = a[0]\n", - " a = float(a)\n", - " a = round(a,2)\n", - " return a\n", - "def optimize_Model():\n", - " from sklearn.model_selection import train_test_split\n", - " from sklearn.impute import SimpleImputer\n", - " from sklearn.preprocessing import OrdinalEncoder\n", - " from sklearn.feature_selection import SelectFromModel\n", - " from sklearn.metrics import mean_squared_error, r2_score\n", - "\n", - " pd.set_option('display.max_rows',40) \n", - " pd.set_option('display.max_columns',40) \n", - " df_test = pd.read_excel('沥青数据项.xlsx')\n", - " df_test.drop([0],inplace=True)\n", - " df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", - " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", - " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", - " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", - " '京博签单量','京博库存量','京博产量','加权平均成交价']] = df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", - " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", - " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", - " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", - " '京博签单量','京博库存量','京博产量','加权平均成交价']].astype('float')\n", - " # df_test = pd.read_csv('定价模型数据收集20190901-20230615.csv',encoding = 'gbk',engine = 'python')\n", - " # df_test['日期']=pd.to_datetime(df_test['日期'], format='%m/%d/%Y',infer_datetime_format=True)\n", - " df_test['日期']=pd.to_datetime(df_test['日期'], format='%Y-%m-%d',infer_datetime_format=True)\n", - " # df_test.tail(3)\n", - " MisVal_Check=df_test.isnull().sum().sort_values(ascending=False)\n", - " #去掉缺失值百分比>0.4的特征,去掉这些特征后的新表格命名为df_test_1\n", - " df_MisVal_Check = pd.DataFrame(MisVal_Check,)#\n", - " 
df_MisVal_Check_1=df_MisVal_Check.reset_index()\n", - " df_MisVal_Check_1.columns=['Variable_Name','Missing_Number'] \n", - " df_MisVal_Check_1['Missing_Number']=df_MisVal_Check_1['Missing_Number']/len(df_test)\n", - " df_test_1=df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number']>0.4].Variable_Name,axis = 1)\n", - " #将缺失值补为前一个或者后一个数值\n", - " df_test_1=df_test_1.fillna(df_test.ffill())\n", - " df_test_1=df_test_1.fillna(df_test_1.bfill())\n", - " df_test_1[\"日期\"] = pd.to_datetime(df_test_1[\"日期\"])\n", - " df_test_1.index = df_test_1[\"日期\"]\n", - " df_test_1 = df_test_1.drop([\"日期\"], axis= 1)\n", - " dataset1=df_test_1.drop('京博指导价',axis=1)#.astype(float)\n", - "\n", - " y=df_test_1['京博指导价']\n", - "\n", - " x=dataset1 \n", - "\n", - " train = x\n", - " target = y\n", - "\n", - " #切割数据样本集合测试集\n", - " X_train,x_test,y_train,y_true = train_test_split(train,target,test_size=0.2,random_state=0)\n", - " \n", - " \n", - " from sklearn.linear_model import Lasso\n", - " from xgboost import XGBRegressor\n", - "\n", - " import statsmodels.api as sm\n", - " # from keras.preprocessing.sequence import TimeseriesGenerator\n", - " from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator\n", - "\n", - " import plotly.express as px\n", - " import plotly.graph_objects as go\n", - "\n", - " import xgboost as xgb\n", - " from xgboost import plot_importance, plot_tree\n", - " from sklearn.metrics import mean_absolute_error\n", - " from statsmodels.tools.eval_measures import mse,rmse\n", - " from sklearn.model_selection import GridSearchCV\n", - " from xgboost import XGBRegressor\n", - " import warnings\n", - " import pickle\n", - "\n", - " from sklearn.metrics import mean_squared_error\n", - "\n", - " #切割训练数据和样本数据\n", - " from sklearn.model_selection import train_test_split\n", - "\n", - " #用于模型评分\n", - " from sklearn.metrics import r2_score\n", - "\n", - " #模型缩写\n", - " Lasso = Lasso(random_state=0)\n", - " XGBR = XGBRegressor(random_state=0)\n", - " Lasso.fit(X_train,y_train)\n", - " XGBR.fit(X_train,y_train)\n", - " y_pre_Lasso = Lasso.predict(x_test)\n", - " y_pre_XGBR = XGBR.predict(x_test)\n", - "\n", - " #计算Lasso、XGBR、RandomForestR、AdaBoostR、GradientBoostingR、BaggingRegressor各模型的R²\n", - " Lasso_score = r2_score(y_true,y_pre_Lasso)\n", - " XGBR_score=r2_score(y_true,y_pre_XGBR)\n", - "\n", - " #计算Lasso、XGBR的MSE和RMSE\n", - " Lasso_MSE=mean_squared_error(y_true, y_pre_Lasso)\n", - " XGBR_MSE=mean_squared_error(y_true, y_pre_XGBR)\n", - "\n", - " Lasso_RMSE=np.sqrt(Lasso_MSE)\n", - " XGBR_RMSE=np.sqrt(XGBR_MSE)\n", - " model_results = pd.DataFrame([['Lasso', Lasso_RMSE, Lasso_score],\n", - " ['XgBoost', XGBR_RMSE, XGBR_score]],\n", - " columns = ['模型(Model)','均方根误差(RMSE)', 'R^2 score'])\n", - " model_results1=model_results.set_index('模型(Model)')\n", - "\n", - " def plot_feature_importance(importance,names,model_type):\n", - " feature_importance = np.array(importance)\n", - " feature_names = np.array(names)\n", - "\n", - " data={'feature_names':feature_names,'feature_importance':feature_importance}\n", - " fi_df = pd.DataFrame(data)\n", - "\n", - " fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True)\n", - "\n", - " plt.figure(figsize=(10,8))\n", - " sn.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names'])\n", - "\n", - " plt.title(model_type + \" \"+'FEATURE IMPORTANCE')\n", - " plt.xlabel('FEATURE IMPORTANCE')\n", - " plt.ylabel('FEATURE NAMES')\n", - " from pylab import mpl\n", - " %pylab\n", - " mpl.rcParams['font.sans-serif'] = 
['SimHei']\n", - " from xgboost import XGBRegressor\n", - " from sklearn.model_selection import GridSearchCV\n", - "\n", - " estimator = XGBRegressor(random_state=0,\n", - " nthread=4,\n", - " seed=0\n", - " )\n", - " parameters = {\n", - " 'max_depth': range (2, 11, 2), # 树的最大深度\n", - " 'n_estimators': range (50, 101, 10), # 迭代次数\n", - " 'learning_rate': [0.01, 0.03, 0.1, 0.3, 0.5, 1]\n", - " }\n", - "\n", - " grid_search_XGB = GridSearchCV(\n", - " estimator=estimator,\n", - " param_grid=parameters,\n", - " # n_jobs = 10,\n", - " cv = 3,\n", - " verbose=True\n", - " )\n", - "\n", - " grid_search_XGB.fit(X_train, y_train)\n", - " print(\"Best score: %0.3f\" % grid_search_XGB.best_score_)\n", - " print(\"Best parameters set:\")\n", - " best_parameters = grid_search_XGB.best_estimator_.get_params()\n", - " for param_name in sorted(parameters.keys()):\n", - " print(\"\\t%s: %r\" % (param_name, best_parameters[param_name]))\n", - " y_pred = grid_search_XGB.predict(x_test)\n", - "\n", - " op_XGBR_score = r2_score(y_true,y_pred)\n", - " op_XGBR_MSE= mean_squared_error(y_true, y_pred)\n", - " op_XGBR_RMSE= np.sqrt(op_XGBR_MSE)\n", - "\n", - " model_results2 = pd.DataFrame([['Optimized_Xgboost', op_XGBR_RMSE, op_XGBR_score]],\n", - " columns = ['模型(Model)', '均方根误差(RMSE)', 'R^2 score'])\n", - " model_results2=model_results2.set_index('模型(Model)')\n", - "\n", - " # results = model_results1.append(model_results2, ignore_index = False)\n", - " results = pd.concat([model_results1,model_results2],ignore_index=True)\n", - " results\n", - " import pickle\n", - "\n", - " Pkl_Filename = \"日度价格预测_最佳模型.pkl\" \n", - "\n", - " with open(Pkl_Filename, 'wb') as file: \n", - " pickle.dump(grid_search_XGB, file)\n", - "\n", - "def read_xls_data():\n", - " \"\"\"获取特征项ID\"\"\"\n", - " global one_cols, two_cols\n", - " # 使用pandas读取Excel文件\n", - " df = pd.read_excel(read_file_path_name, header=None) # 不自动识别列名\n", - " # 获取第二行数据(索引为1)\n", - " one_cols = df.iloc[1].tolist()[1:]\n", - " print(f'获取到的数据项ID{one_cols}')\n", - "\n", - "# def read_xls_data():\n", - "# global one_cols, two_cols\n", - "# # 打开 XLS 文件\n", - "# workbook = xlrd.open_workbook(read_file_path_name)\n", - "\n", - "# # 获取所有表格名称\n", - "# # sheet_names = workbook.sheet_names()\n", - "\n", - "# # 选择第一个表格\n", - "# sheet = workbook.sheet_by_index(0)\n", - "\n", - "# # 获取行数和列数\n", - "# num_rows = sheet.nrows\n", - "# # num_cols = sheet.ncols\n", - "\n", - "# # 遍历每一行,获取单元格数据\n", - "# # for i in range(num_rows):\n", - "# # row_data = sheet.row_values(i)\n", - "# # one_cols.append(row_data)\n", - "# # two_cols.append(row_data[1])\n", - "\n", - "# row_data = sheet.row_values(1)\n", - "# one_cols = row_data\n", - "\n", - "# # 关闭 XLS 文件\n", - "# # workbook.close()\n", - "\n", - "def start_3(date=None,token=None,token_push=None):\n", - " '''预测上传数据'''\n", - " read_xls_data()\n", - "\n", - " if date==None:\n", - " date = datetime.now()\n", - " if token==None:\n", - " token = get_head_auth()\n", - " if token_push==None:\n", - " token = get_head_auth()\n", - "\n", - " datas = get_data_value(token, one_cols[1:],date)\n", - " if not datas:\n", - " return\n", - "\n", - " # data_list = [two_cols, one_cols]\n", - " append_rows = [get_cur_time(date)[1]]\n", - " dataItemNo_dataValue = {}\n", - " for data_value in datas:\n", - " if \"dataValue\" not in data_value:\n", - " print(data_value)\n", - " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", - " else:\n", - " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", - " \n", - " for value in 
one_cols[1:]:\n", - " if value in dataItemNo_dataValue:\n", - " append_rows.append(dataItemNo_dataValue[value])\n", - " else:\n", - " append_rows.append(\"\")\n", - " save_xls(append_rows)\n", - "\n", - " # 获取当月的数据写入到指定文件\n", - " # optimize_Model()\n", - " # upload_data_to_system(token_push,date)\n", - " # data_list.append(three_cols)\n", - " # write_xls(data_list)\n", - "\n", - "def check_data(dataItemNo):\n", - " token = get_head_auth()\n", - " if not token:\n", - " return\n", - "\n", - " datas = get_data_value(token, dataItemNo)\n", - " if not datas:\n", - " return\n", - "\n", - "def save_xls(append_rows):\n", - "\n", - " # 打开xls文件\n", - " workbook = xlrd.open_workbook('沥青数据项.xls')\n", - "\n", - " # 获取所有sheet的个数\n", - " sheet_count = len(workbook.sheet_names())\n", - "\n", - " # 获取所有sheet的名称\n", - " sheet_names = workbook.sheet_names()\n", - "\n", - " new_workbook = xlwt.Workbook()\n", - " for i in range(sheet_count):\n", - " # 获取当前sheet\n", - " sheet = workbook.sheet_by_index(i)\n", - "\n", - " # 获取sheet的行数和列数\n", - " row_count = sheet.nrows\n", - " col_count = sheet.ncols\n", - " # 获取原有数据\n", - " data = []\n", - " for row in range(row_count):\n", - " row_data = []\n", - " for col in range(col_count):\n", - " row_data.append(sheet.cell_value(row, col))\n", - " data.append(row_data)\n", - " # 创建xlwt的Workbook对象\n", - " # 创建sheet\n", - " new_sheet = new_workbook.add_sheet(sheet_names[i])\n", - "\n", - " # 将原有的数据写入新的sheet\n", - " for row in range(row_count):\n", - " for col in range(col_count):\n", - " new_sheet.write(row, col, data[row][col])\n", - "\n", - " if i == 0:\n", - " # 在新的sheet中添加数据\n", - " for col in range(col_count):\n", - " new_sheet.write(row_count, col, append_rows[col])\n", - "\n", - " # 保存新的xls文件\n", - " new_workbook.save(\"沥青数据项.xlsx\")\n", - "\n", - "def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEnd):\n", - "\n", - " search_data = {\n", - " \"funcModule\": \"数据项\",\n", - " \"funcOperation\": \"查询\",\n", - " \"data\": {\n", - " \"dateStart\": dateStart,\n", - " \"dateEnd\": dateEnd,\n", - " \"dataItemNoList\": dataItemNoList # 数据项编码,代表 brent最低价和最高价\n", - " }\n", - " }\n", - "\n", - " headers = {\"Authorization\": token}\n", - " search_res = requests.post(url=url, headers=headers, json=search_data, timeout=(3, 5))\n", - " search_value = json.loads(search_res.text)[\"data\"]\n", - " if search_value:\n", - " return search_value\n", - " else:\n", - " return None\n", - "\n", - "\n", - "def save_queryDataListItemNos_xls(data_df,dataItemNoList):\n", - " current_year_month = datetime.now().strftime('%Y-%m')\n", - " grouped = data_df.groupby(\"dataDate\")\n", - "\n", - " # 使用openpyxl打开xlsx文件\n", - " from openpyxl import load_workbook\n", - " workbook = load_workbook('沥青数据项.xlsx')\n", - "\n", - " # 创建新工作簿\n", - " new_workbook = load_workbook('沥青数据项.xlsx')\n", - " \n", - " for sheetname in workbook.sheetnames:\n", - " sheet = workbook[sheetname]\n", - " new_sheet = new_workbook[sheetname]\n", - " \n", - " current_year_month_row = 0\n", - " # 查找当前月份数据起始行\n", - " for row_idx, row in enumerate(sheet.iter_rows(values_only=True), 1):\n", - " if str(row[0]).startswith(current_year_month):\n", - " current_year_month_row += 1\n", - "\n", - " # 追加新数据\n", - " if sheetname == workbook.sheetnames[0]:\n", - " start_row = sheet.max_row - current_year_month_row + 1\n", - " for row_idx, (date, group) in enumerate(grouped, start=start_row):\n", - " new_sheet.cell(row=row_idx, column=1, value=date)\n", - " for j, dataItemNo in enumerate(dataItemNoList, start=2):\n", - 
" if group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values:\n", - " new_sheet.cell(row=row_idx, column=j, \n", - " value=group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values[0])\n", - "\n", - " # 保存修改后的xlsx文件\n", - " new_workbook.save(\"沥青数据项.xlsx\")\n", - "\n", - "\n", - "# def save_queryDataListItemNos_xls(data_df,dataItemNoList):\n", - "# from datetime import datetime, timedelta\n", - "# current_year_month = datetime.now().strftime('%Y-%m')\n", - "# grouped = data_df.groupby(\"dataDate\")\n", - "\n", - "# # 打开xls文件\n", - "# workbook = xlrd.open_workbook('沥青数据项.xlsx')\n", - "\n", - "# # 获取所有sheet的个数\n", - "# sheet_count = len(workbook.sheet_names())\n", - "\n", - "# # 获取所有sheet的名称\n", - "# sheet_names = workbook.sheet_names()\n", - "\n", - "# new_workbook = xlwt.Workbook()\n", - "# for i in range(sheet_count):\n", - "# # 获取当前sheet\n", - "# sheet = workbook.sheet_by_index(i)\n", - "\n", - "# # 获取sheet的行数和列数\n", - "# row_count = sheet.nrows\n", - "# col_count = sheet.ncols\n", - "# # 获取原有数据\n", - "# data = []\n", - "# for row in range(row_count):\n", - "# row_data = []\n", - "# for col in range(col_count):\n", - "# row_data.append(sheet.cell_value(row, col))\n", - "# data.append(row_data)\n", - "\n", - "# # 创建xlwt的Workbook对象\n", - "# # 创建sheet\n", - "# new_sheet = new_workbook.add_sheet(sheet_names[i])\n", - "\n", - "\n", - "# current_year_month_row = 0\n", - "# # 将原有的数据写入新的sheet\n", - "# for row in range(row_count):\n", - "# for col in range(col_count):\n", - "# col0 = data[row][0]\n", - "# # print(\"col0\",col0[:7])\n", - "# if col0[:7] == current_year_month:\n", - "# current_year_month_row += 1\n", - "# break\n", - "# new_sheet.write(row, col, data[row][col])\n", - "\n", - "\n", - "# # print(\"current_year_month_row\",current_year_month_row)\n", - "# if i == 0:\n", - "# rowFlag = 0\n", - "# # 查看每组数据\n", - "# for date, group in grouped:\n", - "# new_sheet.write(row_count + rowFlag - current_year_month_row, 0, date)\n", - "# for j in range(len(dataItemNoList)):\n", - "# dataItemNo = dataItemNoList[j]\n", - "\n", - "# # for dataItemNo in dataItemNoList:\n", - "# if group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values:\n", - "\n", - "# new_sheet.write(row_count + rowFlag - current_year_month_row, j + 1, group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values[0])\n", - "\n", - "# rowFlag += 1\n", - "\n", - "\n", - "# # 保存新的xls文件\n", - "# new_workbook.save(\"沥青数据项.xlsx\")\n", - "\n", - "def queryDataListItemNos(token=None):\n", - " from datetime import datetime, timedelta\n", - " df = pd.read_excel('沥青数据项.xlsx')\n", - " dataItemNoList = df.iloc[0].tolist()[1:]\n", - " \n", - " if token is None:\n", - " token = get_head_auth()\n", - "\n", - " if not token:\n", - " print('token获取失败')\n", - " return\n", - "\n", - " # 获取当前日期\n", - " current_date = datetime.now()\n", - "\n", - " # 获取当月1日\n", - " first_day_of_month = current_date.replace(day=1)\n", - "\n", - " # 格式化为 YYYYMMDD 格式\n", - " dateEnd = current_date.strftime('%Y%m%d')\n", - " dateStart = first_day_of_month.strftime('%Y%m%d')\n", - "\n", - " search_value = get_queryDataListItemNos_value(token, queryDataListItemNos_url, dataItemNoList, dateStart, dateEnd)\n", - "\n", - "\n", - " data_df = pd.DataFrame(search_value)\n", - "\n", - " data_df[\"dataDate\"] = pd.to_datetime(data_df[\"dataDate\"])\n", - " data_df[\"dataDate\"] = data_df[\"dataDate\"].dt.strftime('%Y-%m-%d')\n", - " save_queryDataListItemNos_xls(data_df,dataItemNoList)\n", - " print('当月数据更新完成')\n", - "\n", - "\n", - "def 
save_xls_1(append_rows):\n", - "\n", - " # 打开xls文件\n", - " workbook = xlrd.open_workbook('沥青数据项.xlsx')\n", - "\n", - " # 获取所有sheet的个数\n", - " sheet_count = len(workbook.sheet_names())\n", - "\n", - " # 获取所有sheet的名称\n", - " sheet_names = workbook.sheet_names()\n", - "\n", - " new_workbook = xlwt.Workbook()\n", - " for i in range(sheet_count):\n", - " # 获取当前sheet\n", - " sheet = workbook.sheet_by_index(i)\n", - "\n", - " # 获取sheet的行数和列数\n", - " row_count = sheet.nrows - 1\n", - " col_count = sheet.ncols\n", - " # 获取原有数据\n", - " data = []\n", - " for row in range(row_count):\n", - " row_data = []\n", - " for col in range(col_count):\n", - " row_data.append(sheet.cell_value(row, col))\n", - " data.append(row_data)\n", - " # 创建xlwt的Workbook对象\n", - " # 创建sheet\n", - " new_sheet = new_workbook.add_sheet(sheet_names[i])\n", - "\n", - " # 将原有的数据写入新的sheet\n", - " for row in range(row_count):\n", - " for col in range(col_count):\n", - " new_sheet.write(row, col, data[row][col])\n", - "\n", - " if i == 0:\n", - " # 在新的sheet中添加数据\n", - " for col in range(col_count):\n", - " new_sheet.write(row_count, col, append_rows[col])\n", - "\n", - " # 保存新的xls文件\n", - " new_workbook.save(\"沥青数据项.xlsx\") \n", - "\n", - "\n", - "\n", - "def start(date=''):\n", - " \"\"\"获取当日数据\"\"\"\n", - " read_xls_data()\n", - " token = get_head_auth()\n", - " if not token:\n", - " return\n", - " \n", - " cur_time,cur_time2 = getNow(date)\n", - " print(f\"获取{cur_time}数据\")\n", - " datas = get_data_value(token, one_cols,date=cur_time)\n", - " print(len(datas))\n", - " print(datas)\n", - " if not datas:\n", - " return\n", - "\n", - " append_rows = [cur_time2]\n", - " dataItemNo_dataValue = {}\n", - " for data_value in datas:\n", - " if \"dataValue\" not in data_value:\n", - " print(data_value)\n", - " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", - " else:\n", - " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", - " \n", - " for value in one_cols:\n", - " if value in dataItemNo_dataValue:\n", - " append_rows.append(dataItemNo_dataValue[value])\n", - " else:\n", - " append_rows.append(\"\")\n", - " \n", - " print('添加的行:',len(append_rows),append_rows)\n", - " save_xls_2(append_rows)\n", - "\n", - "\n", - "def save_xls_2(append_rows):\n", - " \"\"\"保存或更新数据到Excel文件\n", - " 参数:\n", - " append_rows (list): 需要追加/更新的数据行,格式为[日期, 数据项1, 数据项2,...]\n", - " \"\"\"\n", - " # try:\n", - " # 读取现有数据(假设第一行为列名)\n", - " df = pd.read_excel('沥青数据项.xlsx', sheet_name=0)\n", - " print('文件中的数据列数:',len(df.columns),df.columns)\n", - " # 转换append_rows为DataFrame\n", - " if len(append_rows) != len(df.columns):\n", - " # 去除第二个元素 ,不知道什么原因多一个空数据\n", - " append_rows.pop(1)\n", - " append_rows = pd.DataFrame([append_rows],columns=df.columns)\n", - " # 创建新数据行\n", - " new_date = append_rows['日期'].values[0]\n", - " \n", - " dates = df['日期'].to_list()\n", - " # 判断日期是否存在\n", - " if new_date in dates:\n", - " # 找到日期所在行的索引\n", - " date_mask = df['日期'] == new_date\n", - " # 存在则更新数据\n", - " df.loc[date_mask] = append_rows.values\n", - " print(f\"更新 {new_date} 数据\")\n", - " else:\n", - " # 不存在则追加数据\n", - " df = pd.concat([df, append_rows], ignore_index=True)\n", - " print(df.head())\n", - " print(df.tail())\n", - " print(f\"插入 {new_date} 新数据\")\n", - " \n", - " # 保存更新后的数据\n", - " df.to_excel('沥青数据项.xlsx', index=False, engine='openpyxl')\n", - " \n", - " # except FileNotFoundError:\n", - " # # 如果文件不存在则创建新文件\n", - " # pd.DataFrame([append_rows]).to_excel('沥青数据项.xlsx', index=False, engine='openpyxl')\n", - " # except Exception as 
e:\n", - " # print(f\"保存数据时发生错误: {str(e)}\")\n", - "\n", - "\n", - "def main(start_date=None,token=None,token_push=None):\n", - " if start_date is None:\n", - " start_date = datetime.now()\n", - " if token is None:\n", - " token = get_head_auth()\n", - " if token_push is None:\n", - " token_push = get_head_push_auth()\n", - " date = start_date.strftime('%Y%m%d')\n", - " print(date)\n", - "# start(date)\n", - " # 更新当月数据\n", - " queryDataListItemNos(token)\n", - " # 训练模型\n", - " optimize_Model()\n", - " # # 预测&上传预测结果\n", - " upload_data_to_system(token_push,start_date)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "20250522\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20588\\2014406737.py:615: DeprecationWarning:\n", - "\n", - "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", - "\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "当月数据更新完成\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20588\\2014406737.py:298: UserWarning:\n", - "\n", - "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", - "\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using matplotlib backend: inline\n", - "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", - "Populating the interactive namespace from numpy and matplotlib\n", - "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "c:\\Users\\EDY\\.conda\\envs\\jiageyuce\\Lib\\site-packages\\IPython\\core\\magics\\pylab.py:166: UserWarning:\n", - "\n", - "pylab import has clobbered these variables: ['random', 'datetime', 'plot', '__version__']\n", - "`%matplotlib` prevents importing * from pylab and numpy\n", - "\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Best score: 0.997\n", - "Best parameters set:\n", - "\tlearning_rate: 0.1\n", - "\tmax_depth: 8\n", - "\tn_estimators: 90\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20588\\2014406737.py:238: UserWarning:\n", - "\n", - "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", - "\n", - "C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20588\\2014406737.py:272: FutureWarning:\n", - "\n", - "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). 
To access a value by position, use `ser.iloc[pos]`\n", - "\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "日期\n", - "2025-05-22 3600.097412\n", - "Name: 日度预测价格, dtype: float32\n", - "{\"confirmFlg\":false,\"status\":true}\n" - ] - } - ], - "source": [ - "\n", - "# if __name__ == \"__main__\":\n", - "# print(\"运行中ing...\")\n", - "# # 每天定时12点运行\n", - "# while True:\n", - "# # 获取当前时间\n", - "# current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n", - "# # print(current_time)\n", - "\n", - "# # 判断当前时间是否为执行任务的时间点\n", - "# try:\n", - "# if current_time == \"09:13:00\":\n", - "# print(\"执行定时任务\")\n", - "# main()\n", - "# elif current_time == \"09:18:00\":\n", - "# print(\"执行定时任务\")\n", - "# main()\n", - "# time.sleep(1)\n", - "# except :\n", - "# print(f\"{current_time}任务失败\")\n", - "main()\n", - " # 检测数据准确性, 需要检测放开\n", - " # check_data(\"100028098|LISTING_PRICE\")\n", - " # check_data(\"9137070016544622XB|DAY_Yield\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "# start_date = datetime(2025, 5, 16)\n", - "# end_date = datetime(2025, 5, 19)\n", - "# token = get_head_auth()\n", - "\n", - "# token_push = get_head_push_auth()\n", - "\n", - "# while start_date < end_date:\n", - "# main(start_date,token,token_push)\n", - "# start_date += timedelta(days=1)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "### 代码备份:\n", - "\n", - "\n", - "class codeBackup:\n", - "\n", - " \n", - "\n", - "\n", - " def write_xls(data,date):\n", - " # 创建一个Workbook对象\n", - " workbook = xlwt.Workbook()\n", - "\n", - " # 创建一个Sheet对象,可指定名称\n", - " sheet = workbook.load('Sheet1')\n", - "\n", - " # 写入数据行\n", - " for row_index, row_data in enumerate(data):\n", - " for col_index, cell_data in enumerate(row_data):\n", - " sheet.write(row_index, col_index, cell_data)\n", - "\n", - " # 保存Workbook到文件\n", - " workbook.save(get_cur_time(date)[0] + '.xls')\n", - "\n", - "\n", - " def start():\n", - " '''预测上传数据'''\n", - " read_xls_data()\n", - "\n", - " token = get_head_auth()\n", - " if not token:\n", - " return\n", - " token_push = get_head_push_auth()\n", - " if not token_push:\n", - " return\n", - "\n", - " datas = get_data_value(token, one_cols[1:])\n", - " if not datas:\n", - " return\n", - "\n", - " # data_list = [two_cols, one_cols]\n", - " append_rows = [get_cur_time()[1]]\n", - " dataItemNo_dataValue = {}\n", - " for data_value in datas:\n", - " if \"dataValue\" not in data_value:\n", - " print(data_value)\n", - " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", - " else:\n", - " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", - " \n", - " for value in one_cols[1:]:\n", - " if value in dataItemNo_dataValue:\n", - " append_rows.append(dataItemNo_dataValue[value])\n", - " else:\n", - " append_rows.append(\"\")\n", - " save_xls(append_rows)\n", - " \n", - " # 获取当月的数据写入到指定文件\n", - " queryDataListItemNos(token)\n", - " optimize_Model()\n", - " upload_data_to_system(token_push)\n", - " # data_list.append(three_cols)\n", - " # write_xls(data_list)\n", - "\n", - "\n", - "\n", - "\n", - " def start_1():\n", - " '''更新数据'''\n", - " print(\"更新当天数据\")\n", - " read_xls_data()\n", - "\n", - " token = get_head_auth()\n", - " if not token:\n", - " return\n", - " \n", - "\n", - " datas = get_data_value(token, one_cols[1:])\n", - " if not datas:\n", - " return\n", - "\n", - " # data_list = [two_cols, one_cols]\n", - " append_rows = 
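# Hedged sketch of the commented-out scheduler shown above: poll the wall clock once per
# second and call main() at fixed times, catching failures so the loop keeps running.
# The trigger times and the extra sleep after a run are illustrative assumptions.
import time

def run_daily(job, run_at=("09:13:00", "09:18:00")):
    """Call job() whenever the local time matches one of run_at (HH:MM:SS strings)."""
    while True:
        now = time.strftime("%H:%M:%S", time.localtime())
        if now in run_at:
            try:
                print(f"{now} running scheduled job")
                job()
            except Exception as exc:   # keep the scheduler alive on failure
                print(f"{now} job failed: {exc}")
            time.sleep(1)              # move past the triggering second
        time.sleep(1)

# run_daily(main)   # 'main' is the function defined in the cell above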
[get_cur_time()[1]]\n", - " dataItemNo_dataValue = {}\n", - " for data_value in datas:\n", - " if \"dataValue\" not in data_value:\n", - " print(data_value)\n", - " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", - " else:\n", - " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", - " \n", - " for value in one_cols[1:]:\n", - " if value in dataItemNo_dataValue:\n", - " append_rows.append(dataItemNo_dataValue[value])\n", - " else:\n", - " append_rows.append(\"\")\n", - " print(\"当天数据为:\",append_rows)\n", - " save_xls_1(append_rows)\n", - "\n", - " \n", - " # data_list.append(three_cols)\n", - " # write_xls(data_list)\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.11" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/aisenzhecode/沥青/沥青定量每日执行.py b/aisenzhecode/沥青/沥青定量每日执行.py new file mode 100644 index 0000000..136a942 --- /dev/null +++ b/aisenzhecode/沥青/沥青定量每日执行.py @@ -0,0 +1,718 @@ +import requests +import json +from datetime import datetime, timedelta +import time +import pandas as pd + +# 变量定义 +login_url = "http://10.200.32.39/jingbo-api/api/server/login" +search_url = "http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos" +queryDataListItemNos_url = "http://10.200.32.39/jingbo-api//api/warehouse/dwDataItem/queryDataListItemNos" + +login_push_url = "http://10.200.32.39/jingbo-api/api/server/login" +upload_url = "http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList" + +login_data = { + "data": { + "account": "api_dev", + "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=", + "tenantHashCode": "8a4577dbd919675758d57999a1e891fe", + "terminal": "API" + }, + "funcModule": "API", + "funcOperation": "获取token" +} + +login_push_data = { + "data": { + "account": "api_dev", + "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=", + "tenantHashCode": "8a4577dbd919675758d57999a1e891fe", + "terminal": "API" + }, + "funcModule": "API", + "funcOperation": "获取token" +} + +read_file_path_name = "沥青数据项.xlsx" +one_cols = [] +two_cols = [] + +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +import seaborn as sn +import random +import time + +from plotly import __version__ +from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot + +from sklearn import preprocessing + +from pandas import Series,DataFrame + +import matplotlib.pyplot as plt + +import sklearn.datasets as datasets + +#导入机器学习算法模型 +from sklearn.linear_model import Lasso +from xgboost import XGBRegressor + +import statsmodels.api as sm +# from keras.preprocessing.sequence import TimeseriesGenerator +from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator + +import plotly.express as px +import plotly.graph_objects as go + +import xgboost as xgb +from xgboost import plot_importance, plot_tree +from sklearn.metrics import mean_absolute_error +from statsmodels.tools.eval_measures import mse,rmse +from sklearn.model_selection import GridSearchCV +from xgboost import XGBRegressor +import warnings +import pickle + +from sklearn.metrics import mean_squared_error + +#切割训练数据和样本数据 +from sklearn.model_selection import train_test_split + +#用于模型评分 +from sklearn.metrics import 
r2_score + +le = preprocessing.LabelEncoder() + +# print(__version__) # requires version >= 1.9.0 + + +import cufflinks as cf +cf.go_offline() + +random.seed(100) + +# 数据获取 +def get_head_auth(): + login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5)) + text = json.loads(login_res.text) + if text["status"]: + token = text["data"]["accessToken"] + return token + else: + print("获取认证失败") + return None + + +def get_data_value(token, dataItemNoList,date=''): + + search_data = { + "data": { + "date": getNow(date)[0], + "dataItemNoList": dataItemNoList + }, + "funcModule": "数据项", + "funcOperation": "查询" + } + headers = {"Authorization": token} + search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5)) + search_value = json.loads(search_res.text)["data"] + if search_value: + return search_value + else: + print("今天没有新数据") + return None + + +# xls文件处理 + +def getNow(date='', offset=0): + """生成指定日期的两种格式字符串 + Args: + date: 支持多种输入类型: + - datetime对象 + - 字符串格式(支持'%Y-%m-%d'和'%Y%m%d') + - 空字符串表示当前日期 + offset: 日期偏移天数 + Returns: + tuple: (紧凑日期字符串, 标准日期字符串) + """ + # 日期解析逻辑 + from datetime import datetime,timedelta + if isinstance(date, datetime): + now = date + else: + now = datetime.now() + if date: + # 尝试多种日期格式解析 + for fmt in ('%Y-%m-%d', '%Y%m%d', '%Y/%m/%d'): + try: + now = datetime.strptime(str(date), fmt) + break + except ValueError: + continue + else: + raise ValueError(f"无法解析的日期格式: {date}") + + # 应用日期偏移 + now = now - timedelta(days=offset) + + # 统一格式化输出 + date_str = now.strftime("%Y-%m-%d") + compact_date = date_str.replace("-", "") + return compact_date, date_str + + +def get_head_push_auth(): + login_res = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5)) + text = json.loads(login_res.text) + if text["status"]: + token = text["data"]["accessToken"] + return token + else: + print("获取认证失败") + return None + +def upload_data_to_system(token_push,date): + data = { + "funcModule": "数据表信息列表", + "funcOperation": "新增", + "data": [ + {"dataItemNo": "C01100036|Forecast_Price|ACN", + "dataDate": getNow(date)[0], + "dataStatus": "add", + "dataValue": forecast_price() + } + + ] + } + headers = {"Authorization": token_push} + res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5)) + print(res.text) + +def forecast_price(): + df_test = pd.read_excel('沥青数据项.xlsx') + df_test.drop([0],inplace=True) + df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量', + '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价', + '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存', + '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存', + '京博签单量','京博库存量','京博产量','加权平均成交价']] = df_test[['汽油执行价','柴油执行价','齐鲁石化销量', + '齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量', + '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价', + '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存', + '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存', + '京博签单量','京博库存量','京博产量','加权平均成交价']].astype('float') + # df_test['日期']=pd.to_datetime(df_test['日期'], format='%d/%m/%Y',infer_datetime_format=True) + df_test['日期']=pd.to_datetime(df_test['日期'], format='%Y-%m-%d',infer_datetime_format=True) + + #查看每个特征缺失值数量 + MisVal_Check=df_test.isnull().sum().sort_values(ascending=False) + #去掉缺失值百分比>0.4的特征,去掉这些特征后的新表格命名为df_test_1 + df_MisVal_Check = pd.DataFrame(MisVal_Check,)# + df_MisVal_Check_1=df_MisVal_Check.reset_index() + 
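# Hedged, self-contained sketch of the date normalization performed by getNow() above:
# accept a datetime, a 'YYYY-MM-DD' / 'YYYYMMDD' / 'YYYY/MM/DD' string, or '' for today,
# apply an optional day offset, and return both compact and dashed date strings.
from datetime import datetime, timedelta

def normalize_date(date="", offset=0):
    """Return (YYYYMMDD, YYYY-MM-DD) for a datetime, a date string, or '' (today)."""
    if isinstance(date, datetime):
        now = date
    elif date:
        for fmt in ("%Y-%m-%d", "%Y%m%d", "%Y/%m/%d"):
            try:
                now = datetime.strptime(str(date), fmt)
                break
            except ValueError:
                continue
        else:
            raise ValueError(f"unrecognized date format: {date!r}")
    else:
        now = datetime.now()
    now -= timedelta(days=offset)
    dashed = now.strftime("%Y-%m-%d")
    return dashed.replace("-", ""), dashed

print(normalize_date("2025/05/22", offset=1))   # ('20250521', '2025-05-21')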
df_MisVal_Check_1.columns=['Variable_Name','Missing_Number'] + df_MisVal_Check_1['Missing_Number']=df_MisVal_Check_1['Missing_Number']/len(df_test) + df_test_1=df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number']>0.4].Variable_Name,axis = 1) + + #将缺失值补为前一个或者后一个数值 + df_test_1=df_test_1.fillna(df_test.ffill()) + df_test_1=df_test_1.fillna(df_test_1.bfill()) + + # 选择用于模型训练的列名称 + col_for_training = df_test_1.columns + import joblib + Best_model_DalyLGPrice = joblib.load("日度价格预测_最佳模型.pkl") + # 最新的一天为最后一行的数据 + df_test_1_Day = df_test_1.tail(1) + # 移除不需要的列 + df_test_1_Day.index = df_test_1_Day["日期"] + df_test_1_Day = df_test_1_Day.drop(["日期"], axis= 1) + df_test_1_Day=df_test_1_Day.drop('京博指导价',axis=1) + df_test_1_Day=df_test_1_Day.dropna() + + # df_test_1_Day + #预测今日价格,显示至小数点后两位 + Ypredict_Today=Best_model_DalyLGPrice.predict(df_test_1_Day) + + df_test_1_Day['日度预测价格']=Ypredict_Today + print(df_test_1_Day['日度预测价格']) + a = df_test_1_Day['日度预测价格'] + a = a[0] + a = float(a) + a = round(a,2) + return a +def optimize_Model(): + from sklearn.model_selection import train_test_split + from sklearn.impute import SimpleImputer + from sklearn.preprocessing import OrdinalEncoder + from sklearn.feature_selection import SelectFromModel + from sklearn.metrics import mean_squared_error, r2_score + + pd.set_option('display.max_rows',40) + pd.set_option('display.max_columns',40) + df_test = pd.read_excel('沥青数据项.xlsx') + df_test.drop([0],inplace=True) + df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量', + '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价', + '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存', + '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存', + '京博签单量','京博库存量','京博产量','加权平均成交价']] = df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量', + '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价', + '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存', + '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存', + '京博签单量','京博库存量','京博产量','加权平均成交价']].astype('float') + # df_test = pd.read_csv('定价模型数据收集20190901-20230615.csv',encoding = 'gbk',engine = 'python') + # df_test['日期']=pd.to_datetime(df_test['日期'], format='%m/%d/%Y',infer_datetime_format=True) + df_test['日期']=pd.to_datetime(df_test['日期'], format='%Y-%m-%d',infer_datetime_format=True) + # df_test.tail(3) + MisVal_Check=df_test.isnull().sum().sort_values(ascending=False) + #去掉缺失值百分比>0.4的特征,去掉这些特征后的新表格命名为df_test_1 + df_MisVal_Check = pd.DataFrame(MisVal_Check,)# + df_MisVal_Check_1=df_MisVal_Check.reset_index() + df_MisVal_Check_1.columns=['Variable_Name','Missing_Number'] + df_MisVal_Check_1['Missing_Number']=df_MisVal_Check_1['Missing_Number']/len(df_test) + df_test_1=df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number']>0.4].Variable_Name,axis = 1) + #将缺失值补为前一个或者后一个数值 + df_test_1=df_test_1.fillna(df_test.ffill()) + df_test_1=df_test_1.fillna(df_test_1.bfill()) + df_test_1["日期"] = pd.to_datetime(df_test_1["日期"]) + df_test_1.index = df_test_1["日期"] + df_test_1 = df_test_1.drop(["日期"], axis= 1) + dataset1=df_test_1.drop('京博指导价',axis=1)#.astype(float) + + y=df_test_1['京博指导价'] + + x=dataset1 + + train = x + target = y + + #切割数据样本集合测试集 + X_train,x_test,y_train,y_true = train_test_split(train,target,test_size=0.2,random_state=0) + + + from sklearn.linear_model import Lasso + from xgboost import XGBRegressor + + import statsmodels.api as sm + # from keras.preprocessing.sequence import 
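# Hedged, self-contained sketch of the preprocessing that forecast_price()/optimize_Model()
# above effectively perform: drop feature columns with more than 40% missing values, then
# forward/backward fill the rest. The tiny DataFrame is synthetic; only the 0.4 threshold
# and the fill order come from the code above.
import numpy as np
import pandas as pd

df = pd.DataFrame({
    "即期成本": [3236.5, np.nan, 3248.4, 3303.3],
    "京博产量": [6533.1, 6817.4, np.nan, 7338.1],
    "sparse_col": [np.nan, np.nan, np.nan, 1.0],   # 75% missing -> dropped
})

missing_ratio = df.isnull().mean()                 # fraction of NaN per column
kept = df.loc[:, missing_ratio <= 0.4]             # keep columns with <=40% missing
filled = kept.ffill().bfill()                      # fill gaps from neighbouring rows
print(filled)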
TimeseriesGenerator + from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator + + import plotly.express as px + import plotly.graph_objects as go + + import xgboost as xgb + from xgboost import plot_importance, plot_tree + from sklearn.metrics import mean_absolute_error + from statsmodels.tools.eval_measures import mse,rmse + from sklearn.model_selection import GridSearchCV + from xgboost import XGBRegressor + import warnings + import pickle + + from sklearn.metrics import mean_squared_error + + #切割训练数据和样本数据 + from sklearn.model_selection import train_test_split + + #用于模型评分 + from sklearn.metrics import r2_score + + #模型缩写 + Lasso = Lasso(random_state=0) + XGBR = XGBRegressor(random_state=0) + Lasso.fit(X_train,y_train) + XGBR.fit(X_train,y_train) + y_pre_Lasso = Lasso.predict(x_test) + y_pre_XGBR = XGBR.predict(x_test) + + #计算Lasso、XGBR、RandomForestR、AdaBoostR、GradientBoostingR、BaggingRegressor各模型的R² + Lasso_score = r2_score(y_true,y_pre_Lasso) + XGBR_score=r2_score(y_true,y_pre_XGBR) + + #计算Lasso、XGBR的MSE和RMSE + Lasso_MSE=mean_squared_error(y_true, y_pre_Lasso) + XGBR_MSE=mean_squared_error(y_true, y_pre_XGBR) + + Lasso_RMSE=np.sqrt(Lasso_MSE) + XGBR_RMSE=np.sqrt(XGBR_MSE) + model_results = pd.DataFrame([['Lasso', Lasso_RMSE, Lasso_score], + ['XgBoost', XGBR_RMSE, XGBR_score]], + columns = ['模型(Model)','均方根误差(RMSE)', 'R^2 score']) + model_results1=model_results.set_index('模型(Model)') + + def plot_feature_importance(importance,names,model_type): + feature_importance = np.array(importance) + feature_names = np.array(names) + + data={'feature_names':feature_names,'feature_importance':feature_importance} + fi_df = pd.DataFrame(data) + + fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True) + + plt.figure(figsize=(10,8)) + sn.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names']) + + plt.title(model_type + " "+'FEATURE IMPORTANCE') + plt.xlabel('FEATURE IMPORTANCE') + plt.ylabel('FEATURE NAMES') + from pylab import mpl + mpl.rcParams['font.sans-serif'] = ['SimHei'] + from xgboost import XGBRegressor + from sklearn.model_selection import GridSearchCV + + estimator = XGBRegressor(random_state=0, + nthread=4, + seed=0 + ) + parameters = { + 'max_depth': range (2, 11, 2), # 树的最大深度 + 'n_estimators': range (50, 101, 10), # 迭代次数 + 'learning_rate': [0.01, 0.03, 0.1, 0.3, 0.5, 1] + } + + grid_search_XGB = GridSearchCV( + estimator=estimator, + param_grid=parameters, + # n_jobs = 10, + cv = 3, + verbose=True + ) + + grid_search_XGB.fit(X_train, y_train) + print("Best score: %0.3f" % grid_search_XGB.best_score_) + print("Best parameters set:") + best_parameters = grid_search_XGB.best_estimator_.get_params() + for param_name in sorted(parameters.keys()): + print("\t%s: %r" % (param_name, best_parameters[param_name])) + y_pred = grid_search_XGB.predict(x_test) + + op_XGBR_score = r2_score(y_true,y_pred) + op_XGBR_MSE= mean_squared_error(y_true, y_pred) + op_XGBR_RMSE= np.sqrt(op_XGBR_MSE) + + model_results2 = pd.DataFrame([['Optimized_Xgboost', op_XGBR_RMSE, op_XGBR_score]], + columns = ['模型(Model)', '均方根误差(RMSE)', 'R^2 score']) + model_results2=model_results2.set_index('模型(Model)') + + # results = model_results1.append(model_results2, ignore_index = False) + results = pd.concat([model_results1,model_results2],ignore_index=True) + results + import pickle + + Pkl_Filename = "日度价格预测_最佳模型.pkl" + + with open(Pkl_Filename, 'wb') as file: + pickle.dump(grid_search_XGB, file) + +def read_xls_data(): + """获取特征项ID""" + global one_cols, two_cols + # 使用pandas读取Excel文件 + 
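# Hedged, minimal sketch of the model-selection step in optimize_Model() above: grid-search
# an XGBRegressor over the same hyper-parameter grid and pickle the fitted search object.
# The random training data and the file name "best_model_sketch.pkl" are assumptions; the
# real script trains on 沥青数据项.xlsx and saves 日度价格预测_最佳模型.pkl.
import pickle
import numpy as np
from sklearn.model_selection import GridSearchCV, train_test_split
from xgboost import XGBRegressor

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 5))
y = X @ rng.normal(size=5) + rng.normal(scale=0.1, size=200)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

param_grid = {
    "max_depth": range(2, 11, 2),
    "n_estimators": range(50, 101, 10),
    "learning_rate": [0.01, 0.03, 0.1, 0.3, 0.5, 1],
}
search = GridSearchCV(XGBRegressor(random_state=0), param_grid, cv=3, verbose=True)
search.fit(X_train, y_train)
print("best params:", search.best_params_, "held-out R^2:", search.score(X_test, y_test))

with open("best_model_sketch.pkl", "wb") as fh:   # the script pickles the whole search object
    pickle.dump(search, fh)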
df = pd.read_excel(read_file_path_name, header=None) # 不自动识别列名 + # 获取第二行数据(索引为1) + one_cols = df.iloc[1].tolist()[1:] + print(f'获取到的数据项ID{one_cols}') + +def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEnd): + + search_data = { + "funcModule": "数据项", + "funcOperation": "查询", + "data": { + "dateStart": dateStart, + "dateEnd": dateEnd, + "dataItemNoList": dataItemNoList # 数据项编码,代表 brent最低价和最高价 + } + } + + headers = {"Authorization": token} + search_res = requests.post(url=url, headers=headers, json=search_data, timeout=(3, 5)) + search_value = json.loads(search_res.text)["data"] + if search_value: + return search_value + else: + return None + + +def save_queryDataListItemNos_xls(data_df,dataItemNoList): + from datetime import datetime,timedelta + current_year_month = datetime.now().strftime('%Y-%m') + grouped = data_df.groupby("dataDate") + + # 使用openpyxl打开xlsx文件 + from openpyxl import load_workbook + workbook = load_workbook('沥青数据项.xlsx') + + # 创建新工作簿 + new_workbook = load_workbook('沥青数据项.xlsx') + + for sheetname in workbook.sheetnames: + sheet = workbook[sheetname] + new_sheet = new_workbook[sheetname] + + current_year_month_row = 0 + # 查找当前月份数据起始行 + for row_idx, row in enumerate(sheet.iter_rows(values_only=True), 1): + if str(row[0]).startswith(current_year_month): + current_year_month_row += 1 + + # 追加新数据 + if sheetname == workbook.sheetnames[0]: + start_row = sheet.max_row - current_year_month_row + 1 + for row_idx, (date, group) in enumerate(grouped, start=start_row): + new_sheet.cell(row=row_idx, column=1, value=date) + for j, dataItemNo in enumerate(dataItemNoList, start=2): + if group[group["dataItemNo"] == dataItemNo]["dataValue"].values: + new_sheet.cell(row=row_idx, column=j, + value=group[group["dataItemNo"] == dataItemNo]["dataValue"].values[0]) + + # 保存修改后的xlsx文件 + new_workbook.save("沥青数据项.xlsx") + + +# def save_queryDataListItemNos_xls(data_df,dataItemNoList): +# from datetime import datetime, timedelta +# current_year_month = datetime.now().strftime('%Y-%m') +# grouped = data_df.groupby("dataDate") + +# # 打开xls文件 +# workbook = xlrd.open_workbook('沥青数据项.xlsx') + +# # 获取所有sheet的个数 +# sheet_count = len(workbook.sheet_names()) + +# # 获取所有sheet的名称 +# sheet_names = workbook.sheet_names() + +# new_workbook = xlwt.Workbook() +# for i in range(sheet_count): +# # 获取当前sheet +# sheet = workbook.sheet_by_index(i) + +# # 获取sheet的行数和列数 +# row_count = sheet.nrows +# col_count = sheet.ncols +# # 获取原有数据 +# data = [] +# for row in range(row_count): +# row_data = [] +# for col in range(col_count): +# row_data.append(sheet.cell_value(row, col)) +# data.append(row_data) + +# # 创建xlwt的Workbook对象 +# # 创建sheet +# new_sheet = new_workbook.add_sheet(sheet_names[i]) + + +# current_year_month_row = 0 +# # 将原有的数据写入新的sheet +# for row in range(row_count): +# for col in range(col_count): +# col0 = data[row][0] +# # print("col0",col0[:7]) +# if col0[:7] == current_year_month: +# current_year_month_row += 1 +# break +# new_sheet.write(row, col, data[row][col]) + + +# # print("current_year_month_row",current_year_month_row) +# if i == 0: +# rowFlag = 0 +# # 查看每组数据 +# for date, group in grouped: +# new_sheet.write(row_count + rowFlag - current_year_month_row, 0, date) +# for j in range(len(dataItemNoList)): +# dataItemNo = dataItemNoList[j] + +# # for dataItemNo in dataItemNoList: +# if group[group["dataItemNo"] == dataItemNo]["dataValue"].values: + +# new_sheet.write(row_count + rowFlag - current_year_month_row, j + 1, group[group["dataItemNo"] == 
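# Hedged sketch of what save_queryDataListItemNos_xls() above effectively does with its
# groupby and nested loops: reshape the long-format API response (one record per date and
# data item) into one row per date with one column per dataItemNo. The sample records and
# the ITEM_A/ITEM_B codes are synthetic; only the field names dataDate/dataItemNo/dataValue
# come from the query code above.
import pandas as pd

records = [
    {"dataDate": "2025-05-21", "dataItemNo": "ITEM_A", "dataValue": 3303.26},
    {"dataDate": "2025-05-21", "dataItemNo": "ITEM_B", "dataValue": 7338.12},
    {"dataDate": "2025-05-22", "dataItemNo": "ITEM_A", "dataValue": 3248.42},
]
df = pd.DataFrame(records)
wide = (df.pivot(index="dataDate", columns="dataItemNo", values="dataValue")
          .reindex(columns=["ITEM_A", "ITEM_B"]))   # keep the sheet's column order
print(wide)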
dataItemNo]["dataValue"].values[0]) + +# rowFlag += 1 + + +# # 保存新的xls文件 +# new_workbook.save("沥青数据项.xlsx") + +def queryDataListItemNos(token=None): + df = pd.read_excel('沥青数据项.xlsx') + dataItemNoList = df.iloc[0].tolist()[1:] + + if token is None: + token = get_head_auth() + + if not token: + print('token获取失败') + return + + # 获取当前日期 + from datetime import datetime, timedelta + current_date = datetime.now() + + # 获取当月1日 + first_day_of_month = current_date.replace(day=1) + + # 格式化为 YYYYMMDD 格式 + dateEnd = current_date.strftime('%Y%m%d') + dateStart = first_day_of_month.strftime('%Y%m%d') + + search_value = get_queryDataListItemNos_value(token, queryDataListItemNos_url, dataItemNoList, dateStart, dateEnd) + + + data_df = pd.DataFrame(search_value) + + data_df["dataDate"] = pd.to_datetime(data_df["dataDate"]) + data_df["dataDate"] = data_df["dataDate"].dt.strftime('%Y-%m-%d') + save_queryDataListItemNos_xls(data_df,dataItemNoList) + print('当月数据更新完成') + + +def save_xls_1(append_rows): + + # 打开xls文件 + workbook = xlrd.open_workbook('沥青数据项.xlsx') + + # 获取所有sheet的个数 + sheet_count = len(workbook.sheet_names()) + + # 获取所有sheet的名称 + sheet_names = workbook.sheet_names() + + new_workbook = xlwt.Workbook() + for i in range(sheet_count): + # 获取当前sheet + sheet = workbook.sheet_by_index(i) + + # 获取sheet的行数和列数 + row_count = sheet.nrows - 1 + col_count = sheet.ncols + # 获取原有数据 + data = [] + for row in range(row_count): + row_data = [] + for col in range(col_count): + row_data.append(sheet.cell_value(row, col)) + data.append(row_data) + # 创建xlwt的Workbook对象 + # 创建sheet + new_sheet = new_workbook.add_sheet(sheet_names[i]) + + # 将原有的数据写入新的sheet + for row in range(row_count): + for col in range(col_count): + new_sheet.write(row, col, data[row][col]) + + if i == 0: + # 在新的sheet中添加数据 + for col in range(col_count): + new_sheet.write(row_count, col, append_rows[col]) + + # 保存新的xls文件 + new_workbook.save("沥青数据项.xlsx") + + + +def start(date=''): + """获取当日数据""" + read_xls_data() + token = get_head_auth() + if not token: + return + + cur_time,cur_time2 = getNow(date) + print(f"获取{cur_time}数据") + datas = get_data_value(token, one_cols,date=cur_time) + print(len(datas)) + print(datas) + if not datas: + return + + append_rows = [cur_time2] + dataItemNo_dataValue = {} + for data_value in datas: + if "dataValue" not in data_value: + print(data_value) + dataItemNo_dataValue[data_value["dataItemNo"]] = "" + else: + dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] + + for value in one_cols: + if value in dataItemNo_dataValue: + append_rows.append(dataItemNo_dataValue[value]) + else: + append_rows.append("") + + print('添加的行:',len(append_rows),append_rows) + save_xls_2(append_rows) + + +def save_xls_2(append_rows): + """保存或更新数据到Excel文件 + 参数: + append_rows (list): 需要追加/更新的数据行,格式为[日期, 数据项1, 数据项2,...] 
+ """ + # try: + # 读取现有数据(假设第一行为列名) + df = pd.read_excel('沥青数据项.xlsx', sheet_name=0) + print('文件中的数据列数:',len(df.columns),df.columns) + # 转换append_rows为DataFrame + if len(append_rows) != len(df.columns): + # 去除第二个元素 ,不知道什么原因多一个空数据 + append_rows.pop(1) + append_rows = pd.DataFrame([append_rows],columns=df.columns) + # 创建新数据行 + new_date = append_rows['日期'].values[0] + + dates = df['日期'].to_list() + # 判断日期是否存在 + if new_date in dates: + # 找到日期所在行的索引 + date_mask = df['日期'] == new_date + # 存在则更新数据 + df.loc[date_mask] = append_rows.values + print(f"更新 {new_date} 数据") + else: + # 不存在则追加数据 + df = pd.concat([df, append_rows], ignore_index=True) + print(df.head()) + print(df.tail()) + print(f"插入 {new_date} 新数据") + + # 保存更新后的数据 + df.to_excel('沥青数据项.xlsx', index=False, engine='openpyxl') + + # except FileNotFoundError: + # # 如果文件不存在则创建新文件 + # pd.DataFrame([append_rows]).to_excel('沥青数据项.xlsx', index=False, engine='openpyxl') + # except Exception as e: + # print(f"保存数据时发生错误: {str(e)}") + + +def main(start_date=None,token=None,token_push=None): + from datetime import datetime, timedelta + if start_date is None: + start_date = datetime.now() + if token is None: + token = get_head_auth() + if token_push is None: + token_push = get_head_push_auth() + date = start_date.strftime('%Y%m%d') + print(date) +# start(date) + # 更新当月数据 + queryDataListItemNos(token) + # 训练模型 + optimize_Model() + # # 预测&上传预测结果 + upload_data_to_system(token_push,start_date) + +if __name__ == "__main__": + print("运行中ing...") + main() \ No newline at end of file diff --git a/aisenzhecode/沥青/沥青数据项.xls b/aisenzhecode/沥青/沥青数据项.xls index 290b16e..e5b425d 100644 Binary files a/aisenzhecode/沥青/沥青数据项.xls and b/aisenzhecode/沥青/沥青数据项.xls differ diff --git a/aisenzhecode/沥青/沥青数据项.xlsx b/aisenzhecode/沥青/沥青数据项.xlsx index e5d9da4..d4401ff 100644 Binary files a/aisenzhecode/沥青/沥青数据项.xlsx and b/aisenzhecode/沥青/沥青数据项.xlsx differ diff --git a/aisenzhecode/沥青/沥青数据项.xlsx.r87044 b/aisenzhecode/沥青/沥青数据项.xlsx.r87044 deleted file mode 100644 index bd31931..0000000 Binary files a/aisenzhecode/沥青/沥青数据项.xlsx.r87044 and /dev/null differ diff --git a/aisenzhecode/沥青/沥青数据项.xlsx.r87201 b/aisenzhecode/沥青/沥青数据项.xlsx.r87201 deleted file mode 100644 index cea746a..0000000 Binary files a/aisenzhecode/沥青/沥青数据项.xlsx.r87201 and /dev/null differ diff --git a/aisenzhecode/液化石油气/日度价格预测_液化气最佳模型.pkl b/aisenzhecode/液化石油气/日度价格预测_液化气最佳模型.pkl index 50400c1..e4b4f9d 100644 Binary files a/aisenzhecode/液化石油气/日度价格预测_液化气最佳模型.pkl and b/aisenzhecode/液化石油气/日度价格预测_液化气最佳模型.pkl differ diff --git a/aisenzhecode/液化石油气/液化气价格预测ytj.ipynb b/aisenzhecode/液化石油气/液化气价格预测ytj.ipynb index a36ad46..6e14189 100644 --- a/aisenzhecode/液化石油气/液化气价格预测ytj.ipynb +++ b/aisenzhecode/液化石油气/液化气价格预测ytj.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 16, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -606,6 +606,7 @@ " return None\n", "\n", "def save_queryDataListItemNos_xls(data_df,dataItemNoList):\n", + " from datetime import datetime, timedelta\n", " current_year_month = datetime.now().strftime('%Y-%m')\n", " grouped = data_df.groupby(\"dataDate\")\n", "\n", @@ -667,7 +668,9 @@ "\n", "\n", "\n", + "\n", "def main(start_date=None,token=None,token_push=None):\n", + " from datetime import datetime, timedelta\n", " if start_date is None:\n", " start_date = datetime.now()\n", " if token is None:\n", @@ -676,10 +679,14 @@ " token_push = get_head_push_auth()\n", " date = start_date.strftime('%Y%m%d')\n", " print(date)\n", - " # 更新当月数据\n", - " queryDataListItemNos(start_date,token)\n", - " 
# 更新当日数据\n", - "# start(date)\n", + " try:\n", + " # 更新当月数据\n", + " queryDataListItemNos(start_date,token)\n", + " except:\n", + " print('当月数据更新失败,单日更新')\n", + " start(date)\n", + " # 更新当日数据,批量日期更新时打开\n", + " # start(date)\n", " # 训练模型\n", " optimize_Model()\n", " # # 预测&上传预测结果\n", @@ -707,7 +714,744 @@ "name": "stdout", "output_type": "stream", "text": [ - "运行中ing...\n" + "运行中ing...\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0NzkxMjg3NywianRpIjoiM2Y0YTVjZjMxYjQ5NGI4MmEyODNhYmQ0ODY0NDdlODgifQ.9Pz1IswwjZrcBAkGQaF2DS8toGjSWdW7XM2ewSEnunY\n", + "20250522\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:603: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\IPython\\core\\magics\\pylab.py:160: UserWarning:\n", + "\n", + "pylab import has clobbered these variables: ['datetime', 'random', '__version__', 'plot']\n", + "`%matplotlib` prevents importing * from pylab and numpy\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date\n", + "2025-05-22 4686.071289\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "预测值: 4686.07\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0Nzk5ODk3NywianRpIjoiM2FiNWY0YTMxYmVkNGIwNDhkODQ5ZTcxNzE4NGIzZjAifQ.7u4XG8lVrdLo0v8HkFuh-39LUQlvKndx-eehgaHgqiQ\n", + "20250523\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:603: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. 
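# Hedged sketch of the fallback added to main() in the hunk above: try the month-level
# refresh first and, if it raises, fall back to the single-day update for the same date.
# queryDataListItemNos and start refer to the notebook's own functions; the stubs here are
# placeholders for illustration only.
def refresh(date, token, query_month, update_day):
    try:
        query_month(date, token)          # preferred: refresh the whole current month
    except Exception as exc:              # the notebook catches any failure and logs it
        print(f"month refresh failed ({exc}); falling back to single-day update")
        update_day(date)

# refresh("20250522", token, queryDataListItemNos, start)   # hypothetical call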
Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Date\n", + "2025-05-23 4745.852051\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "预测值: 4745.85\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0Nzk5OTI3NywianRpIjoiZTVjMzMzYzdmMmYxNDRkNmJjOGUwMjM1NTE5Zjg1MDAifQ.k80aeQlgTPDdVxbmnB3RzjL2B2rvsDrphhJFRRVQ7hc\n", + "20250523\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:603: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Date\n", + "2025-05-23 4745.852051\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "预测值: 4745.85\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0ODA4NTM3OCwianRpIjoiMmE1NzA0NGE0OTM5NGFmODk5MGYzZGIxYTU2MjlkNjgifQ.smTWAknViqKT89RK6wfecUU6RVz-rk-lk__RMXEAd7U\n", + "20250524\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:603: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Date\n", + "2025-05-24 4733.198242\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "预测值: 4733.2\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0ODA4NTY3NywianRpIjoiOWQ3YTc5NDQ5MzRkNDNmNzllZmUxZmQxN2EzYmQ2ZTAifQ.p1kAb2XS4zXsX4L9LxYx_WARMXwGBsSi6-0fLgPHwI8\n", + "20250524\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:603: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. 
Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Date\n", + "2025-05-24 4733.198242\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "预测值: 4733.2\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0ODE3MTc3OCwianRpIjoiYmJjZGRkODg5YTJmNDJhMzg0MDg2NjIxNWE4OTM2ZTQifQ.5kl8fb4i-GLc9gwkoAZg2sFYUJRoe2_DcbTPEIUvfKc\n", + "20250525\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:603: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Date\n", + "2025-05-24 4702.740723\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "预测值: 4702.74\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0ODE3MjA3OCwianRpIjoiYjcxYzhjNTBlOTE0NGIyZmFjNTViOGFiMGFkMjg4MDcifQ.BdY7aAgJLFALAb6ZEaxsUdXdZBD9kKL4cH2usKKoTuM\n", + "20250525\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:603: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Date\n", + "2025-05-24 4702.740723\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "预测值: 4702.74\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0ODI1ODE3OCwianRpIjoiMzEwYjk2OTg1YzY3NGJkMTliNWQ5Njg0MjJlZDEwMDIifQ.hWMl89A0qXItRw4ilOjF5c3q5zGYG4WkvFggkxPB4qI\n", + "20250526\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:603: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. 
Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Date\n", + "2025-05-26 4657.152344\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "预测值: 4657.15\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0ODI1ODQ3OCwianRpIjoiNGE0YjY5OTlmYmUyNDNiYmJhNDc3NTU3ZjYzMDQyMWIifQ.Vfp9D9sHUC_Z-E7dwb-z-Saw-YgvIuEsyfCa1HAO3tA\n", + "20250526\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:603: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Date\n", + "2025-05-26 4657.152344\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "预测值: 4657.15\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0ODM0NDU3OCwianRpIjoiYTMxZDdmZWRlNzk2NDA1YTllMDM3NTU0ZTg2ZTEzOGYifQ.DeLFIsqo8NodaW9t98K08nHF_X_uV5dQgkuG7fwMBis\n", + "20250527\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:603: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Date\n", + "2025-05-27 4700.042969\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "预测值: 4700.04\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0ODM0NDg3OCwianRpIjoiZmE3NjFiNDE4N2EzNDAzODhjOTkzMDNjYmY2NmZkMzMifQ.0DqOj6VtXfRKPfTRrnWQawd2QqC5U_VdjUohWQxLkg4\n", + "20250527\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:603: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. 
Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Date\n", + "2025-05-27 4700.042969\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "预测值: 4700.04\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0ODQzMDk3OSwianRpIjoiNWJjNjUxMTY3NWY1NDUzZWIwMzEzNTEwZjAzYThmZTIifQ.ZKJZ0MKm9o1ygfdf7K0OTrh5BKqXqxuHqSIyj-o_D5Q\n", + "20250528\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:603: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Date\n", + "2025-05-28 4684.207031\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "预测值: 4684.21\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0ODQzMTI3OSwianRpIjoiZWU4YjA2NWEzZTIyNDg3MWE1NGJhMTg4NTE4MTM0MDMifQ.tfZ8OEM_lBg20OaMQB-U9LzeLaDFTlRMHYhJyW2lWB8\n", + "20250528\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:603: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Date\n", + "2025-05-28 4684.207031\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "预测值: 4684.21\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0ODUxNzM3OSwianRpIjoiZDdmYjg3NDY4ZGRkNDYyNmFiYjE4YTdjZjdiZTkyN2UifQ.hz0SzEtdaGwgJHWf8XheBXYJSU06eFb5DbYAhnLYCmI\n", + "20250529\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:603: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. 
Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Date\n", + "2025-05-29 4700.171875\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "预测值: 4700.17\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0ODUxNzY4MSwianRpIjoiZjVmM2JmMDE3ZjE2NDMzMTg0MDZjODZkNWY1Mzk1YTMifQ.-gvwjjIg5Bng8ceyZ8Jsb-zPQP9qW_cwATowtL6HYMQ\n", + "20250529\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:603: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Date\n", + "2025-05-29 4700.171875\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "预测值: 4700.17\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0ODYwMzc4MCwianRpIjoiYThlYTEzMWI0NzA5NDE4Y2E0NWIwZWQzMDNhZDAyZWMifQ.VtKVo59zkqa2xomzhigfqRsIIg7lD_tAQO8pgVt_Luc\n", + "20250530\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:603: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Date\n", + "2025-05-30 4651.634277\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "预测值: 4651.63\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0ODYwNDA4MCwianRpIjoiYzY2MWI4MWMyNDk4NDY0Y2JiZDkxZGZhOGMyZGUxNTcifQ.lS7jRPhQ4M1TQlsTTFEfR3f4cwLfH98h4qOmBrKgio0\n", + "20250530\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:603: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. 
Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Date\n", + "2025-05-30 4651.634277\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "预测值: 4651.63\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0ODY5MDE4MCwianRpIjoiNjFjMGEwMTgxNTc2NDFlZjgxZDIxNTZkMGU1MjhiZDcifQ.snloJkEdEctvxKjuAnonz2yRFumN434ub4mg0jHTP1M\n", + "20250531\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:603: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Date\n", + "2025-05-31 4600.956543\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "预测值: 4600.96\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0ODY5MDQ4MCwianRpIjoiMWI2OGM5MzRhYWQxNGI2YTk5MGEwZGQyZDdjYjcwYzEifQ.3IW-0YoJg-kv9l0w4iHcsKAcFb2BL7EZIFDnBxtuIqM\n", + "20250531\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:603: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. 
Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Date\n", + "2025-05-31 4600.956543\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "预测值: 4600.96\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0ODc3NjU4MSwianRpIjoiMTE5ODg4Zjc3NGEwNGE1ZDk1MmIyNWJiYzA1OTY2OWUifQ.SxQgQjsREFVyeiggyzc2kVHy_UAH_z9saIXUkX9JXoM\n", + "20250601\n", + "09:15:00执行失败\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0ODc3Njg4MCwianRpIjoiOTk0NTRmMDcwMTdiNGIzM2JjMzA5ZWM5NjNiOWY1MjAifQ.8ir_vmh4R9AD-hAL1RjBXwVtPcuzhMLK0Yxf1c0ugXI\n", + "20250601\n", + "09:20:00执行失败\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0ODc3Njg4MCwianRpIjoiM2QzOGQ3Y2MzYzhlNGRjNWI4MjliMWM3ZGU1NzNiNjYifQ.EsDazleLcFiWS0cummkTv_0w8Eba0Q1o0tzRG82DjuQ\n", + "20250601\n", + "09:20:00执行失败\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0ODc3Njg4MSwianRpIjoiY2I0NjE2NDUxM2QxNGNkNjg5ZWY1YmNmZDFmNTMyMjQifQ.MdZEinDQ8g0dS-kz9Ffjr-a_THlssHY2tUG3afSFvF4\n", + "20250601\n", + "09:20:00执行失败\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0ODc3Njg4MSwianRpIjoiNzM5MWZiNzdlMmJkNDA1N2FlNGE1MzQ4NTc0NjA4NGYifQ.eFqVfiQFOg45CSJD1liw7-bBrGQu-Bvmh3K4x2WTb3o\n", + "20250601\n", + "09:20:00执行失败\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0ODg2Mjk4MSwianRpIjoiYmVlZWQwZWZjMzEyNDA2Yjg0NTAzZTMxMTNiMDA1OWYifQ.TmWrq7FOObj2HpzV-xLDmwuj1al9n6mgk_fljxJx22Q\n", + "20250602\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:603: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. 
Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Date\n", + "2025-06-02 4594.924316\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "预测值: 4594.92\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0ODg2MzI4MSwianRpIjoiMzE5NzcxNDMyZmQ5NGE0MWIwNTYzMzY2ZjQ4NTgxMzUifQ.vnVbDAkT75p6MIUZb6yDC7s8yHuDO6HWL0bW735D8Zg\n", + "20250602\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:603: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Date\n", + "2025-06-02 4594.924316\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "预测值: 4594.92\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0ODk0OTM4MiwianRpIjoiZjkwZmZmMGQwNzJlNDc4NmE2ODBkYjkxYWU1OGFjOTkifQ.ZxzSHX9AuEx0F8_ZTK9UN_kKZoC2KC_fmaW333JlGwQ\n", + "20250603\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:603: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Date\n", + "2025-06-03 4627.078125\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "预测值: 4627.08\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0ODk0OTY4MywianRpIjoiYTBkNmU3YTU5NmRiNDBjYzk1NmFiOTkwZTY5ZjYyZWIifQ.vAXcKM8JkBMXTcTQ6EONidFybyVdNTz2ClxFEYJZBEo\n", + "20250603\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:603: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. 
Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Date\n", + "2025-06-03 4627.078125\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "预测值: 4627.08\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0OTAzNTc4MiwianRpIjoiMjZjZjFmZTMzYzE1NDIxZWE3M2M5ZDk3NWZhMzZjYmUifQ.MI_Zs0NN4n4IqMxzP_JK-2EUPjZDBT-hUepkXOzAVR4\n", + "20250604\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:603: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Date\n", + "2025-06-04 4696.86084\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "预测值: 4696.86\n", + "执行定时任务\n", + "获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0OTAzNjA4MiwianRpIjoiM2RiZjNjNTYwYmM0NGM3ZDgzYmViZTBlYTQ5MGVmMjkifQ.MuXgas16l6swMv0KVRsWT9pBwFYWGroR9D38lx4A5mI\n", + "20250604\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:603: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. 
Use `array.size > 0` to check that an array is not empty.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: Qt5Agg\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n", + "Date\n", + "2025-06-04 4696.86084\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "预测值: 4696.86\n" ] } ], @@ -730,7 +1474,8 @@ " time.sleep(1)\n", " except:\n", " print(f\"{current_time}执行失败\")\n", - "# main() \n", + "# main() \n", + "# main() \n", " # 检测数据准确性, 需要检测放开\n", " # check_data(\"100028098|LISTING_PRICE\")\n", " # check_data(\"9137070016544622XB|DAY_Yield\")\n" @@ -771,18 +1516,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.0" } }, "nbformat": 4, diff --git a/aisenzhecode/液化石油气/液化气数据.xlsx b/aisenzhecode/液化石油气/液化气数据.xlsx index 129d918..9162c21 100644 Binary files a/aisenzhecode/液化石油气/液化气数据.xlsx and b/aisenzhecode/液化石油气/液化气数据.xlsx differ diff --git a/aisenzhecode/液化石油气/液化气每日价格预测.py b/aisenzhecode/液化石油气/液化气每日价格预测.py new file mode 100644 index 0000000..44aff4b --- /dev/null +++ b/aisenzhecode/液化石油气/液化气每日价格预测.py @@ -0,0 +1,663 @@ +import requests +import json + +from datetime import datetime,timedelta + +# 变量定义 +login_url = "http://10.200.32.39/jingbo-api/api/server/login" +search_url = "http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos" +queryDataListItemNos_url = "http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryDataListItemNos" + + +login_push_url = "http://10.200.32.39/jingbo-api/api/server/login" +upload_url = "http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList" + +login_data = { + "data": { + "account": "api_dev", + "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=", + "tenantHashCode": "8a4577dbd919675758d57999a1e891fe", + "terminal": "API" + }, + "funcModule": "API", + "funcOperation": "获取token" +} + +login_push_data = { + "data": { + "account": "api_dev", + "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=", + "tenantHashCode": "8a4577dbd919675758d57999a1e891fe", + "terminal": "API" + }, + "funcModule": "API", + "funcOperation": "获取token" +} + +read_file_path_name = "液化气数据.xlsx" +one_cols = [] +two_cols = [] + +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +import seaborn as sn +import random +import time + +from plotly import __version__ +from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot + +from sklearn import preprocessing + +from pandas import Series,DataFrame + +import matplotlib.pyplot as plt + +import sklearn.datasets as datasets + +#导入机器学习算法模型 +from sklearn.linear_model import Lasso +from xgboost import XGBRegressor + +import statsmodels.api as sm +try: + from keras.preprocessing.sequence import TimeseriesGenerator +except: + from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator + +import plotly.express as px +import plotly.graph_objects as go + +import xgboost as xgb +from xgboost import plot_importance, plot_tree +from sklearn.metrics import mean_absolute_error +from statsmodels.tools.eval_measures import mse,rmse +from sklearn.model_selection import GridSearchCV +from xgboost import 
XGBRegressor +import warnings +import pickle + +from sklearn.metrics import mean_squared_error + +#切割训练数据和样本数据 +from sklearn.model_selection import train_test_split + +#用于模型评分 +from sklearn.metrics import r2_score + +le = preprocessing.LabelEncoder() + +# print(__version__) # requires version >= 1.9.0 + + +import cufflinks as cf +cf.go_offline() + +random.seed(100) + + +# 数据获取 + +def get_head_auth(): + login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5)) + text = json.loads(login_res.text) + if text["status"]: + token = text["data"]["accessToken"] + print('获取的token:',token) + return token + else: + print("获取认证失败") + return None + + +def get_data_value(token, dataItemNoList,date): + search_data = { + "data": { + "date": date, + "dataItemNoList": dataItemNoList + }, + "funcModule": "数据项", + "funcOperation": "查询" + } + + + headers = {"Authorization": token} + search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5)) + print('数据项查询参数search_data:') + print(search_data) + print('数据项查询结果search_res:') + print(search_res.text) + + try: + search_value = json.loads(search_res.text)["data"] + + print("数据项查询结果:", search_value) + except json.JSONDecodeError as e: + print(f"Error decoding JSON: {e}") + print("Response content:", search_res.text) + return None + if search_value: + return search_value + else: + print("今天没有新数据") + return search_value + + +def get_head_push_auth(): + login_res = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5)) + text = json.loads(login_res.text) + if text["status"]: + token = text["data"]["accessToken"] + return token + else: + print("获取认证失败") + return None + + + +def upload_data_to_system(token_push,date): + data = { + "funcModule": "数据表信息列表", + "funcOperation": "新增", + "data": [ + {"dataItemNo": "250855713|Forecast_Price|ACN", + "dataDate": getNow(date=date)[0], + "dataStatus": "add", + "dataValue": forecast_price() + } + + ] + } + headers = {"Authorization": token_push} + res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5)) + print(res.text) + print('预测值:',data['data'][0]['dataValue']) + + +price_list = [] + +def forecast_price(): + # df_test = pd.read_csv('定价模型数据收集0212.csv') + df_test = pd.read_excel('液化气数据.xlsx') + df_test.drop([0],inplace=True) + try: + df_test['Date']=pd.to_datetime(df_test['Date'], format='%m/%d/%Y',infer_datetime_format=True) + except: + df_test['Date']=pd.to_datetime(df_test['Date'], format=r'%Y-%m-%d',infer_datetime_format=True) + + + df_test_1 = df_test + df_test_1=df_test_1.fillna(df_test.ffill()) + df_test_1=df_test_1.fillna(df_test_1.bfill()) + + # 选择用于模型训练的列名称 + col_for_training = df_test_1.columns + + + + + import joblib + Best_model_DalyLGPrice = joblib.load("日度价格预测_液化气最佳模型.pkl") + # 最新的一天为最后一行的数据 + + df_test_1_Day = df_test_1.tail(1) + # 移除不需要的列 + df_test_1_Day.index = df_test_1_Day["Date"] + df_test_1_Day = df_test_1_Day.drop(["Date"], axis= 1) + df_test_1_Day=df_test_1_Day.drop('Price',axis=1) + df_test_1_Day=df_test_1_Day.dropna() + + for col in df_test_1_Day.columns: + df_test_1_Day[col] = pd.to_numeric(df_test_1_Day[col],errors='coerce') + #预测今日价格,显示至小数点后两位 + Ypredict_Today=Best_model_DalyLGPrice.predict(df_test_1_Day) + + df_test_1_Day['日度预测价格']=Ypredict_Today + print(df_test_1_Day['日度预测价格']) + a = df_test_1_Day['日度预测价格'] + a = a[0] + a = float(a) + a = round(a,2) + price_list.append(a) + return a +def optimize_Model(): + from sklearn.model_selection import train_test_split + from sklearn.impute import SimpleImputer + from 
sklearn.preprocessing import OrdinalEncoder + from sklearn.feature_selection import SelectFromModel + from sklearn.metrics import mean_squared_error, r2_score + import pandas as pd + + pd.set_option('display.max_rows',40) + pd.set_option('display.max_columns',40) + df_test = pd.read_excel('液化气数据.xlsx') + df_test.drop([0],inplace=True) + try: + df_test['Date']=pd.to_datetime(df_test['Date'], format='%m/%d/%Y',infer_datetime_format=True) + except: + df_test['Date']=pd.to_datetime(df_test['Date'], format=r'%Y-%m-%d',infer_datetime_format=True) + + + #将缺失值补为前一个或者后一个数值 + df_test_1 = df_test + df_test_1=df_test_1.fillna(df_test.ffill()) + df_test_1=df_test_1.fillna(df_test_1.bfill()) + df_test_1["Date"] = pd.to_datetime(df_test_1["Date"]) + df_test_1.index = df_test_1["Date"] + df_test_1 = df_test_1.drop(["Date"], axis= 1) + df_test_1 = df_test_1.astype('float') + + + import numpy as np + import pandas as pd + from pandas import Series,DataFrame + + import matplotlib.pyplot as plt + + import sklearn.datasets as datasets + + #导入机器学习算法模型 + from sklearn.linear_model import Lasso + from xgboost import XGBRegressor + + import statsmodels.api as sm + try: + from keras.preprocessing.sequence import TimeseriesGenerator + except: + from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator + + import plotly.express as px + import plotly.graph_objects as go + + import xgboost as xgb + from xgboost import plot_importance, plot_tree + from sklearn.metrics import mean_absolute_error + from statsmodels.tools.eval_measures import mse,rmse + from sklearn.model_selection import GridSearchCV + from xgboost import XGBRegressor + import warnings + import pickle + + from sklearn.metrics import mean_squared_error + + #切割训练数据和样本数据 + from sklearn.model_selection import train_test_split + + #用于模型评分 + from sklearn.metrics import r2_score + + dataset1=df_test_1.drop('Price',axis=1)#.astype(float) + + y=df_test_1['Price'] + + x=dataset1 + + train = x + target = y + + #切割数据样本集合测试集 + X_train,x_test,y_train,y_true = train_test_split(train,target,test_size=0.2,random_state=0) + + #模型缩写 + Lasso = Lasso(random_state=0) + XGBR = XGBRegressor(random_state=0) + #训练模型 + Lasso.fit(X_train,y_train) + XGBR.fit(X_train,y_train) + #模型拟合 + y_pre_Lasso = Lasso.predict(x_test) + y_pre_XGBR = XGBR.predict(x_test) + + #计算Lasso、XGBR、RandomForestR、AdaBoostR、GradientBoostingR、BaggingRegressor各模型的R² + Lasso_score = r2_score(y_true,y_pre_Lasso) + XGBR_score=r2_score(y_true,y_pre_XGBR) + + #计算Lasso、XGBR的MSE和RMSE + Lasso_MSE=mean_squared_error(y_true, y_pre_Lasso) + XGBR_MSE=mean_squared_error(y_true, y_pre_XGBR) + + Lasso_RMSE=np.sqrt(Lasso_MSE) + XGBR_RMSE=np.sqrt(XGBR_MSE) + # 将不同模型的不同误差值整合成一个表格 + model_results = pd.DataFrame([['Lasso', Lasso_RMSE, Lasso_score], + ['XgBoost', XGBR_RMSE, XGBR_score]], + columns = ['模型(Model)','均方根误差(RMSE)', 'R^2 score']) + #将模型名称(Model)列设置为索引 + model_results1=model_results.set_index('模型(Model)') + + model_results1 + #定义plot_feature_importance函数,该函数用于计算特征重要性。此部分代码无需调整 + def plot_feature_importance(importance,names,model_type): + feature_importance = np.array(importance) + feature_names = np.array(names) + + data={'feature_names':feature_names,'feature_importance':feature_importance} + fi_df = pd.DataFrame(data) + + fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True) + + plt.figure(figsize=(10,8)) + sn.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names']) + + plt.title(model_type + " "+'FEATURE IMPORTANCE') + plt.xlabel('FEATURE IMPORTANCE') + plt.ylabel('FEATURE 
NAMES') + from pylab import mpl + mpl.rcParams['font.sans-serif'] = ['SimHei'] + ## Xgboost 模型参数优化-初步 + #参考: https://juejin.im/post/6844903661013827598 + #每次调参时,备选参数数值以同数量级的1、3、10设置即可(比如设置1、3、10,或0.1、0.3、1.0,或0.01,0.03,0.10即可) + + from xgboost import XGBRegressor + from sklearn.model_selection import GridSearchCV + + estimator = XGBRegressor(random_state=0, + nthread=4, + seed=0 + ) + parameters = { + 'max_depth': range (2, 11, 2), # 树的最大深度 + 'n_estimators': range (50, 101, 10), # 迭代次数 + 'learning_rate': [0.01, 0.03, 0.1, 0.3, 0.5, 1] + } + + grid_search_XGB = GridSearchCV( + estimator=estimator, + param_grid=parameters, + # n_jobs = 10, + cv = 3, + verbose=True + ) + + grid_search_XGB.fit(X_train, y_train) + #如果电脑在此步骤报错,可能是因为计算量太大,超过硬件可支持程度,可注释掉“n_jobs=10”一行 + + best_parameters = grid_search_XGB.best_estimator_.get_params() + y_pred = grid_search_XGB.predict(x_test) + + op_XGBR_score = r2_score(y_true,y_pred) + op_XGBR_MSE= mean_squared_error(y_true, y_pred) + op_XGBR_RMSE= np.sqrt(op_XGBR_MSE) + + model_results2 = pd.DataFrame([['Optimized_Xgboost', op_XGBR_RMSE, op_XGBR_score]], + columns = ['模型(Model)', '均方根误差(RMSE)', 'R^2 score']) + model_results2=model_results2.set_index('模型(Model)') + + try: + results = model_results1.append(model_results2, ignore_index = False) + except: + results = pd.concat([model_results1,model_results2],ignore_index=True) + import pickle + + Pkl_Filename = "日度价格预测_液化气最佳模型.pkl" + + with open(Pkl_Filename, 'wb') as file: + pickle.dump(grid_search_XGB, file) + +def read_xls_data(): + """获取特征项ID""" + global one_cols, two_cols + # 使用pandas读取Excel文件 + df = pd.read_excel(read_file_path_name, header=None) # 不自动识别列名 + # 获取第二行数据(索引为1) + one_cols = df.iloc[1].tolist()[1:] + print(f'获取到的数据项ID{one_cols}') + + +def start(date=''): + """获取当日数据""" + read_xls_data() + token = get_head_auth() + if not token: + return + + cur_time,cur_time2 = getNow(date) + print(f"获取{cur_time}数据") + datas = get_data_value(token, one_cols,date=cur_time) + if not datas: + return + + append_rows = [cur_time2] + dataItemNo_dataValue = {} + for data_value in datas: + if "dataValue" not in data_value: + print(data_value) + dataItemNo_dataValue[data_value["dataItemNo"]] = "" + else: + dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] + + for value in one_cols: + if value in dataItemNo_dataValue: + append_rows.append(dataItemNo_dataValue[value]) + else: + append_rows.append("") + print('添加的行:',append_rows) + save_xls_2(append_rows) + + +def getNow(date='', offset=0): + """生成指定日期的两种格式字符串 + Args: + date: 支持多种输入类型: + - datetime对象 + - 字符串格式(支持'%Y-%m-%d'和'%Y%m%d') + - 空字符串表示当前日期 + offset: 日期偏移天数 + Returns: + tuple: (紧凑日期字符串, 标准日期字符串) + """ + # 日期解析逻辑 + from datetime import datetime,timedelta + if isinstance(date, datetime): + now = date + else: + now = datetime.now() + if date: + # 尝试多种日期格式解析 + for fmt in ('%Y-%m-%d', '%Y%m%d', '%Y/%m/%d'): + try: + now = datetime.strptime(str(date), fmt) + break + except ValueError: + continue + else: + raise ValueError(f"无法解析的日期格式: {date}") + + # 应用日期偏移 + now = now - timedelta(days=offset) + + # 统一格式化输出 + date_str = now.strftime("%Y-%m-%d") + compact_date = date_str.replace("-", "") + return compact_date, date_str + +def start_1(date=''): + """补充昨日数据""" + read_xls_data() + token = get_head_auth() + if not token: + return + + cur_time,cur_time2 = getNow(date,offset=1) + print(f"补充{cur_time}数据") + datas = get_data_value(token, one_cols,date=cur_time) + if not datas: + print(f"{cur_time}没有数据") + return + + append_rows = [cur_time2] + 
dataItemNo_dataValue = {} + for data_value in datas: + if "dataValue" not in data_value: + print(data_value) + dataItemNo_dataValue[data_value["dataItemNo"]] = "" + else: + dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] + + for value in one_cols: + if value in dataItemNo_dataValue: + append_rows.append(dataItemNo_dataValue[value]) + else: + append_rows.append("") + print('添加的行:',append_rows) + save_xls_2(append_rows) + + +def save_xls_2(append_rows): + """保存或更新数据到Excel文件 + 参数: + append_rows (list): 需要追加/更新的数据行,格式为[日期, 数据项1, 数据项2,...] + """ + try: + # 读取现有数据(假设第一行为列名) + df = pd.read_excel('液化气数据.xlsx', sheet_name=0) + # 转换append_rows为DataFrame + append_rows = pd.DataFrame([append_rows],columns=df.columns) + # 创建新数据行 + new_date = append_rows['Date'].values[0] + + dates = df['Date'].to_list() + # 判断日期是否存在 + if new_date in dates: + # 找到日期所在行的索引 + date_mask = df['Date'] == new_date + # 存在则更新数据 + df.loc[date_mask] = append_rows.values + print(f"更新 {new_date} 数据") + else: + # 不存在则追加数据 + df = pd.concat([df, append_rows], ignore_index=True) + print(df.head()) + print(df.tail()) + print(f"插入 {new_date} 新数据") + + # 保存更新后的数据 + df.to_excel('液化气数据.xlsx', index=False, engine='openpyxl') + + except FileNotFoundError: + # 如果文件不存在则创建新文件 + pd.DataFrame([append_rows]).to_excel('液化气数据.xlsx', index=False, engine='openpyxl') + except Exception as e: + print(f"保存数据时发生错误: {str(e)}") + +def check_data(dataItemNo): + token = get_head_auth() + if not token: + return + + datas = get_data_value(token, dataItemNo) + if not datas: + return + +def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEnd): + + search_data = { + "funcModule": "数据项", + "funcOperation": "查询", + "data": { + "dateStart": dateStart, + "dateEnd": dateEnd, + "dataItemNoList": dataItemNoList # 数据项编码,代表 brent最低价和最高价 + } + } + + headers = {"Authorization": token} + search_res = requests.post(url=url, headers=headers, json=search_data, timeout=(3, 5)) + search_value = json.loads(search_res.text)["data"] + if search_value: + return search_value + else: + return None + +def save_queryDataListItemNos_xls(data_df,dataItemNoList): + from datetime import datetime, timedelta + current_year_month = datetime.now().strftime('%Y-%m') + grouped = data_df.groupby("dataDate") + + # 使用openpyxl打开xlsx文件 + from openpyxl import load_workbook + workbook = load_workbook('液化气数据.xlsx') + + # 创建新工作簿 + new_workbook = load_workbook('液化气数据.xlsx') + + for sheetname in workbook.sheetnames: + sheet = workbook[sheetname] + new_sheet = new_workbook[sheetname] + + current_year_month_row = 0 + # 查找当前月份数据起始行 + for row_idx, row in enumerate(sheet.iter_rows(values_only=True), 1): + if str(row[0]).startswith(current_year_month): + current_year_month_row += 1 + + # 追加新数据 + if sheetname == workbook.sheetnames[0]: + start_row = sheet.max_row - current_year_month_row + 1 + for row_idx, (date, group) in enumerate(grouped, start=start_row): + new_sheet.cell(row=row_idx, column=1, value=date) + for j, dataItemNo in enumerate(dataItemNoList, start=2): + if group[group["dataItemNo"] == dataItemNo]["dataValue"].values: + new_sheet.cell(row=row_idx, column=j, + value=group[group["dataItemNo"] == dataItemNo]["dataValue"].values[0]) + + # 保存修改后的xlsx文件 + new_workbook.save("液化气数据.xlsx") + + +def queryDataListItemNos(date=None,token=None): + df = pd.read_excel('液化气数据.xlsx') + dataItemNoList = df.iloc[0].tolist()[1:] + if token is None: + token = get_head_auth() + if not token: + print('token获取失败') + return + # 获取当前日期 + if date is None: + current_date = 
datetime.now() + else: + current_date = date + # 获取当月1日 + first_day_of_month = current_date.replace(day=1) + # 格式化为 YYYYMMDD 格式 + dateEnd = current_date.strftime('%Y%m%d') + dateStart = first_day_of_month.strftime('%Y%m%d') + search_value = get_queryDataListItemNos_value(token, queryDataListItemNos_url, dataItemNoList, dateStart, dateEnd) + data_df = pd.DataFrame(search_value) + data_df["dataDate"] = pd.to_datetime(data_df["dataDate"]) + data_df["dataDate"] = data_df["dataDate"].dt.strftime('%Y-%m-%d') + save_queryDataListItemNos_xls(data_df,dataItemNoList) + print('当月数据更新完成') + + + + +def main(start_date=None,token=None,token_push=None): + from datetime import datetime, timedelta + if start_date is None: + start_date = datetime.now() + if token is None: + token = get_head_auth() + if token_push is None: + token_push = get_head_push_auth() + date = start_date.strftime('%Y%m%d') + print(date) + try: + # 更新当月数据 + queryDataListItemNos(start_date,token) + except: + print('当月数据更新失败,单日更新') + start(date) + # 更新当日数据,批量日期更新时打开 + # start(date) + # 训练模型 + optimize_Model() + # # 预测&上传预测结果 + upload_data_to_system(token_push,start_date) + + +if __name__ == "__main__": + print("运行中ing...") + main() diff --git a/aisenzhecode/石油苯/日度价格预测_最佳模型.pkl b/aisenzhecode/石油苯/日度价格预测_最佳模型.pkl index 884f380..2f7e1d4 100644 Binary files a/aisenzhecode/石油苯/日度价格预测_最佳模型.pkl and b/aisenzhecode/石油苯/日度价格预测_最佳模型.pkl differ diff --git a/aisenzhecode/石油苯/纯苯价格预测-自定义日期ytj.ipynb b/aisenzhecode/石油苯/纯苯价格预测-自定义日期ytj.ipynb index 5423f17..24dac3f 100644 --- a/aisenzhecode/石油苯/纯苯价格预测-自定义日期ytj.ipynb +++ b/aisenzhecode/石油苯/纯苯价格预测-自定义日期ytj.ipynb @@ -12,31 +12,14 @@ "text/html": [ " \n", + " \n", " " ] }, "metadata": {}, "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "运行中ing\n" - ] } ], "source": [ @@ -865,12 +848,97 @@ "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20250601\n", + "获取到的数据项ID['数据项编码', 'C01100047|STRIKE_PRICE', 'Brentspj', '913716251671540959|EXCHANGE_RATE', 'C01100010|LIST_PRICE01', '250326561|STRIKE_PRICE', 'C01100047|LIST_PRICE', 'C01100047|LIST_PRICE-1', 'C01100047|LIST_PRICE-01', 'OIL_CHEM|guonei|6097|PRICE', '91370500674526498A|C01100008|STRIKE_PRICE', '91370783724809024G|BEN|PRICE', '91370500737223620X|BEN|PRICE', '91370503706169019D|BEN|PRICE', '91370503164840647R|BEN|PRICE', 'C01100047|TURNOVER', '913705221649223519|C01100047|EXW', 'C01100047|CAPACITY']\n", + "获取到的数据项ID['C01100047|STRIKE_PRICE', 'Brentspj', '913716251671540959|EXCHANGE_RATE', 'C01100010|LIST_PRICE01', '250326561|STRIKE_PRICE', 'C01100047|LIST_PRICE', 'C01100047|LIST_PRICE-1', 'C01100047|LIST_PRICE-01', 'OIL_CHEM|guonei|6097|PRICE', '91370500674526498A|C01100008|STRIKE_PRICE', '91370783724809024G|BEN|PRICE', '91370500737223620X|BEN|PRICE', '91370503706169019D|BEN|PRICE', '91370503164840647R|BEN|PRICE', 'C01100047|TURNOVER', '913705221649223519|C01100047|EXW', 'C01100047|CAPACITY']\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20756\\2211479266.py:753: DeprecationWarning:\n", + "\n", + "The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. 
Use `array.size > 0` to check that an array is not empty.\n", + "\n", + "C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20756\\2211479266.py:302: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20756\\2211479266.py:307: FutureWarning:\n", + "\n", + "Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", + "\n", + "C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20756\\2211479266.py:308: FutureWarning:\n", + "\n", + "Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", + "\n", + "c:\\Users\\EDY\\.conda\\envs\\predict\\lib\\site-packages\\sklearn\\linear_model\\_coordinate_descent.py:695: ConvergenceWarning:\n", + "\n", + "Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 7.936e+05, tolerance: 5.806e+04\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "当月数据更新完成\n", + "Using matplotlib backend: inline\n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\EDY\\.conda\\envs\\predict\\lib\\site-packages\\IPython\\core\\magics\\pylab.py:166: UserWarning:\n", + "\n", + "pylab import has clobbered these variables: ['datetime', '__version__', 'random', 'plot']\n", + "`%matplotlib` prevents importing * from pylab and numpy\n", + "\n", + "C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20756\\2211479266.py:255: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20756\\2211479266.py:259: FutureWarning:\n", + "\n", + "Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", + "\n", + "C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20756\\2211479266.py:260: FutureWarning:\n", + "\n", + "Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", + "\n", + "C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_20756\\2211479266.py:285: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. 
In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date\n", + "2025-06-04 5826.140137\n", + "Name: 日度预测价格, dtype: float32\n", + "{'funcModule': '数据表信息列表', 'funcOperation': '新增', 'data': [{'dataItemNo': 'C01100047|FORECAST_PRICE', 'dataDate': '20250601', 'dataStatus': 'add', 'dataValue': np.float64(5826.14)}]}\n", + "{\"confirmFlg\":false,\"status\":true}\n" + ] + } + ], "source": [ "# # # 自定义日期执行预测\n", "\n", - "# start_date = datetime(2025, 5, 16)\n", - "# end_date = datetime(2025, 5, 19)\n", + "# start_date = datetime(2025, 6, 1)\n", + "# end_date = datetime(2025, 6, 2)\n", "\n", "# token = get_head_auth()\n", "# token_push = get_head_push_auth()\n", @@ -892,7 +960,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "predict", "language": "python", "name": "python3" }, @@ -906,7 +974,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.0" + "version": "3.10.16" } }, "nbformat": 4, diff --git a/aisenzhecode/石油苯/纯苯数据项.xls b/aisenzhecode/石油苯/纯苯数据项.xls index 10ec6b1..b7039c9 100644 Binary files a/aisenzhecode/石油苯/纯苯数据项.xls and b/aisenzhecode/石油苯/纯苯数据项.xls differ diff --git a/aisenzhecode/石油苯/纯苯每日价格预测.py b/aisenzhecode/石油苯/纯苯每日价格预测.py new file mode 100644 index 0000000..5ce59a6 --- /dev/null +++ b/aisenzhecode/石油苯/纯苯每日价格预测.py @@ -0,0 +1,794 @@ +import requests +import json +import xlrd +import xlwt +from datetime import datetime,timedelta +import time +# 变量定义 +login_url = "http://10.200.32.39/jingbo-api/api/server/login" +search_url = "http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos" + +login_push_url = "http://10.200.32.39/jingbo-api/api/server/login" +upload_url = "http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList" +queryDataListItemNos_url = "http://10.200.32.39/jingbo-api//api/warehouse/dwDataItem/queryDataListItemNos" + +login_data = { + "data": { + "account": "api_dev", + "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=", + "tenantHashCode": "8a4577dbd919675758d57999a1e891fe", + "terminal": "API" + }, + "funcModule": "API", + "funcOperation": "获取token" +} + +login_push_data = { + "data": { + "account": "api_dev", + "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=", + "tenantHashCode": "8a4577dbd919675758d57999a1e891fe", + "terminal": "API" + }, + "funcModule": "API", + "funcOperation": "获取token" +} + +read_file_path_name = "纯苯数据项.xls" +one_cols = [] +two_cols = [] + +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +import seaborn as sn +import random +import time + + + + +from plotly import __version__ +from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot + +from sklearn import preprocessing + +from pandas import Series,DataFrame + +import matplotlib.pyplot as plt + +import sklearn.datasets as datasets + +#导入机器学习算法模型 +from sklearn.linear_model import Lasso +from xgboost import XGBRegressor + +import statsmodels.api as sm +try: + from keras.preprocessing.sequence import TimeseriesGenerator +except: + from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator + +import plotly.express as px +import plotly.graph_objects as go + +import xgboost as xgb +from xgboost import plot_importance, plot_tree +from sklearn.metrics import mean_absolute_error +from statsmodels.tools.eval_measures import mse,rmse +from 
sklearn.model_selection import GridSearchCV +from xgboost import XGBRegressor +import warnings +import pickle + +from sklearn.metrics import mean_squared_error + +#切割训练数据和样本数据 +from sklearn.model_selection import train_test_split + +#用于模型评分 +from sklearn.metrics import r2_score + +le = preprocessing.LabelEncoder() + +# print(__version__) # requires version >= 1.9.0 + + +import cufflinks as cf +cf.go_offline() + +random.seed(100) + + + +# 数据获取 + +def get_head_auth(): + login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5)) + text = json.loads(login_res.text) + if text["status"]: + token = text["data"]["accessToken"] + return token + else: + print("获取认证失败") + return None + + +def get_data_value(token, dataItemNoList,date): + search_data = { + "data": { + "date": getNow(date)[0], + "dataItemNoList": dataItemNoList + }, + "funcModule": "数据项", + "funcOperation": "查询" + } + headers = {"Authorization": token} + search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5)) + search_value = json.loads(search_res.text)["data"] + if search_value: + return search_value + else: + print("今天没有新数据") + return search_value + + +# xls文件处理 + + + + + +def write_xls(data,date): + # 创建一个Workbook对象 + workbook = xlwt.Workbook() + + # 创建一个Sheet对象,可指定名称 + sheet = workbook.load('Sheet1') + + # 写入数据行 + for row_index, row_data in enumerate(data): + for col_index, cell_data in enumerate(row_data): + sheet.write(row_index, col_index, cell_data) + + # 保存Workbook到文件 + workbook.save(get_cur_time(date)[0] + '.xls') + + + +def getNow(date='', offset=0): + """生成指定日期的两种格式字符串 + Args: + date: 支持多种输入类型: + - datetime对象 + - 字符串格式(支持'%Y-%m-%d'和'%Y%m%d') + - 空字符串表示当前日期 + offset: 日期偏移天数 + Returns: + tuple: (紧凑日期字符串, 标准日期字符串) + """ + # 日期解析逻辑 + from datetime import datetime,timedelta + if isinstance(date, datetime): + now = date + else: + now = datetime.now() + if date: + # 尝试多种日期格式解析 + for fmt in ('%Y-%m-%d', '%Y%m%d', '%Y/%m/%d'): + try: + now = datetime.strptime(str(date), fmt) + break + except ValueError: + continue + else: + raise ValueError(f"无法解析的日期格式: {date}") + + # 应用日期偏移 + now = now - timedelta(days=offset) + + # 统一格式化输出 + date_str = now.strftime("%Y-%m-%d") + compact_date = date_str.replace("-", "") + return compact_date, date_str + + +def get_cur_time(date=''): + if date == '': + now = datetime.now() + # 如果是字符串,尝试解析日期 + elif isinstance(date, str): + now = datetime.strptime(date, '%Y-%m-%d') + else: + now = date + year = now.year + month = now.month + day = now.day + + if month < 10: + month = "0" + str(month) + if day < 10: + day = "0" + str(day) + cur_time = str(year) + str(month) + str(day) + cur_time2 = str(year) + "-" + str(month) + "-" + str(day) + return cur_time, cur_time2 + + +def get_head_push_auth(): + login_res = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5)) + text = json.loads(login_res.text) + if text["status"]: + token = text["data"]["accessToken"] + return token + else: + print("获取认证失败") + return None + + + +def upload_data_to_system(token_push,date): + datavalue = forecast_price() + data = { + "funcModule": "数据表信息列表", + "funcOperation": "新增", + "data": [ + {"dataItemNo": "C01100047|FORECAST_PRICE", + "dataDate": getNow(date)[0], + "dataStatus": "add", + "dataValue": datavalue + } + + ] + } + print(data) + headers = {"Authorization": token_push} + res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5)) + print(res.text) + + + +def forecast_price(): + # df_test = pd.read_csv('定价模型数据收集0212.csv') + df_test = 
pd.read_excel('纯苯数据项.xls',sheet_name='Sheet1') + df_test.drop([0],inplace=True) + # df_test['Date']=pd.to_datetime(df_test['Date'], format='%m/%d/%Y',infer_datetime_format=True) + df_test['Date']=pd.to_datetime(df_test['Date'], format=r'%Y-%m-%d',infer_datetime_format=True) + + + df_test_1 = df_test + df_test_1=df_test_1.fillna(df_test.ffill()) + df_test_1=df_test_1.fillna(df_test_1.bfill()) + + # 选择用于模型训练的列名称 + col_for_training = df_test_1.columns + + + import joblib + Best_model_DalyLGPrice = joblib.load("日度价格预测_最佳模型.pkl") + # 最新的一天为最后一行的数据 + + df_test_1_Day = df_test_1.tail(1) + # 移除不需要的列 + df_test_1_Day.index = df_test_1_Day["Date"] + df_test_1_Day = df_test_1_Day.drop(["Date"], axis= 1) + df_test_1_Day=df_test_1_Day.drop('Price',axis=1) + df_test_1_Day=df_test_1_Day.dropna() + + for col in df_test_1_Day.columns: + df_test_1_Day[col] = pd.to_numeric(df_test_1_Day[col],errors='coerce') + #预测今日价格,显示至小数点后两位 + Ypredict_Today=Best_model_DalyLGPrice.predict(df_test_1_Day) + + df_test_1_Day['日度预测价格']=Ypredict_Today + print(df_test_1_Day['日度预测价格']) + a = df_test_1_Day['日度预测价格'] + a = a[0] + a = float(a) + a = round(a,2) + return a +def optimize_Model(): + from sklearn.model_selection import train_test_split + from sklearn.impute import SimpleImputer + from sklearn.preprocessing import OrdinalEncoder + from sklearn.feature_selection import SelectFromModel + from sklearn.metrics import mean_squared_error, r2_score + import pandas as pd + + pd.set_option('display.max_rows',40) + pd.set_option('display.max_columns',40) + df_test = pd.read_excel('纯苯数据项.xls') + df_test.drop([0],inplace=True) + # df_test['Date']=pd.to_datetime(df_test['Date'], format='%m/%d/%Y',infer_datetime_format=True) + df_test['Date']=pd.to_datetime(df_test['Date'], format='%Y-%m-%d',infer_datetime_format=True) + + + #将缺失值补为前一个或者后一个数值 + df_test_1 = df_test + df_test_1=df_test_1.fillna(df_test.ffill()) + df_test_1=df_test_1.fillna(df_test_1.bfill()) + df_test_1["Date"] = pd.to_datetime(df_test_1["Date"]) + df_test_1.index = df_test_1["Date"] + df_test_1 = df_test_1.drop(["Date"], axis= 1) + df_test_1 = df_test_1.astype('float') + + + import numpy as np + import pandas as pd + from pandas import Series,DataFrame + + import matplotlib.pyplot as plt + + import sklearn.datasets as datasets + + #导入机器学习算法模型 + from sklearn.linear_model import Lasso + from xgboost import XGBRegressor + + import statsmodels.api as sm + try: + from keras.preprocessing.sequence import TimeseriesGenerator + except: + from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator + + import plotly.express as px + import plotly.graph_objects as go + + import xgboost as xgb + from xgboost import plot_importance, plot_tree + from sklearn.metrics import mean_absolute_error + from statsmodels.tools.eval_measures import mse,rmse + from sklearn.model_selection import GridSearchCV + from xgboost import XGBRegressor + import warnings + import pickle + + from sklearn.metrics import mean_squared_error + + #切割训练数据和样本数据 + from sklearn.model_selection import train_test_split + + #用于模型评分 + from sklearn.metrics import r2_score + + dataset1=df_test_1.drop('Price',axis=1)#.astype(float) + + y=df_test_1['Price'] + + x=dataset1 + + train = x + target = y + + #切割数据样本集合测试集 + X_train,x_test,y_train,y_true = train_test_split(train,target,test_size=0.2,random_state=0) + + #模型缩写 + Lasso = Lasso(random_state=0) + XGBR = XGBRegressor(random_state=0) + #训练模型 + Lasso.fit(X_train,y_train) + XGBR.fit(X_train,y_train) + #模型拟合 + y_pre_Lasso = Lasso.predict(x_test) + y_pre_XGBR = 
XGBR.predict(x_test) + + #计算Lasso、XGBR、RandomForestR、AdaBoostR、GradientBoostingR、BaggingRegressor各模型的R² + Lasso_score = r2_score(y_true,y_pre_Lasso) + XGBR_score=r2_score(y_true,y_pre_XGBR) + + #计算Lasso、XGBR的MSE和RMSE + Lasso_MSE=mean_squared_error(y_true, y_pre_Lasso) + XGBR_MSE=mean_squared_error(y_true, y_pre_XGBR) + + Lasso_RMSE=np.sqrt(Lasso_MSE) + XGBR_RMSE=np.sqrt(XGBR_MSE) + # 将不同模型的不同误差值整合成一个表格 + model_results = pd.DataFrame([['Lasso', Lasso_RMSE, Lasso_score], + ['XgBoost', XGBR_RMSE, XGBR_score]], + columns = ['模型(Model)','均方根误差(RMSE)', 'R^2 score']) + #将模型名称(Model)列设置为索引 + model_results1=model_results.set_index('模型(Model)') + + model_results1 + #定义plot_feature_importance函数,该函数用于计算特征重要性。此部分代码无需调整 + def plot_feature_importance(importance,names,model_type): + feature_importance = np.array(importance) + feature_names = np.array(names) + + data={'feature_names':feature_names,'feature_importance':feature_importance} + fi_df = pd.DataFrame(data) + + fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True) + + plt.figure(figsize=(10,8)) + sn.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names']) + + plt.title(model_type + " "+'FEATURE IMPORTANCE') + plt.xlabel('FEATURE IMPORTANCE') + plt.ylabel('FEATURE NAMES') + from pylab import mpl + mpl.rcParams['font.sans-serif'] = ['SimHei'] + ## Xgboost 模型参数优化-初步 + #参考: https://juejin.im/post/6844903661013827598 + #每次调参时,备选参数数值以同数量级的1、3、10设置即可(比如设置1、3、10,或0.1、0.3、1.0,或0.01,0.03,0.10即可) + + from xgboost import XGBRegressor + from sklearn.model_selection import GridSearchCV + + estimator = XGBRegressor(random_state=0, + nthread=4, + seed=0 + ) + parameters = { + 'max_depth': range (2, 11, 2), # 树的最大深度 + 'n_estimators': range (50, 101, 10), # 迭代次数 + 'learning_rate': [0.01, 0.03, 0.1, 0.3, 0.5, 1] + } + + grid_search_XGB = GridSearchCV( + estimator=estimator, + param_grid=parameters, + # n_jobs = 10, + cv = 3, + verbose=True + ) + + grid_search_XGB.fit(X_train, y_train) + #如果电脑在此步骤报错,可能是因为计算量太大,超过硬件可支持程度,可注释掉“n_jobs=10”一行 + + best_parameters = grid_search_XGB.best_estimator_.get_params() + y_pred = grid_search_XGB.predict(x_test) + + op_XGBR_score = r2_score(y_true,y_pred) + op_XGBR_MSE= mean_squared_error(y_true, y_pred) + op_XGBR_RMSE= np.sqrt(op_XGBR_MSE) + + model_results2 = pd.DataFrame([['Optimized_Xgboost', op_XGBR_RMSE, op_XGBR_score]], + columns = ['模型(Model)', '均方根误差(RMSE)', 'R^2 score']) + model_results2=model_results2.set_index('模型(Model)') + + # results = model_results1.append(model_results2, ignore_index = False) + results = pd.concat([model_results1,model_results2],ignore_index=True) + + + + import pickle + + Pkl_Filename = "日度价格预测_最佳模型.pkl" + + with open(Pkl_Filename, 'wb') as file: + pickle.dump(grid_search_XGB, file) + + + + +def read_xls_data(): + global one_cols, two_cols + # 打开 XLS 文件 + workbook = xlrd.open_workbook(read_file_path_name) + + # 获取所有表格名称 + # sheet_names = workbook.sheet_names() + + # 选择第一个表格 + sheet = workbook.sheet_by_index(0) + + # 获取行数和列数 + num_rows = sheet.nrows + # num_cols = sheet.ncols + + # 遍历每一行,获取单元格数据 + # for i in range(num_rows): + # row_data = sheet.row_values(i) + # one_cols.append(row_data) + # two_cols.append(row_data[1]) + + row_data = sheet.row_values(1) + print(f'获取到的数据项ID{row_data}') + one_cols = row_data[1:] + print(f'获取到的数据项ID{one_cols}') + + # 关闭 XLS 文件 + # workbook.close() + + + + +def start(date=None,token=None,token_push=None): + read_xls_data() + if date == None: + date = getNow()[0] + if token == None: + token = get_head_auth() + token_push = 
get_head_push_auth() + + datas = get_data_value(token, one_cols,date) + if not datas: + print("今天没有新数据") + return + + # data_list = [two_cols, one_cols] + append_rows = [getNow(date)[1]] +# append_rows = [getNow()[1]] + dataItemNo_dataValue = {} + for data_value in datas: + if "dataValue" not in data_value: + print(data_value) + dataItemNo_dataValue[data_value["dataItemNo"]] = "" + else: + dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] + + for value in one_cols: + if value in dataItemNo_dataValue: + append_rows.append(dataItemNo_dataValue[value]) + else: + append_rows.append("") + save_xls(append_rows) + + # 获取当月的数据写入到指定文件,如果是补充数据,不需要执行 + queryDataListItemNos() + # 模型训练 + optimize_Model() + # 上传预测数据 + upload_data_to_system(token_push,date) + # data_list.append(three_cols) + # write_xls(data_list) + + +def start_1(date=None): + read_xls_data() + if date == None: + date = getNow(offset=1)[0] + token = get_head_auth() + if not token: + return + + + datas = get_data_value(token, one_cols,date=date) +# if not datas: +# return + + # data_list = [two_cols, one_cols] + append_rows = [getNow(offset=1)[1]] + dataItemNo_dataValue = {} + for data_value in datas: + if "dataValue" not in data_value: + print(data_value) + dataItemNo_dataValue[data_value["dataItemNo"]] = "" + else: + dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] + + for value in one_cols: + if value in dataItemNo_dataValue: + append_rows.append(dataItemNo_dataValue[value]) + else: + append_rows.append("") + save_xls_1(append_rows) + + + # data_list.append(three_cols) + # write_xls(data_list) + +def save_xls_1(append_rows): + + # 打开xls文件 + workbook = xlrd.open_workbook('纯苯数据项.xls') + + # 获取所有sheet的个数 + sheet_count = len(workbook.sheet_names()) + + # 获取所有sheet的名称 + sheet_names = workbook.sheet_names() + + new_workbook = xlwt.Workbook() + for i in range(sheet_count): + # 获取当前sheet + sheet = workbook.sheet_by_index(i) + + # 获取sheet的行数和列数 + row_count = sheet.nrows - 1 + col_count = sheet.ncols + # 获取原有数据 + data = [] + for row in range(row_count): + row_data = [] + for col in range(col_count): + row_data.append(sheet.cell_value(row, col)) + data.append(row_data) + # 创建xlwt的Workbook对象 + # 创建sheet + new_sheet = new_workbook.add_sheet(sheet_names[i]) + + # 将原有的数据写入新的sheet + for row in range(row_count): + for col in range(col_count): + new_sheet.write(row, col, data[row][col]) + + if i == 0: + # 在新的sheet中添加数据 + for col in range(col_count): + new_sheet.write(row_count, col, append_rows[col]) + + # 保存新的xls文件 + new_workbook.save("纯苯数据项.xls") + + + + +def check_data(dataItemNo): + token = get_head_auth() + if not token: + return + + datas = get_data_value(token, dataItemNo) + if not datas: + return + + +def save_xls(append_rows): + + # 打开xls文件 + workbook = xlrd.open_workbook('纯苯数据项.xls') + + # 获取所有sheet的个数 + sheet_count = len(workbook.sheet_names()) + + # 获取所有sheet的名称 + sheet_names = workbook.sheet_names() + + new_workbook = xlwt.Workbook() + for i in range(sheet_count): + # 获取当前sheet + sheet = workbook.sheet_by_index(i) + + # 获取sheet的行数和列数 + row_count = sheet.nrows + col_count = sheet.ncols + # 获取原有数据 + data = [] + for row in range(row_count): + row_data = [] + for col in range(col_count): + row_data.append(sheet.cell_value(row, col)) + data.append(row_data) + # 创建xlwt的Workbook对象 + # 创建sheet + new_sheet = new_workbook.add_sheet(sheet_names[i]) + + # 将原有的数据写入新的sheet + for row in range(row_count): + for col in range(col_count): + new_sheet.write(row, col, data[row][col]) + + if i == 0: + # 
在新的sheet中添加数据 + for col in range(col_count): + new_sheet.write(row_count, col, append_rows[col]) + + # 保存新的xls文件 + new_workbook.save("纯苯数据项.xls") + + + + +def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEnd): + + search_data = { + "funcModule": "数据项", + "funcOperation": "查询", + "data": { + "dateStart": dateStart, + "dateEnd": dateEnd, + "dataItemNoList": dataItemNoList # 数据项编码,代表 brent最低价和最高价 + } + } + + headers = {"Authorization": token} + search_res = requests.post(url=url, headers=headers, json=search_data, timeout=(3, 5)) + search_value = json.loads(search_res.text)["data"] + if search_value: + return search_value + else: + return None + + + +def save_queryDataListItemNos_xls(data_df,dataItemNoList): + from datetime import datetime,timedelta + current_year_month = datetime.now().strftime('%Y-%m') + grouped = data_df.groupby("dataDate") + + # 打开xls文件 + workbook = xlrd.open_workbook('纯苯数据项.xls') + + # 获取所有sheet的个数 + sheet_count = len(workbook.sheet_names()) + + # 获取所有sheet的名称 + sheet_names = workbook.sheet_names() + + new_workbook = xlwt.Workbook() + for i in range(sheet_count): + # 获取当前sheet + sheet = workbook.sheet_by_index(i) + + # 获取sheet的行数和列数 + row_count = sheet.nrows + col_count = sheet.ncols + # 获取原有数据 + data = [] + for row in range(row_count): + row_data = [] + for col in range(col_count): + row_data.append(sheet.cell_value(row, col)) + data.append(row_data) + + # 创建xlwt的Workbook对象 + # 创建sheet + new_sheet = new_workbook.add_sheet(sheet_names[i]) + + + current_year_month_row = 0 + # 将原有的数据写入新的sheet + for row in range(row_count): + for col in range(col_count): + col0 = data[row][0] + # print("col0",col0[:7]) + if col0[:7] == current_year_month: + current_year_month_row += 1 + break + new_sheet.write(row, col, data[row][col]) + + + # print("current_year_month_row",current_year_month_row) + if i == 0: + rowFlag = 0 + # 查看每组数据 + for date, group in grouped: + new_sheet.write(row_count + rowFlag - current_year_month_row, 0, date) + for j in range(len(dataItemNoList)): + dataItemNo = dataItemNoList[j] + + if group[group["dataItemNo"] == dataItemNo]["dataValue"].values and (not str(group[group["dataItemNo"] == dataItemNo]["dataValue"].values[0]) == 'nan'): + + new_sheet.write(row_count + rowFlag - current_year_month_row, j + 1, group[group["dataItemNo"] == dataItemNo]["dataValue"].values[0]) + + rowFlag += 1 + + + # 保存新的xls文件 + new_workbook.save("纯苯数据项.xls") + + + +def queryDataListItemNos(date=None,token=None): + from datetime import datetime, timedelta + df = pd.read_excel('纯苯数据项.xls') + dataItemNoList = df.iloc[0].tolist()[1:] + if token is None: + token = get_head_auth() + if not token: + print('token获取失败') + return + # 获取当前日期 + if date is None: + current_date = datetime.now() + else: + current_date = date + # 获取当月1日 + first_day_of_month = current_date.replace(day=1) + # 格式化为 YYYYMMDD 格式 + dateEnd = current_date.strftime('%Y%m%d') + dateStart = first_day_of_month.strftime('%Y%m%d') + search_value = get_queryDataListItemNos_value(token, queryDataListItemNos_url, dataItemNoList, dateStart, dateEnd) + data_df = pd.DataFrame(search_value) + data_df["dataDate"] = pd.to_datetime(data_df["dataDate"]) + data_df["dataDate"] = data_df["dataDate"].dt.strftime('%Y-%m-%d') + save_queryDataListItemNos_xls(data_df,dataItemNoList) + print('当月数据更新完成') + + +if __name__ == "__main__": + print('运行中ing') + start() + diff --git a/aisenzhecode/聚合级丙烯/丙烯基础数据收集表.xls b/aisenzhecode/聚合级丙烯/丙烯基础数据收集表.xls index b305bf7..63e38cd 100644 Binary files a/aisenzhecode/聚合级丙烯/丙烯基础数据收集表.xls 
and b/aisenzhecode/聚合级丙烯/丙烯基础数据收集表.xls differ diff --git a/aisenzhecode/聚合级丙烯/丙烯每日价格预测.py b/aisenzhecode/聚合级丙烯/丙烯每日价格预测.py new file mode 100644 index 0000000..04486f4 --- /dev/null +++ b/aisenzhecode/聚合级丙烯/丙烯每日价格预测.py @@ -0,0 +1,632 @@ +import requests +import json +import xlrd +import xlwt +from datetime import datetime +import time +# 变量定义 +login_url = "http://10.200.32.39/jingbo-api/api/server/login" +search_url = "http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos" + +login_push_url = "http://10.200.32.39/jingbo-api/api/server/login" +upload_url = "http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList" + +login_data = { + "data": { + "account": "api_dev", + "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=", + "tenantHashCode": "8a4577dbd919675758d57999a1e891fe", + "terminal": "API" + }, + "funcModule": "API", + "funcOperation": "获取token" +} + +login_push_data = { + "data": { + "account": "api_dev", + "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=", + "tenantHashCode": "8a4577dbd919675758d57999a1e891fe", + "terminal": "API" + }, + "funcModule": "API", + "funcOperation": "获取token" +} + +read_file_path_name = "丙烯基础数据收集表.xls" +one_cols = [] +two_cols = [] + +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +import seaborn as sn +import random +import time + + + + +from plotly import __version__ +from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot + +from sklearn import preprocessing + +from pandas import Series,DataFrame + +import matplotlib.pyplot as plt + +import sklearn.datasets as datasets + +#导入机器学习算法模型 +from sklearn.linear_model import Lasso +from xgboost import XGBRegressor + +import datetime +import statsmodels.api as sm +from keras.preprocessing.sequence import TimeseriesGenerator + +import plotly.express as px +import plotly.graph_objects as go + +import xgboost as xgb +from xgboost import plot_importance, plot_tree +from sklearn.metrics import mean_absolute_error +from statsmodels.tools.eval_measures import mse,rmse +from sklearn.model_selection import GridSearchCV +from xgboost import XGBRegressor +import warnings +import pickle + +from sklearn.metrics import mean_squared_error + +#切割训练数据和样本数据 +from sklearn.model_selection import train_test_split + +#用于模型评分 +from sklearn.metrics import r2_score + +le = preprocessing.LabelEncoder() + +# print(__version__) # requires version >= 1.9.0 + + +import cufflinks as cf +cf.go_offline() + +random.seed(100) + + + +# 数据获取 + +def get_head_auth(): + login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5)) + text = json.loads(login_res.text) + if text["status"]: + token = text["data"]["accessToken"] + return token + else: + print("获取认证失败") + return None + + +def get_data_value(token, dataItemNoList): + search_data = { + "data": { + "date": get_cur_time()[0], + "dataItemNoList": dataItemNoList + }, + "funcModule": "数据项", + "funcOperation": "查询" + } + headers = {"Authorization": token} + search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5)) + search_value = json.loads(search_res.text)["data"] + if search_value: + return search_value + else: + print("今天没有新数据") + return search_value + + +# xls文件处理 + +def write_xls(data): + # 创建一个Workbook对象 + workbook = xlwt.Workbook() + + # 创建一个Sheet对象,可指定名称 + sheet = workbook.load('Sheet1') + + # 写入数据行 + for row_index, row_data in enumerate(data): + for col_index, cell_data in enumerate(row_data): + sheet.write(row_index, col_index, cell_data) + + # 
保存Workbook到文件 + workbook.save(get_cur_time()[0] + '.xls') + + +def get_cur_time(): + now = datetime.datetime.now() + year = now.year + month = now.month + day = now.day + + if month < 10: + month = "0" + str(month) + if day < 10: + day = "0" + str(day) + cur_time = str(year) + str(month) + str(day) + cur_time2 = str(year) + "-" + str(month) + "-" + str(day) +# cur_time = '20231011' +# cur_time2 = '2023-10-11' + return cur_time, cur_time2 + + +def get_head_push_auth(): + login_res = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5)) + text = json.loads(login_res.text) + if text["status"]: + token = text["data"]["accessToken"] + return token + else: + print("获取认证失败") + return None + + + +def upload_data_to_system(token_push): + data = { + "funcModule": "数据表信息列表", + "funcOperation": "新增", + "data": [ + {"dataItemNo": "C01100007|Forecast_Price|ACN", + "dataDate": get_cur_time()[0], + "dataStatus": "add", +# "dataValue": 7100 + "dataValue": forecast_price() + } + + ] + } + headers = {"Authorization": token_push} + res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5)) + print(res.text) + + +# def upload_data_to_system(token): +# data = { +# "funcModule": "数据表信息列表", +# "funcOperation": "新增", +# "data": [ +# {"dataItemNo": "C01100036|Forecast_ Price|ACN", +# "dataDate": '20230706', +# "dataStatus": "add", +# "dataValue": 3780.0 +# } + +# ] +# } +# headers = {"Authorization": token} +# res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5)) +# print(res.text) + + + +def forecast_price(): + # df_test = pd.read_csv('定价模型数据收集0212.csv') + df_test = pd.read_excel('丙烯基础数据收集表.xls') + df_test.drop([0],inplace=True) + df_test['Date']=pd.to_datetime(df_test['Date'], format='%m/%d/%Y',infer_datetime_format=True) + + #将缺失值补为前一个或者后一个数值 + df_test_1 = df_test + df_test_1=df_test_1.fillna(df_test.ffill()) + df_test_1=df_test_1.fillna(df_test_1.bfill()) + + # 选择用于模型训练的列名称 + col_for_training = df_test_1.columns + + + + + + import joblib + Best_model_DalyLGPrice = joblib.load("日度价格预测_丙烯最佳模型.pkl") + # 最新的一天为最后一行的数据 + + df_test_1_Day = df_test_1.tail(1) + # 移除不需要的列 + df_test_1_Day.index = df_test_1_Day["Date"] + df_test_1_Day = df_test_1_Day.drop(["Date"], axis= 1) + df_test_1_Day=df_test_1_Day.drop('Price',axis=1) + df_test_1_Day=df_test_1_Day.dropna() + + # df_test_1_Day + #预测今日价格,显示至小数点后两位 + Ypredict_Today=Best_model_DalyLGPrice.predict(df_test_1_Day) + + df_test_1_Day['日度预测价格']=Ypredict_Today + print(df_test_1_Day['日度预测价格']) + a = df_test_1_Day['日度预测价格'] + a = a[0] + a = float(a) + a = round(a,2) + return a +def optimize_Model(): + from sklearn.model_selection import train_test_split + from sklearn.impute import SimpleImputer + from sklearn.preprocessing import OrdinalEncoder + from sklearn.feature_selection import SelectFromModel + from sklearn.metrics import mean_squared_error, r2_score + import pandas as pd + + pd.set_option('display.max_rows',40) + pd.set_option('display.max_columns',40) + df_test = pd.read_excel('丙烯基础数据收集表.xls') + df_test.drop([0],inplace=True) + df_test['Date']=pd.to_datetime(df_test['Date'], format='%m/%d/%Y',infer_datetime_format=True) + #查看每个特征缺失值数量 + MisVal_Check=df_test.isnull().sum().sort_values(ascending=False) + #去掉缺失值百分比>0.4的特征,去掉这些特征后的新表格命名为df_test_1 + df_MisVal_Check = pd.DataFrame(MisVal_Check,)# + df_MisVal_Check_1=df_MisVal_Check.reset_index() + df_MisVal_Check_1.columns=['Variable_Name','Missing_Number'] + df_MisVal_Check_1['Missing_Number']=df_MisVal_Check_1['Missing_Number']/len(df_test) + 
df_test_1=df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number']>0.4].Variable_Name,axis = 1) + #将缺失值补为前一个或者后一个数值 + df_test_1 = df_test + df_test_1=df_test_1.fillna(df_test.ffill()) + df_test_1=df_test_1.fillna(df_test_1.bfill()) + df_test_1["Date"] = pd.to_datetime(df_test_1["Date"]) + df_test_1.index = df_test_1["Date"] + df_test_1 = df_test_1.drop(["Date"], axis= 1) + df_test_1 = df_test_1.astype('float') + import numpy as np + import pandas as pd + from pandas import Series,DataFrame + + import matplotlib.pyplot as plt + + import sklearn.datasets as datasets + + #导入机器学习算法模型 + from sklearn.linear_model import Lasso + from xgboost import XGBRegressor + + from datetime import datetime + import statsmodels.api as sm + from keras.preprocessing.sequence import TimeseriesGenerator + + import plotly.express as px + import plotly.graph_objects as go + + import xgboost as xgb + from xgboost import plot_importance, plot_tree + from sklearn.metrics import mean_absolute_error + from statsmodels.tools.eval_measures import mse,rmse + from sklearn.model_selection import GridSearchCV + from xgboost import XGBRegressor + import warnings + import pickle + + from sklearn.metrics import mean_squared_error + + #切割训练数据和样本数据 + from sklearn.model_selection import train_test_split + + #用于模型评分 + from sklearn.metrics import r2_score + + dataset1=df_test_1.drop('Price',axis=1)#.astype(float) + + y=df_test_1['Price'] + + x=dataset1 + + train = x + target = y + + #切割数据样本集合测试集 + X_train,x_test,y_train,y_true = train_test_split(train,target,test_size=0.2,random_state=0) + + #模型缩写 + Lasso = Lasso(random_state=0) + XGBR = XGBRegressor(random_state=0) + #训练模型 + Lasso.fit(X_train,y_train) + XGBR.fit(X_train,y_train) + #模型拟合 + y_pre_Lasso = Lasso.predict(x_test) + y_pre_XGBR = XGBR.predict(x_test) + + #计算Lasso、XGBR、RandomForestR、AdaBoostR、GradientBoostingR、BaggingRegressor各模型的R² + Lasso_score = r2_score(y_true,y_pre_Lasso) + XGBR_score=r2_score(y_true,y_pre_XGBR) + + #计算Lasso、XGBR的MSE和RMSE + Lasso_MSE=mean_squared_error(y_true, y_pre_Lasso) + XGBR_MSE=mean_squared_error(y_true, y_pre_XGBR) + + Lasso_RMSE=np.sqrt(Lasso_MSE) + XGBR_RMSE=np.sqrt(XGBR_MSE) + # 将不同模型的不同误差值整合成一个表格 + model_results = pd.DataFrame([['Lasso', Lasso_RMSE, Lasso_score], + ['XgBoost', XGBR_RMSE, XGBR_score]], + columns = ['模型(Model)','均方根误差(RMSE)', 'R^2 score']) + #将模型名称(Model)列设置为索引 + model_results1=model_results.set_index('模型(Model)') + + model_results1 + #定义plot_feature_importance函数,该函数用于计算特征重要性。此部分代码无需调整 + def plot_feature_importance(importance,names,model_type): + feature_importance = np.array(importance) + feature_names = np.array(names) + + data={'feature_names':feature_names,'feature_importance':feature_importance} + fi_df = pd.DataFrame(data) + + fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True) + + plt.figure(figsize=(10,8)) + sn.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names']) + + plt.title(model_type + " "+'FEATURE IMPORTANCE') + plt.xlabel('FEATURE IMPORTANCE') + plt.ylabel('FEATURE NAMES') + from pylab import mpl + mpl.rcParams['font.sans-serif'] = ['SimHei'] + ## Xgboost 模型参数优化-初步 + #参考: https://juejin.im/post/6844903661013827598 + #每次调参时,备选参数数值以同数量级的1、3、10设置即可(比如设置1、3、10,或0.1、0.3、1.0,或0.01,0.03,0.10即可) + + from xgboost import XGBRegressor + from sklearn.model_selection import GridSearchCV + + estimator = XGBRegressor(random_state=0, + nthread=4, + seed=0 + ) + parameters = { + 'max_depth': range (2, 11, 2), # 树的最大深度 + 'n_estimators': range (50, 101, 10), # 迭代次数 + 'learning_rate': 
[0.01, 0.03, 0.1, 0.3, 0.5, 1] + } + + grid_search_XGB = GridSearchCV( + estimator=estimator, + param_grid=parameters, + # n_jobs = 10, + cv = 3, + verbose=True + ) + + grid_search_XGB.fit(X_train, y_train) + #如果电脑在此步骤报错,可能是因为计算量太大,超过硬件可支持程度,可注释掉“n_jobs=10”一行 + + best_parameters = grid_search_XGB.best_estimator_.get_params() + y_pred = grid_search_XGB.predict(x_test) + + op_XGBR_score = r2_score(y_true,y_pred) + op_XGBR_MSE= mean_squared_error(y_true, y_pred) + op_XGBR_RMSE= np.sqrt(op_XGBR_MSE) + + model_results2 = pd.DataFrame([['Optimized_Xgboost', op_XGBR_RMSE, op_XGBR_score]], + columns = ['模型(Model)', '均方根误差(RMSE)', 'R^2 score']) + model_results2=model_results2.set_index('模型(Model)') + + results = model_results1.append(model_results2, ignore_index = False) + import pickle + + Pkl_Filename = "日度价格预测_丙烯最佳模型.pkl" + + with open(Pkl_Filename, 'wb') as file: + pickle.dump(grid_search_XGB, file) + + + + +def read_xls_data(): + global one_cols, two_cols + # 打开 XLS 文件 + workbook = xlrd.open_workbook(read_file_path_name) + + # 获取所有表格名称 + # sheet_names = workbook.sheet_names() + + # 选择第一个表格 + sheet = workbook.sheet_by_index(0) + + # 获取行数和列数 + num_rows = sheet.nrows + # num_cols = sheet.ncols + + # 遍历每一行,获取单元格数据 + # for i in range(num_rows): + # row_data = sheet.row_values(i) + # one_cols.append(row_data) + # two_cols.append(row_data[1]) + + row_data = sheet.row_values(1) + one_cols = row_data + + # 关闭 XLS 文件 + # workbook.close() + + + + +def start(): + read_xls_data() + + token = get_head_auth() + if not token: + return + token_push = get_head_push_auth() + if not token_push: + return + + datas = get_data_value(token, one_cols[1:]) +# if not datas: +# return + + # data_list = [two_cols, one_cols] + append_rows = [get_cur_time()[1]] + dataItemNo_dataValue = {} + for data_value in datas: + if "dataValue" not in data_value: + print(data_value) + dataItemNo_dataValue[data_value["dataItemNo"]] = "" + else: + dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] + + for value in one_cols[1:]: + if value in dataItemNo_dataValue: + append_rows.append(dataItemNo_dataValue[value]) + else: + append_rows.append("") + save_xls(append_rows) + optimize_Model() + upload_data_to_system(token_push) + # data_list.append(three_cols) + # write_xls(data_list) + + +def start_1(): + read_xls_data() + + token = get_head_auth() + if not token: + return + + + datas = get_data_value(token, one_cols[1:]) +# if not datas: +# return + + # data_list = [two_cols, one_cols] + append_rows = [get_cur_time()[1]] + dataItemNo_dataValue = {} + for data_value in datas: + if "dataValue" not in data_value: + print(data_value) + dataItemNo_dataValue[data_value["dataItemNo"]] = "" + else: + dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] + + for value in one_cols[1:]: + if value in dataItemNo_dataValue: + append_rows.append(dataItemNo_dataValue[value]) + else: + append_rows.append("") + save_xls_1(append_rows) + + + # data_list.append(three_cols) + # write_xls(data_list) + +def save_xls_1(append_rows): + + # 打开xls文件 + workbook = xlrd.open_workbook('丙烯基础数据收集表.xls') + + # 获取所有sheet的个数 + sheet_count = len(workbook.sheet_names()) + + # 获取所有sheet的名称 + sheet_names = workbook.sheet_names() + + new_workbook = xlwt.Workbook() + for i in range(sheet_count): + # 获取当前sheet + sheet = workbook.sheet_by_index(i) + + # 获取sheet的行数和列数 + row_count = sheet.nrows - 1 + col_count = sheet.ncols + # 获取原有数据 + data = [] + for row in range(row_count): + row_data = [] + for col in range(col_count): + 
row_data.append(sheet.cell_value(row, col)) + data.append(row_data) + # 创建xlwt的Workbook对象 + # 创建sheet + new_sheet = new_workbook.add_sheet(sheet_names[i]) + + # 将原有的数据写入新的sheet + for row in range(row_count): + for col in range(col_count): + new_sheet.write(row, col, data[row][col]) + + if i == 0: + # 在新的sheet中添加数据 + for col in range(col_count): + new_sheet.write(row_count, col, append_rows[col]) + + # 保存新的xls文件 + new_workbook.save("丙烯基础数据收集表.xls") + + + + +def check_data(dataItemNo): + token = get_head_auth() + if not token: + return + + datas = get_data_value(token, dataItemNo) + if not datas: + return + + +def save_xls(append_rows): + + # 打开xls文件 + workbook = xlrd.open_workbook('丙烯基础数据收集表.xls') + + # 获取所有sheet的个数 + sheet_count = len(workbook.sheet_names()) + + # 获取所有sheet的名称 + sheet_names = workbook.sheet_names() + + new_workbook = xlwt.Workbook() + for i in range(sheet_count): + # 获取当前sheet + sheet = workbook.sheet_by_index(i) + + # 获取sheet的行数和列数 + row_count = sheet.nrows + col_count = sheet.ncols + # 获取原有数据 + data = [] + for row in range(row_count): + row_data = [] + for col in range(col_count): + row_data.append(sheet.cell_value(row, col)) + data.append(row_data) + # 创建xlwt的Workbook对象 + # 创建sheet + new_sheet = new_workbook.add_sheet(sheet_names[i]) + + # 将原有的数据写入新的sheet + for row in range(row_count): + for col in range(col_count): + new_sheet.write(row, col, data[row][col]) + + if i == 0: + # 在新的sheet中添加数据 + for col in range(col_count): + new_sheet.write(row_count, col, append_rows[col]) + + # 保存新的xls文件 + new_workbook.save("丙烯基础数据收集表.xls") + + +if __name__ == "__main__": + start() + diff --git a/aisenzhecode/聚合级丙烯/日度价格预测_丙烯最佳模型.pkl b/aisenzhecode/聚合级丙烯/日度价格预测_丙烯最佳模型.pkl new file mode 100644 index 0000000..481c3de Binary files /dev/null and b/aisenzhecode/聚合级丙烯/日度价格预测_丙烯最佳模型.pkl differ diff --git a/config_jingbo.py b/config_jingbo.py index e0f7bc4..1713032 100644 --- a/config_jingbo.py +++ b/config_jingbo.py @@ -101,9 +101,9 @@ query_data_list_item_nos_url = f"http://{server_host}/jingbo-api/api/warehouse/d # 上传数据项值 push_data_value_list_url = f"http://{server_host}/jingbo-api/api/dw/dataValue/pushDataValueList" # 上传停更数据到市场信息平台 -push_waring_data_value_list_url = f"http://{server_host}:8080/jingbo-dev/api/basicBuiness/crudeOilWarning/crudeSaveOrupdate" +push_waring_data_value_list_url = f"http://{server_host}/jingbo-api/api/basicBuiness/crudeOilWarning/crudeSaveOrupdate" # 获取预警数据中取消订阅指标ID -get_waring_data_value_list_url = f"http://{server_host}:8080/jingbo-dev/api/basicBuiness/crudeOilWarning/dataList" +get_waring_data_value_list_url = f"http://{server_host}/jingbo-api/api/basicBuiness/crudeOilWarning/dataList" login_data = { diff --git a/config_jingbo_yuedu.py b/config_jingbo_yuedu.py index 07ed40b..713b0c4 100644 --- a/config_jingbo_yuedu.py +++ b/config_jingbo_yuedu.py @@ -391,7 +391,7 @@ table_name = 'v_tbl_crude_oil_warning' is_train = True # 是否训练 is_debug = False # 是否调试 is_eta = True # 是否使用eta接口 -is_market = True # 是否通过市场信息平台获取特征 ,在is_eta 为true 的情况下生效 +is_market = False # 是否通过市场信息平台获取特征 ,在is_eta 为true 的情况下生效 is_timefurture = True # 是否使用时间特征 is_fivemodels = False # 是否使用之前保存的最佳的5个模型 is_edbcode = False # 特征使用edbcoding列表中的 diff --git a/config_jingbo_zhoudu.py b/config_jingbo_zhoudu.py index 90ca98d..ab8e8c7 100644 --- a/config_jingbo_zhoudu.py +++ b/config_jingbo_zhoudu.py @@ -328,7 +328,7 @@ table_name = 'v_tbl_crude_oil_warning' is_train = True # 是否训练 is_debug = False # 是否调试 is_eta = True # 是否使用eta接口 -is_market = True # 是否通过市场信息平台获取特征 ,在is_eta 为true 的情况下生效 +is_market = False # 
是否通过市场信息平台获取特征 ,在is_eta 为true 的情况下生效 is_timefurture = True # 是否使用时间特征 is_fivemodels = False # 是否使用之前保存的最佳的5个模型 is_edbcode = False # 特征使用edbcoding列表中的 diff --git a/models/nerulforcastmodels.py b/models/nerulforcastmodels.py index 3b30237..b1172a2 100644 --- a/models/nerulforcastmodels.py +++ b/models/nerulforcastmodels.py @@ -1026,7 +1026,10 @@ def model_losss(sqlitedb, end_time): model_results3.to_csv(os.path.join( config.dataset, "model_evaluation.csv"), index=False) modelnames = model_results3['模型(Model)'].tolist() - most_model_name = modelnames[0] + if most_model_name == '': + most_model_name = modelnames[0] + else: + pass allmodelnames = modelnames.copy() # 保存5个最佳模型的名称 if len(modelnames) > 5: @@ -1329,7 +1332,8 @@ def model_losss(sqlitedb, end_time): # for model in most_model: # plt.plot(df['ds'], df[model], label=model,marker='o') - plt.plot(df['ds'], df[most_model_name], label=model, marker='o') + plt.plot(df['ds'], df[most_model_name], + label=most_model_name, marker='o') # 当前日期画竖虚线 plt.axvline(x=df['ds'].iloc[-config.horizon], color='r', linestyle='--') diff --git a/日度价格预测_最佳模型.pkl b/日度价格预测_最佳模型.pkl deleted file mode 100644 index 8102373..0000000 Binary files a/日度价格预测_最佳模型.pkl and /dev/null differ
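A note on the current-month refresh added for 纯苯数据项.xls (queryDataListItemNos plus save_queryDataListItemNos_xls): it reduces to two steps, build a date window from the first of the month to today, then turn the returned (dataDate, dataItemNo, dataValue) records into one row per date in the sheet's column order. A minimal sketch of that shape, with hypothetical helper names (current_month_window, month_frame) standing in for the functions in the patch:

from datetime import datetime

import pandas as pd


def current_month_window(today=None):
    """Return (dateStart, dateEnd) as YYYYMMDD strings, first of the month to today."""
    today = today or datetime.now()
    return today.replace(day=1).strftime('%Y%m%d'), today.strftime('%Y%m%d')


def month_frame(records, item_nos):
    """Pivot API records of (dataDate, dataItemNo, dataValue) into one row per date."""
    df = pd.DataFrame(records)
    df['dataDate'] = pd.to_datetime(df['dataDate']).dt.strftime('%Y-%m-%d')
    wide = df.pivot_table(index='dataDate', columns='dataItemNo',
                          values='dataValue', aggfunc='first')
    return wide.reindex(columns=item_nos)  # keep the sheet's column order

The patch itself walks the grouped records and rewrites the current month's rows inside the xls; the pivot above only illustrates the intended shape of the result, not a replacement for that code.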
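In the new 丙烯每日价格预测.py, write_xls() calls workbook.load('Sheet1'), but xlwt.Workbook has no load() method, so the helper would raise AttributeError if it were ever called (it appears to be unused in the script). If it is kept, add_sheet() is the xlwt call that creates a sheet; a corrected sketch, with the output path passed in rather than derived from get_cur_time():

import xlwt


def write_xls(data, filename):
    """Write a list of rows to a fresh .xls file."""
    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet('Sheet1')  # xlwt creates sheets; it cannot load them
    for row_index, row_data in enumerate(data):
        for col_index, cell_data in enumerate(row_data):
            sheet.write(row_index, col_index, cell_data)
    workbook.save(filename)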
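get_cur_time() zero-pads the month and day by hand; strftime produces both of the formats the script needs directly, which is a simpler drop-in if the hand-rolled padding is not deliberate:

from datetime import datetime


def get_cur_time(now=None):
    """Return today's date as ('YYYYMMDD', 'YYYY-MM-DD')."""
    now = now or datetime.now()
    return now.strftime('%Y%m%d'), now.strftime('%Y-%m-%d')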
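The inference path in forecast_price() is: read the collection sheet, fill gaps forward then backward, take the most recent row, drop the Date index and the Price target, and predict with the pickled model. A condensed sketch of that flow (predict_latest is a hypothetical name; the file, column names, and date format follow the patch, and the float cast mirrors the astype('float') used in optimize_Model()):

import joblib
import pandas as pd


def predict_latest(xls_path='丙烯基础数据收集表.xls',
                   model_path='日度价格预测_丙烯最佳模型.pkl'):
    """Predict today's price from the last row of the collection sheet."""
    df = pd.read_excel(xls_path).drop(index=0)          # first data row holds dataItemNo codes
    df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%Y')
    df = df.ffill().bfill()                             # fill gaps from neighbouring days
    latest = (df.tail(1)
                .set_index('Date')
                .drop(columns=['Price'])
                .astype(float))                         # cast features, as optimize_Model() does
    model = joblib.load(model_path)                     # object pickled by optimize_Model()
    return round(float(model.predict(latest)[0]), 2)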
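optimize_Model() benchmarks Lasso against an untuned XGBRegressor on the same 80/20 split before any tuning, scoring both on RMSE and R². The comparison can be expressed compactly as below (compare_baselines is a hypothetical name; the split settings and table labels match the patch, and X is assumed to be the numeric feature frame with y the Price series):

import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor


def compare_baselines(X, y):
    """Fit both baselines on one split and tabulate RMSE and R^2."""
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=0)
    rows = []
    for name, model in [('Lasso', Lasso(random_state=0)),
                        ('XgBoost', XGBRegressor(random_state=0))]:
        model.fit(X_train, y_train)
        pred = model.predict(X_test)
        rows.append([name,
                     np.sqrt(mean_squared_error(y_test, pred)),
                     r2_score(y_test, pred)])
    return (pd.DataFrame(rows, columns=['模型(Model)', '均方根误差(RMSE)', 'R^2 score'])
              .set_index('模型(Model)'))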
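The tuning step wraps XGBRegressor in GridSearchCV over max_depth, n_estimators, and learning_rate with 3-fold CV, then pickles the fitted searcher as 日度价格预测_丙烯最佳模型.pkl. One design note, not what the patch currently does: persisting best_estimator_ instead of the whole GridSearchCV object keeps the artifact smaller and removes the CV wrapper from the inference path, and forecast_price() works unchanged either way because both expose predict(). A sketch under that assumption (tune_and_save is a hypothetical name; the grid mirrors the patch):

import pickle

from sklearn.model_selection import GridSearchCV
from xgboost import XGBRegressor


def tune_and_save(X_train, y_train, pkl_path='日度价格预测_丙烯最佳模型.pkl'):
    """Grid-search an XGBRegressor and persist only the best estimator."""
    grid = GridSearchCV(
        estimator=XGBRegressor(random_state=0, nthread=4, seed=0),
        param_grid={
            'max_depth': range(2, 11, 2),
            'n_estimators': range(50, 101, 10),
            'learning_rate': [0.01, 0.03, 0.1, 0.3, 0.5, 1],
        },
        cv=3,
        verbose=True,
    )
    grid.fit(X_train, y_train)
    with open(pkl_path, 'wb') as fh:
        pickle.dump(grid.best_estimator_, fh)  # smaller artifact than pickling the searcher
    return grid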
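save_xls() and save_xls_1() share one copy-and-append pattern: .xls files cannot be appended in place with xlrd/xlwt, so every sheet is copied into a fresh xlwt workbook and today's row is written onto the first sheet before saving over the original file. The one difference is that save_xls_1() takes row_count = sheet.nrows - 1, so it drops the last existing row and writes the new row in its place (presumably to refresh today's entry), while save_xls() appends after the last row. The shared pattern, as a sketch (append_row is a hypothetical name):

import xlrd
import xlwt


def append_row(path, new_row):
    """Copy every sheet of an .xls file and append new_row to the first sheet."""
    old = xlrd.open_workbook(path)
    new = xlwt.Workbook()
    for idx, name in enumerate(old.sheet_names()):
        src = old.sheet_by_index(idx)
        dst = new.add_sheet(name)
        for r in range(src.nrows):                  # copy existing cells as-is
            for c in range(src.ncols):
                dst.write(r, c, src.cell_value(r, c))
        if idx == 0:                                # the new row only goes on the first sheet
            for c, value in enumerate(new_row):
                dst.write(src.nrows, c, value)
    new.save(path)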
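On the models/nerulforcastmodels.py hunks: the new guard keeps an already chosen most_model_name and only falls back to the top of the ranking when it is empty, and the plot now labels the curve with most_model_name instead of the loop variable left over from the commented-out loop above it. The else: pass branch is a no-op; the same guard reads more directly as a one-liner, shown here as a hypothetical wrapper:

def pick_most_model(most_model_name, modelnames):
    """Keep a previously selected model name; otherwise fall back to the best-ranked one."""
    return most_model_name if most_model_name else modelnames[0]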