diff --git a/aisenzhecode/沥青/定性模型数据项12-11.xls b/aisenzhecode/沥青/定性模型数据项12-11.xls new file mode 100644 index 0000000..b7737a5 Binary files /dev/null and b/aisenzhecode/沥青/定性模型数据项12-11.xls differ diff --git a/aisenzhecode/沥青/沥青定性模型每日推送-ytj.ipynb b/aisenzhecode/沥青/沥青定性模型每日推送-ytj.ipynb new file mode 100644 index 0000000..24d8db5 --- /dev/null +++ b/aisenzhecode/沥青/沥青定性模型每日推送-ytj.ipynb @@ -0,0 +1,530 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "import json\n", + "import xlrd\n", + "import xlwt\n", + "from datetime import datetime, timedelta \n", + "import time\n", + "import pandas as pd\n", + "import numpy as np\n", + "# 变量定义\n", + "login_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n", + "search_url = \"http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos\"\n", + "\n", + "login_push_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n", + "upload_url = \"http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList\"\n", + "\n", + "login_data = {\n", + " \"data\": {\n", + " \"account\": \"api_dev\",\n", + " \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n", + " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n", + " \"terminal\": \"API\"\n", + " },\n", + " \"funcModule\": \"API\",\n", + " \"funcOperation\": \"获取token\"\n", + "}\n", + "\n", + "login_push_data = {\n", + " \"data\": {\n", + " \"account\": \"api_dev\",\n", + " \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n", + " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n", + " \"terminal\": \"API\"\n", + " },\n", + " \"funcModule\": \"API\",\n", + " \"funcOperation\": \"获取token\"\n", + "}\n", + "\n", + "read_file_path_name = \"定性模型数据项12-11.xls\"\n", + "one_cols = []\n", + "two_cols = []\n", + "\n", + "\n", + "\n", + "\n", + "def start(date=''):\n", + " workbook = xlrd.open_workbook(read_file_path_name)\n", + "\n", + "\n", 
+ "\n", + " # 选择第一个表格\n", + " sheet = workbook.sheet_by_index(0)\n", + "\n", + " # 获取行数和列数\n", + " num_rows = sheet.nrows\n", + "\n", + "\n", + "\n", + " row_data = sheet.row_values(1)\n", + " one_cols = row_data\n", + "\n", + "\n", + " login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5))\n", + " text = json.loads(login_res.text)\n", + " if text[\"status\"]:\n", + " token = text[\"data\"][\"accessToken\"]\n", + " else:\n", + " print(\"获取认证失败\")\n", + " token = None\n", + "\n", + " if date == '':\n", + " now = datetime.now()\n", + " else:\n", + " now = date\n", + " year = now.year\n", + " month = now.month\n", + " day = now.day\n", + "\n", + " if month < 10:\n", + " month = \"0\" + str(month)\n", + " if day < 10:\n", + " day = \"0\" + str(day)\n", + " cur_time = str(year) + str(month) + str(day)\n", + " cur_time2 = str(year) + \"-\" + str(month) + \"-\" + str(day)\n", + " search_data = {\n", + " \"data\": {\n", + " \"date\": cur_time,\n", + " \"dataItemNoList\": one_cols[1:]\n", + " },\n", + " \"funcModule\": \"数据项\",\n", + " \"funcOperation\": \"查询\"\n", + " }\n", + " headers = {\"Authorization\": token}\n", + " search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5))\n", + " search_value = json.loads(search_res.text)[\"data\"]\n", + "# datas = search_value\n", + " if search_value:\n", + " datas = search_value\n", + " else :\n", + " datas = None\n", + " \n", + "\n", + " append_rows = [cur_time2]\n", + " dataItemNo_dataValue = {}\n", + "# for data_value in datas:\n", + "# dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", + " for data_value in datas:\n", + " if \"dataValue\" not in data_value:\n", + " print(data_value)\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", + " else:\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", + " for value in one_cols[1:]:\n", + " if value in dataItemNo_dataValue:\n", + " 
append_rows.append(dataItemNo_dataValue[value])\n", + " else:\n", + " append_rows.append(\"\")\n", + "\n", + " workbook = xlrd.open_workbook('定性模型数据项12-11.xls')\n", + "\n", + " # 获取所有sheet的个数\n", + " sheet_count = len(workbook.sheet_names())\n", + "\n", + " # 获取所有sheet的名称\n", + " sheet_names = workbook.sheet_names()\n", + "\n", + " new_workbook = xlwt.Workbook()\n", + " for i in range(sheet_count):\n", + " # 获取当前sheet\n", + " sheet = workbook.sheet_by_index(i)\n", + "\n", + " # 获取sheet的行数和列数\n", + " row_count = sheet.nrows\n", + " col_count = sheet.ncols\n", + " # 获取原有数据\n", + " data = []\n", + " for row in range(row_count):\n", + " row_data = []\n", + " for col in range(col_count):\n", + " row_data.append(sheet.cell_value(row, col))\n", + " data.append(row_data)\n", + " # 创建xlwt的Workbook对象\n", + " # 创建sheet\n", + " new_sheet = new_workbook.add_sheet(sheet_names[i])\n", + "\n", + " # 将原有的数据写入新的sheet\n", + " for row in range(row_count):\n", + " for col in range(col_count):\n", + " new_sheet.write(row, col, data[row][col])\n", + "\n", + " if i == 0:\n", + " # 在新的sheet中添加数据\n", + " for col in range(col_count):\n", + " new_sheet.write(row_count, col, append_rows[col])\n", + "\n", + " # 保存新的xls文件\n", + " new_workbook.save(\"定性模型数据项12-11.xls\")\n", + "\n", + " df = pd.read_excel('定性模型数据项12-11.xls')\n", + " df=df.fillna(df.ffill())\n", + " df1 = df[-2:].reset_index()\n", + " if df1.loc[1,'70号沥青开工率'] > 0.3:\n", + " a = (df1.loc[1,'70号沥青开工率']-0.2)*5/0.1\n", + " else :\n", + " a = 0\n", + " b = df1.loc[1,'资金因素']\n", + " if df1.loc[1,'昨日计划提货偏差']>0:\n", + " c = df1.loc[1,'昨日计划提货偏差']*10/2000\n", + " else :\n", + " c = df1.loc[1,'昨日计划提货偏差']*10/3000\n", + " d = df1.loc[1,'生产情况']\n", + " if df1.loc[1,'基质沥青库存']/265007 >0.8:\n", + " e = (df1.loc[1,'基质沥青库存'] - df1.loc[0,'基质沥青库存'])*10/-5000\n", + " else : \n", + " e = 0\n", + " f = df1.loc[1,'下游客户价格预期']\n", + " if abs(df1.loc[1,'即期成本'] - df1.loc[0,'即期成本'])>=100:\n", + " g = (df1.loc[1,'即期成本'] - df1.loc[0,'即期成本'])*50/100\n", + " else 
:\n", + " g = 0\n", + " h = df1.loc[1,'订单结构']\n", + " x = round(0.08*a+0*b+0.15*c+0.08*d +0.03*e +0.08*f +0.4*g+0.18*h+df1.loc[0,'京博指导价'],2)\n", + "\n", + "\n", + " login_res1 = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5))\n", + " text1 = json.loads(login_res1.text)\n", + " token_push = text1[\"data\"][\"accessToken\"]\n", + "\n", + "\n", + " data1 = {\n", + " \"funcModule\": \"数据表信息列表\",\n", + " \"funcOperation\": \"新增\",\n", + " \"data\": [\n", + " {\"dataItemNo\": \"C01100036|Forecast_Price|DX|ACN\",\n", + " \"dataDate\": cur_time,\n", + " \"dataStatus\": \"add\",\n", + " \"dataValue\": x\n", + " }\n", + "\n", + " ]\n", + " }\n", + " headers1 = {\"Authorization\": token_push}\n", + " res = requests.post(url=upload_url, headers=headers1, json=data1, timeout=(3, 5))\n", + " \n", + " \n", + " \n", + " \n", + "def start_1():\n", + " workbook = xlrd.open_workbook(read_file_path_name)\n", + "\n", + "\n", + "\n", + " # 选择第一个表格\n", + " sheet = workbook.sheet_by_index(0)\n", + "\n", + " # 获取行数和列数\n", + " num_rows = sheet.nrows\n", + "\n", + "\n", + "\n", + " row_data = sheet.row_values(1)\n", + " one_cols = row_data\n", + "\n", + "\n", + " login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5))\n", + " text = json.loads(login_res.text)\n", + " if text[\"status\"]:\n", + " token = text[\"data\"][\"accessToken\"]\n", + " else:\n", + " print(\"获取认证失败\")\n", + " token = None\n", + "\n", + "\n", + " now = datetime.now() - timedelta(days=1) \n", + " year = now.year\n", + " month = now.month\n", + " day = now.day\n", + "\n", + " if month < 10:\n", + " month = \"0\" + str(month)\n", + " if day < 10:\n", + " day = \"0\" + str(day)\n", + " cur_time = str(year) + str(month) + str(day)\n", + " cur_time2 = str(year) + \"-\" + str(month) + \"-\" + str(day)\n", + " search_data = {\n", + " \"data\": {\n", + " \"date\": cur_time,\n", + " \"dataItemNoList\": one_cols[1:]\n", + " },\n", + " \"funcModule\": \"数据项\",\n", + " \"funcOperation\": 
\"查询\"\n", + " }\n", + " headers = {\"Authorization\": token}\n", + " search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5))\n", + " search_value = json.loads(search_res.text)[\"data\"]\n", + "# datas = search_value\n", + " if search_value:\n", + " datas = search_value\n", + " else :\n", + " datas = None\n", + " \n", + " \n", + "\n", + " append_rows = [cur_time2]\n", + " dataItemNo_dataValue = {}\n", + "# for data_value in datas:\n", + "# dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", + " for data_value in datas:\n", + " if \"dataValue\" not in data_value:\n", + " print(data_value)\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", + " else:\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", + " for value in one_cols[1:]:\n", + " if value in dataItemNo_dataValue:\n", + " append_rows.append(dataItemNo_dataValue[value])\n", + " else:\n", + " append_rows.append(\"\")\n", + "\n", + " workbook = xlrd.open_workbook('定性模型数据项12-11.xls')\n", + "\n", + " # 获取所有sheet的个数\n", + " sheet_count = len(workbook.sheet_names())\n", + "\n", + " # 获取所有sheet的名称\n", + " sheet_names = workbook.sheet_names()\n", + "\n", + " new_workbook = xlwt.Workbook()\n", + " for i in range(sheet_count):\n", + " # 获取当前sheet\n", + " sheet = workbook.sheet_by_index(i)\n", + "\n", + " # 获取sheet的行数和列数\n", + " row_count = sheet.nrows - 1\n", + " col_count = sheet.ncols\n", + " # 获取原有数据\n", + " data = []\n", + " for row in range(row_count):\n", + " row_data = []\n", + " for col in range(col_count):\n", + " row_data.append(sheet.cell_value(row, col))\n", + " data.append(row_data)\n", + " # 创建xlwt的Workbook对象\n", + " # 创建sheet\n", + " new_sheet = new_workbook.add_sheet(sheet_names[i])\n", + "\n", + " # 将原有的数据写入新的sheet\n", + " for row in range(row_count):\n", + " for col in range(col_count):\n", + " new_sheet.write(row, col, data[row][col])\n", + "\n", + " if i == 0:\n", + " # 
在新的sheet中添加数据\n", + " for col in range(col_count):\n", + " new_sheet.write(row_count, col, append_rows[col])\n", + "\n", + " # 保存新的xls文件\n", + " new_workbook.save(\"定性模型数据项12-11.xls\")\n", + "\n", + "\n", + "def start_2(date):\n", + " workbook = xlrd.open_workbook(read_file_path_name)\n", + "\n", + "\n", + "\n", + " # 选择第一个表格\n", + " sheet = workbook.sheet_by_index(0)\n", + "\n", + " # 获取行数和列数\n", + " num_rows = sheet.nrows\n", + "\n", + "\n", + "\n", + " row_data = sheet.row_values(1)\n", + " one_cols = row_data\n", + "\n", + "\n", + " login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5))\n", + " text = json.loads(login_res.text)\n", + " if text[\"status\"]:\n", + " token = text[\"data\"][\"accessToken\"]\n", + " else:\n", + " print(\"获取认证失败\")\n", + " token = None\n", + "\n", + "\n", + " now = date\n", + " year = now.year\n", + " month = now.month\n", + " day = now.day\n", + "\n", + " if month < 10:\n", + " month = \"0\" + str(month)\n", + " if day < 10:\n", + " day = \"0\" + str(day)\n", + " cur_time = str(year) + str(month) + str(day)\n", + " cur_time2 = str(year) + \"-\" + str(month) + \"-\" + str(day)\n", + " search_data = {\n", + " \"data\": {\n", + " \"date\": cur_time,\n", + " \"dataItemNoList\": one_cols[1:]\n", + " },\n", + " \"funcModule\": \"数据项\",\n", + " \"funcOperation\": \"查询\"\n", + " }\n", + " headers = {\"Authorization\": token}\n", + " search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5))\n", + " search_value = json.loads(search_res.text)[\"data\"]\n", + "# datas = search_value\n", + " if search_value:\n", + " datas = search_value\n", + " else :\n", + " datas = None\n", + " \n", + "\n", + " append_rows = [cur_time2]\n", + " dataItemNo_dataValue = {}\n", + "# for data_value in datas:\n", + "# dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", + " for data_value in datas:\n", + " if \"dataValue\" not in data_value:\n", + " print(data_value)\n", + " 
dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", + " else:\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", + " for value in one_cols[1:]:\n", + " if value in dataItemNo_dataValue:\n", + " append_rows.append(dataItemNo_dataValue[value])\n", + " else:\n", + " append_rows.append(\"\")\n", + "\n", + " workbook = xlrd.open_workbook('定性模型数据项12-11.xls')\n", + "\n", + " # 获取所有sheet的个数\n", + " sheet_count = len(workbook.sheet_names())\n", + "\n", + " # 获取所有sheet的名称\n", + " sheet_names = workbook.sheet_names()\n", + "\n", + " new_workbook = xlwt.Workbook()\n", + " for i in range(sheet_count):\n", + " # 获取当前sheet\n", + " sheet = workbook.sheet_by_index(i)\n", + "\n", + " # 获取sheet的行数和列数\n", + " row_count = sheet.nrows\n", + " col_count = sheet.ncols\n", + " # 获取原有数据\n", + " data = []\n", + " for row in range(row_count):\n", + " row_data = []\n", + " for col in range(col_count):\n", + " row_data.append(sheet.cell_value(row, col))\n", + " data.append(row_data)\n", + " # 创建xlwt的Workbook对象\n", + " # 创建sheet\n", + " new_sheet = new_workbook.add_sheet(sheet_names[i])\n", + "\n", + " # 将原有的数据写入新的sheet\n", + " for row in range(row_count):\n", + " for col in range(col_count):\n", + " new_sheet.write(row, col, data[row][col])\n", + "\n", + " if i == 0:\n", + " # 在新的sheet中添加数据\n", + " for col in range(col_count):\n", + " new_sheet.write(row_count, col, append_rows[col])\n", + "\n", + " # 保存新的xls文件\n", + " new_workbook.save(\"定性模型数据项12-11.xls\")\n", + " print('关闭文件')\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "if __name__ == \"__main__\":\n", + " pass\n", + " # 需要单独运行放开\n", + " \n", + " # start_1()\n", + "\n", + " # 每天定时12点运行\n", + " # while True:\n", + " # # 获取当前时间\n", + " # current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n", + " # current_time_1 = time.strftime(\"%H:%M:%S\", time.localtime())\n", + "\n", + " # # 判断当前时间是否为执行任务的时间点\n", + " # if current_time == \"12:00:00\":\n", + " # print(\"执行定时任务\")\n", 
+ " # start()\n", + "\n", + " # # 休眠1秒钟,避免过多占用CPU资源\n", + " # time.sleep(1)\n", + " \n", + " # elif current_time_1 == \"20:00:00\":\n", + " # print(\"更新数据\")\n", + " # start_1()\n", + " # time.sleep(1)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20241031\n", + "20241101\n" + ] + } + ], + "source": [ + "from datetime import datetime, timedelta\n", + "\n", + "start_date = datetime(2024, 10, 31)\n", + "end_date = datetime(2024, 11, 2)\n", + "\n", + "while start_date < end_date:\n", + " print(start_date.strftime('%Y%m%d'))\n", + " start(start_date)\n", + " start_date += timedelta(days=1)\n", + " \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb b/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb new file mode 100644 index 0000000..c0e6d0e --- /dev/null +++ b/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb @@ -0,0 +1,913 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import requests\n", + "import json\n", + "import xlrd\n", + "import xlwt\n", + "from datetime import datetime\n", + "import time\n", + "# 变量定义\n", + "login_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n", + "search_url = 
\"http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos\"\n", + "\n", + "login_push_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n", + "upload_url = \"http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList\"\n", + "\n", + "login_data = {\n", + " \"data\": {\n", + " \"account\": \"api_dev\",\n", + " \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n", + " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n", + " \"terminal\": \"API\"\n", + " },\n", + " \"funcModule\": \"API\",\n", + " \"funcOperation\": \"获取token\"\n", + "}\n", + "\n", + "login_push_data = {\n", + " \"data\": {\n", + " \"account\": \"api_dev\",\n", + " \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n", + " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n", + " \"terminal\": \"API\"\n", + " },\n", + " \"funcModule\": \"API\",\n", + " \"funcOperation\": \"获取token\"\n", + "}\n", + "\n", + "read_file_path_name = \"沥青数据项.xls\"\n", + "one_cols = []\n", + "two_cols = []\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sn\n", + "import random\n", + "import time\n", + "\n", + "\n", + "\n", + "\n", + "from plotly import __version__\n", + "from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot\n", + "\n", + "from sklearn import preprocessing\n", + "\n", + "from pandas import Series,DataFrame\n", + "\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import sklearn.datasets as datasets\n", + "\n", + "#导入机器学习算法模型\n", + "from sklearn.linear_model import Lasso\n", + "from xgboost import XGBRegressor\n", + "\n", + "import datetime\n", + "import statsmodels.api as sm\n", + "# from keras.preprocessing.sequence import TimeseriesGenerator\n", + "from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator\n", + "\n", + "import plotly.express as px\n", + "import plotly.graph_objects as go\n", + "\n", + "import xgboost 
as xgb\n", + "from xgboost import plot_importance, plot_tree\n", + "from sklearn.metrics import mean_absolute_error\n", + "from statsmodels.tools.eval_measures import mse,rmse\n", + "from sklearn.model_selection import GridSearchCV\n", + "from xgboost import XGBRegressor\n", + "import warnings\n", + "import pickle\n", + "\n", + "from sklearn.metrics import mean_squared_error\n", + "\n", + "#切割训练数据和样本数据\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "#用于模型评分\n", + "from sklearn.metrics import r2_score\n", + "\n", + "le = preprocessing.LabelEncoder()\n", + "\n", + "# print(__version__) # requires version >= 1.9.0\n", + "\n", + "\n", + "import cufflinks as cf\n", + "cf.go_offline()\n", + "\n", + "random.seed(100)\n", + "\n", + "%matplotlib inline\n", + "\n", + "# 数据获取\n", + "\n", + "def get_head_auth():\n", + " login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5))\n", + " text = json.loads(login_res.text)\n", + " if text[\"status\"]:\n", + " token = text[\"data\"][\"accessToken\"]\n", + " return token\n", + " else:\n", + " print(\"获取认证失败\")\n", + " return None\n", + "\n", + "\n", + "def get_data_value(token, dataItemNoList,date=''):\n", + "\n", + " search_data = {\n", + " \"data\": {\n", + " \"date\": get_cur_time(date)[0],\n", + " \"dataItemNoList\": dataItemNoList\n", + " },\n", + " \"funcModule\": \"数据项\",\n", + " \"funcOperation\": \"查询\"\n", + " }\n", + " headers = {\"Authorization\": token}\n", + " search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5))\n", + " search_value = json.loads(search_res.text)[\"data\"]\n", + " if search_value:\n", + " return search_value\n", + " else:\n", + " print(\"今天没有新数据\")\n", + " return None\n", + "\n", + "\n", + "# xls文件处理\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "def write_xls(data,date):\n", + " # 创建一个Workbook对象\n", + " workbook = xlwt.Workbook()\n", + "\n", + " # 创建一个Sheet对象,可指定名称\n", + " sheet = workbook.load('Sheet1')\n", + "\n", + " 
# 写入数据行\n", + " for row_index, row_data in enumerate(data):\n", + " for col_index, cell_data in enumerate(row_data):\n", + " sheet.write(row_index, col_index, cell_data)\n", + "\n", + " # 保存Workbook到文件\n", + " workbook.save(get_cur_time(date)[0] + '.xls')\n", + "\n", + "\n", + "def get_cur_time(date = ''):\n", + " if date == '':\n", + " import datetime\n", + " now = datetime.datetime.now()\n", + " else:\n", + " now = date\n", + " year = now.year\n", + " month = now.month\n", + " day = now.day\n", + "\n", + " if month < 10:\n", + " month = \"0\" + str(month)\n", + " if day < 10:\n", + " day = \"0\" + str(day)\n", + " cur_time = str(year) + str(month) + str(day)\n", + " cur_time2 = str(year) + \"-\" + str(month) + \"-\" + str(day)\n", + "# cur_time = '20231007'\n", + "# cur_time2 = '2023-10-07'\n", + " return cur_time, cur_time2\n", + "\n", + "\n", + "def get_head_push_auth():\n", + " login_res = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5))\n", + " text = json.loads(login_res.text)\n", + " if text[\"status\"]:\n", + " token = text[\"data\"][\"accessToken\"]\n", + " return token\n", + " else:\n", + " print(\"获取认证失败\")\n", + " return None\n", + "\n", + "\n", + "\n", + "def upload_data_to_system(token_push,date):\n", + " data = {\n", + " \"funcModule\": \"数据表信息列表\",\n", + " \"funcOperation\": \"新增\",\n", + " \"data\": [\n", + " {\"dataItemNo\": \"C01100036|Forecast_Price|ACN\",\n", + " \"dataDate\": get_cur_time(date)[0],\n", + " \"dataStatus\": \"add\",\n", + " \"dataValue\": forecast_price()\n", + " }\n", + "\n", + " ]\n", + " }\n", + " headers = {\"Authorization\": token_push}\n", + " res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))\n", + " print(res.text)\n", + "\n", + " \n", + "# def upload_data_to_system(token):\n", + "# data = {\n", + "# \"funcModule\": \"数据表信息列表\",\n", + "# \"funcOperation\": \"新增\",\n", + "# \"data\": [\n", + "# {\"dataItemNo\": \"C01100036|Forecast_ Price|ACN\",\n", + "# \"dataDate\": 
'20230706',\n", + "# \"dataStatus\": \"add\",\n", + "# \"dataValue\": 3780.0\n", + "# }\n", + "\n", + "# ]\n", + "# }\n", + "# headers = {\"Authorization\": token}\n", + "# res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))\n", + "# print(res.text)\n", + "\n", + "def forecast_price():\n", + " df_test = pd.read_excel('沥青数据项.xls',sheet_name='数据项历史数据')\n", + " df_test.drop([0],inplace=True)\n", + " df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", + " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", + " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", + " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", + " '京博签单量','京博库存量','京博产量','加权平均成交价']] = df_test[['汽油执行价','柴油执行价','齐鲁石化销量',\n", + " '齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", + " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", + " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", + " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", + " '京博签单量','京博库存量','京博产量','加权平均成交价']].astype('float')\n", + " # df_test['日期']=pd.to_datetime(df_test['日期'], format='%d/%m/%Y',infer_datetime_format=True)\n", + " df_test['日期']=pd.to_datetime(df_test['日期'], format='%Y-%m-%d',infer_datetime_format=True)\n", + "\n", + " #查看每个特征缺失值数量\n", + " MisVal_Check=df_test.isnull().sum().sort_values(ascending=False)\n", + " #去掉缺失值百分比>0.4的特征,去掉这些特征后的新表格命名为df_test_1\n", + " df_MisVal_Check = pd.DataFrame(MisVal_Check,)#\n", + " df_MisVal_Check_1=df_MisVal_Check.reset_index()\n", + " df_MisVal_Check_1.columns=['Variable_Name','Missing_Number'] \n", + " df_MisVal_Check_1['Missing_Number']=df_MisVal_Check_1['Missing_Number']/len(df_test)\n", + " df_test_1=df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number']>0.4].Variable_Name,axis = 1)\n", + "\n", + " #将缺失值补为前一个或者后一个数值\n", + " df_test_1=df_test_1.fillna(df_test.ffill())\n", + " 
df_test_1=df_test_1.fillna(df_test_1.bfill())\n", + "\n", + " # 选择用于模型训练的列名称\n", + " col_for_training = df_test_1.columns\n", + " import joblib\n", + " Best_model_DalyLGPrice = joblib.load(\"日度价格预测_最佳模型.pkl\")\n", + " # 最新的一天为最后一行的数据\n", + " df_test_1_Day = df_test_1.tail(1)\n", + " # 移除不需要的列\n", + " df_test_1_Day.index = df_test_1_Day[\"日期\"]\n", + " df_test_1_Day = df_test_1_Day.drop([\"日期\"], axis= 1)\n", + " df_test_1_Day=df_test_1_Day.drop('京博指导价',axis=1)\n", + " df_test_1_Day=df_test_1_Day.dropna()\n", + "\n", + " # df_test_1_Day\n", + " #预测今日价格,显示至小数点后两位\n", + " Ypredict_Today=Best_model_DalyLGPrice.predict(df_test_1_Day)\n", + "\n", + " df_test_1_Day['日度预测价格']=Ypredict_Today\n", + " print(df_test_1_Day['日度预测价格'])\n", + " a = df_test_1_Day['日度预测价格']\n", + " a = a[0]\n", + " a = float(a)\n", + " a = round(a,2)\n", + " return a\n", + "def optimize_Model():\n", + " from sklearn.model_selection import train_test_split\n", + " from sklearn.impute import SimpleImputer\n", + " from sklearn.preprocessing import OrdinalEncoder\n", + " from sklearn.feature_selection import SelectFromModel\n", + " from sklearn.metrics import mean_squared_error, r2_score\n", + "\n", + " pd.set_option('display.max_rows',40) \n", + " pd.set_option('display.max_columns',40) \n", + " df_test = pd.read_excel('沥青数据项.xls',sheet_name='数据项历史数据')\n", + " df_test.drop([0],inplace=True)\n", + " df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", + " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", + " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", + " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", + " '京博签单量','京博库存量','京博产量','加权平均成交价']] = df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", + " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", + " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", + " 
'隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", + " '京博签单量','京博库存量','京博产量','加权平均成交价']].astype('float')\n", + " # df_test = pd.read_csv('定价模型数据收集20190901-20230615.csv',encoding = 'gbk',engine = 'python')\n", + " # df_test['日期']=pd.to_datetime(df_test['日期'], format='%m/%d/%Y',infer_datetime_format=True)\n", + " df_test['日期']=pd.to_datetime(df_test['日期'], format='%Y-%m-%d',infer_datetime_format=True)\n", + " # df_test.tail(3)\n", + " MisVal_Check=df_test.isnull().sum().sort_values(ascending=False)\n", + " #去掉缺失值百分比>0.4的特征,去掉这些特征后的新表格命名为df_test_1\n", + " df_MisVal_Check = pd.DataFrame(MisVal_Check,)#\n", + " df_MisVal_Check_1=df_MisVal_Check.reset_index()\n", + " df_MisVal_Check_1.columns=['Variable_Name','Missing_Number'] \n", + " df_MisVal_Check_1['Missing_Number']=df_MisVal_Check_1['Missing_Number']/len(df_test)\n", + " df_test_1=df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number']>0.4].Variable_Name,axis = 1)\n", + " #将缺失值补为前一个或者后一个数值\n", + " df_test_1=df_test_1.fillna(df_test.ffill())\n", + " df_test_1=df_test_1.fillna(df_test_1.bfill())\n", + " df_test_1[\"日期\"] = pd.to_datetime(df_test_1[\"日期\"])\n", + " df_test_1.index = df_test_1[\"日期\"]\n", + " df_test_1 = df_test_1.drop([\"日期\"], axis= 1)\n", + " dataset1=df_test_1.drop('京博指导价',axis=1)#.astype(float)\n", + "\n", + " y=df_test_1['京博指导价']\n", + "\n", + " x=dataset1 \n", + "\n", + " train = x\n", + " target = y\n", + "\n", + " #切割数据样本集合测试集\n", + " X_train,x_test,y_train,y_true = train_test_split(train,target,test_size=0.2,random_state=0)\n", + " \n", + " \n", + " from sklearn.linear_model import Lasso\n", + " from xgboost import XGBRegressor\n", + "\n", + " from datetime import datetime\n", + " import statsmodels.api as sm\n", + " # from keras.preprocessing.sequence import TimeseriesGenerator\n", + " from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator\n", + "\n", + " import plotly.express as px\n", + " import plotly.graph_objects as go\n", + "\n", + " 
import xgboost as xgb\n", + " from xgboost import plot_importance, plot_tree\n", + " from sklearn.metrics import mean_absolute_error\n", + " from statsmodels.tools.eval_measures import mse,rmse\n", + " from sklearn.model_selection import GridSearchCV\n", + " from xgboost import XGBRegressor\n", + " import warnings\n", + " import pickle\n", + "\n", + " from sklearn.metrics import mean_squared_error\n", + "\n", + " #切割训练数据和样本数据\n", + " from sklearn.model_selection import train_test_split\n", + "\n", + " #用于模型评分\n", + " from sklearn.metrics import r2_score\n", + "\n", + " #模型缩写\n", + " Lasso = Lasso(random_state=0)\n", + " XGBR = XGBRegressor(random_state=0)\n", + " Lasso.fit(X_train,y_train)\n", + " XGBR.fit(X_train,y_train)\n", + " y_pre_Lasso = Lasso.predict(x_test)\n", + " y_pre_XGBR = XGBR.predict(x_test)\n", + "\n", + " #计算Lasso、XGBR、RandomForestR、AdaBoostR、GradientBoostingR、BaggingRegressor各模型的R²\n", + " Lasso_score = r2_score(y_true,y_pre_Lasso)\n", + " XGBR_score=r2_score(y_true,y_pre_XGBR)\n", + "\n", + " #计算Lasso、XGBR的MSE和RMSE\n", + " Lasso_MSE=mean_squared_error(y_true, y_pre_Lasso)\n", + " XGBR_MSE=mean_squared_error(y_true, y_pre_XGBR)\n", + "\n", + " Lasso_RMSE=np.sqrt(Lasso_MSE)\n", + " XGBR_RMSE=np.sqrt(XGBR_MSE)\n", + " model_results = pd.DataFrame([['Lasso', Lasso_RMSE, Lasso_score],\n", + " ['XgBoost', XGBR_RMSE, XGBR_score]],\n", + " columns = ['模型(Model)','均方根误差(RMSE)', 'R^2 score'])\n", + " model_results1=model_results.set_index('模型(Model)')\n", + "\n", + " model_results1\n", + " def plot_feature_importance(importance,names,model_type):\n", + " feature_importance = np.array(importance)\n", + " feature_names = np.array(names)\n", + "\n", + " data={'feature_names':feature_names,'feature_importance':feature_importance}\n", + " fi_df = pd.DataFrame(data)\n", + "\n", + " fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True)\n", + "\n", + " plt.figure(figsize=(10,8))\n", + " sn.barplot(x=fi_df['feature_importance'], 
y=fi_df['feature_names'])\n", + "\n", + " plt.title(model_type + \" \"+'FEATURE IMPORTANCE')\n", + " plt.xlabel('FEATURE IMPORTANCE')\n", + " plt.ylabel('FEATURE NAMES')\n", + " from pylab import mpl\n", + " %pylab\n", + " mpl.rcParams['font.sans-serif'] = ['SimHei']\n", + " from xgboost import XGBRegressor\n", + " from sklearn.model_selection import GridSearchCV\n", + "\n", + " estimator = XGBRegressor(random_state=0,\n", + " nthread=4,\n", + " seed=0\n", + " )\n", + " parameters = {\n", + " 'max_depth': range (2, 11, 2), # 树的最大深度\n", + " 'n_estimators': range (50, 101, 10), # 迭代次数\n", + " 'learning_rate': [0.01, 0.03, 0.1, 0.3, 0.5, 1]\n", + " }\n", + "\n", + " grid_search_XGB = GridSearchCV(\n", + " estimator=estimator,\n", + " param_grid=parameters,\n", + " # n_jobs = 10,\n", + " cv = 3,\n", + " verbose=True\n", + " )\n", + "\n", + " grid_search_XGB.fit(X_train, y_train)\n", + " print(\"Best score: %0.3f\" % grid_search_XGB.best_score_)\n", + " print(\"Best parameters set:\")\n", + " best_parameters = grid_search_XGB.best_estimator_.get_params()\n", + " for param_name in sorted(parameters.keys()):\n", + " print(\"\\t%s: %r\" % (param_name, best_parameters[param_name]))\n", + " y_pred = grid_search_XGB.predict(x_test)\n", + "\n", + " op_XGBR_score = r2_score(y_true,y_pred)\n", + " op_XGBR_MSE= mean_squared_error(y_true, y_pred)\n", + " op_XGBR_RMSE= np.sqrt(op_XGBR_MSE)\n", + "\n", + " model_results2 = pd.DataFrame([['Optimized_Xgboost', op_XGBR_RMSE, op_XGBR_score]],\n", + " columns = ['模型(Model)', '均方根误差(RMSE)', 'R^2 score'])\n", + " model_results2=model_results2.set_index('模型(Model)')\n", + "\n", + " # results = model_results1.append(model_results2, ignore_index = False)\n", + " results = pd.concat([model_results1,model_results2],ignore_index=True)\n", + " results\n", + " import pickle\n", + "\n", + " Pkl_Filename = \"日度价格预测_最佳模型.pkl\" \n", + "\n", + " with open(Pkl_Filename, 'wb') as file: \n", + " pickle.dump(grid_search_XGB, file)\n", + "\n", + "\n", + 
"\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "def read_xls_data():\n", + " global one_cols, two_cols\n", + " # 打开 XLS 文件\n", + " workbook = xlrd.open_workbook(read_file_path_name)\n", + "\n", + " # 获取所有表格名称\n", + " # sheet_names = workbook.sheet_names()\n", + "\n", + " # 选择第一个表格\n", + " sheet = workbook.sheet_by_index(0)\n", + "\n", + " # 获取行数和列数\n", + " num_rows = sheet.nrows\n", + " # num_cols = sheet.ncols\n", + "\n", + " # 遍历每一行,获取单元格数据\n", + " # for i in range(num_rows):\n", + " # row_data = sheet.row_values(i)\n", + " # one_cols.append(row_data)\n", + " # two_cols.append(row_data[1])\n", + "\n", + " row_data = sheet.row_values(1)\n", + " one_cols = row_data\n", + "\n", + " # 关闭 XLS 文件\n", + " # workbook.close()\n", + "\n", + "\n", + "\n", + "\n", + "def start():\n", + " '''预测上传数据'''\n", + " read_xls_data()\n", + "\n", + " token = get_head_auth()\n", + " if not token:\n", + " return\n", + " token_push = get_head_push_auth()\n", + " if not token_push:\n", + " return\n", + "\n", + " datas = get_data_value(token, one_cols[1:])\n", + " if not datas:\n", + " return\n", + "\n", + " # data_list = [two_cols, one_cols]\n", + " append_rows = [get_cur_time()[1]]\n", + " dataItemNo_dataValue = {}\n", + " for data_value in datas:\n", + " if \"dataValue\" not in data_value:\n", + " print(data_value)\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", + " else:\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", + " \n", + " for value in one_cols[1:]:\n", + " if value in dataItemNo_dataValue:\n", + " append_rows.append(dataItemNo_dataValue[value])\n", + " else:\n", + " append_rows.append(\"\")\n", + " save_xls(append_rows)\n", + " optimize_Model()\n", + " upload_data_to_system(token_push)\n", + " # data_list.append(three_cols)\n", + " # write_xls(data_list)\n", + "\n", + "\n", + "def start_3(date):\n", + " '''预测上传数据'''\n", + " read_xls_data()\n", + "\n", + " token = get_head_auth()\n", + " if not token:\n", + 
" return\n", + " token_push = get_head_push_auth()\n", + " if not token_push:\n", + " return\n", + "\n", + " datas = get_data_value(token, one_cols[1:],date)\n", + " if not datas:\n", + " return\n", + "\n", + " # data_list = [two_cols, one_cols]\n", + " append_rows = [get_cur_time(date)[1]]\n", + " dataItemNo_dataValue = {}\n", + " for data_value in datas:\n", + " if \"dataValue\" not in data_value:\n", + " print(data_value)\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", + " else:\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", + " \n", + " for value in one_cols[1:]:\n", + " if value in dataItemNo_dataValue:\n", + " append_rows.append(dataItemNo_dataValue[value])\n", + " else:\n", + " append_rows.append(\"\")\n", + " save_xls(append_rows)\n", + " optimize_Model()\n", + " upload_data_to_system(token_push,date)\n", + " # data_list.append(three_cols)\n", + " # write_xls(data_list)\n", + "\n", + "\n", + "\n", + "def start_1():\n", + " '''更新数据'''\n", + " read_xls_data()\n", + "\n", + " token = get_head_auth()\n", + " if not token:\n", + " return\n", + " \n", + "\n", + " datas = get_data_value(token, one_cols[1:])\n", + " if not datas:\n", + " return\n", + "\n", + " # data_list = [two_cols, one_cols]\n", + " append_rows = [get_cur_time()[1]]\n", + " dataItemNo_dataValue = {}\n", + " for data_value in datas:\n", + " if \"dataValue\" not in data_value:\n", + " print(data_value)\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", + " else:\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", + " \n", + " for value in one_cols[1:]:\n", + " if value in dataItemNo_dataValue:\n", + " append_rows.append(dataItemNo_dataValue[value])\n", + " else:\n", + " append_rows.append(\"\")\n", + " save_xls_1(append_rows)\n", + "\n", + " \n", + " # data_list.append(three_cols)\n", + " # write_xls(data_list)\n", + "\n", + "\n", + "def start_2(date):\n", + " '''更新数据'''\n", + " 
read_xls_data()\n", + "\n", + " token = get_head_auth()\n", + " if not token:\n", + " return\n", + " \n", + "\n", + " datas = get_data_value(token, one_cols[1:],date)\n", + " if not datas:\n", + " return\n", + "\n", + " # data_list = [two_cols, one_cols]\n", + " append_rows = [get_cur_time(date=date)[1]]\n", + " dataItemNo_dataValue = {}\n", + " for data_value in datas:\n", + " if \"dataValue\" not in data_value:\n", + " print(data_value)\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", + " else:\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", + " \n", + " for value in one_cols[1:]:\n", + " if value in dataItemNo_dataValue:\n", + " append_rows.append(dataItemNo_dataValue[value])\n", + " else:\n", + " append_rows.append(\"\")\n", + " print('新增数据:',append_rows)\n", + " save_xls_1(append_rows)\n", + "\n", + " \n", + " # data_list.append(three_cols)\n", + " # write_xls(data_list)\n", + " \n", + "def save_xls_1(append_rows):\n", + "\n", + " # 打开xls文件\n", + " workbook = xlrd.open_workbook('沥青数据项.xls')\n", + "\n", + " # 获取所有sheet的个数\n", + " sheet_count = len(workbook.sheet_names())\n", + "\n", + " # 获取所有sheet的名称\n", + " sheet_names = workbook.sheet_names()\n", + "\n", + " new_workbook = xlwt.Workbook()\n", + " for i in range(sheet_count):\n", + " # 获取当前sheet\n", + " sheet = workbook.sheet_by_index(i)\n", + "\n", + " # 获取sheet的行数和列数\n", + " row_count = sheet.nrows - 1\n", + " col_count = sheet.ncols\n", + " # 获取原有数据\n", + " data = []\n", + " for row in range(row_count):\n", + " row_data = []\n", + " for col in range(col_count):\n", + " row_data.append(sheet.cell_value(row, col))\n", + " data.append(row_data)\n", + " # 创建xlwt的Workbook对象\n", + " # 创建sheet\n", + " new_sheet = new_workbook.add_sheet(sheet_names[i])\n", + "\n", + " # 将原有的数据写入新的sheet\n", + " for row in range(row_count):\n", + " for col in range(col_count):\n", + " new_sheet.write(row, col, data[row][col])\n", + "\n", + " if i == 0:\n", + " # 
在新的sheet中添加数据\n", + " for col in range(col_count):\n", + " new_sheet.write(row_count, col, append_rows[col])\n", + "\n", + " # 保存新的xls文件\n", + " new_workbook.save(\"沥青数据项.xls\") \n", + "\n", + " \n", + " \n", + " \n", + "def check_data(dataItemNo):\n", + " token = get_head_auth()\n", + " if not token:\n", + " return\n", + "\n", + " datas = get_data_value(token, dataItemNo)\n", + " if not datas:\n", + " return\n", + "\n", + "\n", + "def save_xls(append_rows):\n", + "\n", + " # 打开xls文件\n", + " workbook = xlrd.open_workbook('沥青数据项.xls')\n", + "\n", + " # 获取所有sheet的个数\n", + " sheet_count = len(workbook.sheet_names())\n", + "\n", + " # 获取所有sheet的名称\n", + " sheet_names = workbook.sheet_names()\n", + "\n", + " new_workbook = xlwt.Workbook()\n", + " for i in range(sheet_count):\n", + " # 获取当前sheet\n", + " sheet = workbook.sheet_by_index(i)\n", + "\n", + " # 获取sheet的行数和列数\n", + " row_count = sheet.nrows\n", + " col_count = sheet.ncols\n", + " # 获取原有数据\n", + " data = []\n", + " for row in range(row_count):\n", + " row_data = []\n", + " for col in range(col_count):\n", + " row_data.append(sheet.cell_value(row, col))\n", + " data.append(row_data)\n", + " # 创建xlwt的Workbook对象\n", + " # 创建sheet\n", + " new_sheet = new_workbook.add_sheet(sheet_names[i])\n", + "\n", + " # 将原有的数据写入新的sheet\n", + " for row in range(row_count):\n", + " for col in range(col_count):\n", + " new_sheet.write(row, col, data[row][col])\n", + "\n", + " if i == 0:\n", + " # 在新的sheet中添加数据\n", + " for col in range(col_count):\n", + " new_sheet.write(row_count, col, append_rows[col])\n", + "\n", + " # 保存新的xls文件\n", + " new_workbook.save(\"沥青数据项.xls\")\n", + "\n", + "\n", + "if __name__ == \"__main__\":\n", + " pass\n", + " # 需要单独运行放开\n", + "# start()\n", + "\n", + " # 每天定时12点运行\n", + " # while True:\n", + " # # 获取当前时间\n", + " # current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n", + " # current_time_1 = time.strftime(\"%H:%M:%S\", time.localtime())\n", + "\n", + " # # 判断当前时间是否为执行任务的时间点\n", + " # if 
current_time == \"12:00:00\":\n", + " # print(\"执行定时任务\")\n", + " # start()\n", + "\n", + " # # 休眠1秒钟,避免过多占用CPU资源\n", + " # time.sleep(1)\n", + " \n", + " # elif current_time_1 == \"20:00:00\":\n", + " # print(\"更新数据\")\n", + " # start_1()\n", + " # time.sleep(1)\n", + "\n", + "\n", + "# # 检测数据准确性, 需要检测放开\n", + "# # check_data(\"100028098|LISTING_PRICE\")\n", + "# # check_data(\"9137070016544622XB|DAY_Yield\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20241025\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_6896\\1185823687.py:299: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: \n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "d:\\ProgramData\\anaconda3\\Lib\\site-packages\\IPython\\core\\magics\\pylab.py:162: UserWarning:\n", + "\n", + "pylab import has clobbered these variables: ['random', 'datetime', 'plot', '__version__']\n", + "`%matplotlib` prevents importing * from pylab and numpy\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best score: 0.997\n", + "Best parameters set:\n", + "\tlearning_rate: 0.1\n", + "\tmax_depth: 10\n", + "\tn_estimators: 100\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_6896\\1185823687.py:239: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_6896\\1185823687.py:273: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "日期\n", + "2024-10-25 3538.790283\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "新增数据: ['2024-10-25', '', '', 0.2, 0.0, 3660.0, 1.5, 0.1, 0.0, 3450.0, 7.9, 0.2, 0.2, 3450.0, 0.3, '', 3500.0, 75.46, 72.67, '', '', '', '', '', '', '', '', '', 3065.6453, '', '', '', '', '']\n" + ] + } + ], + "source": [ + "from datetime import datetime, timedelta\n", + "\n", + "start_date = datetime(2024, 10, 25)\n", + "end_date = datetime(2024, 10, 26)\n", + "\n", + "while start_date < end_date:\n", + " print(start_date.strftime('%Y%m%d'))\n", + " start_3(start_date)\n", + " time.sleep(1)\n", + " start_2(start_date)\n", + " start_date += timedelta(days=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} 
diff --git a/aisenzhecode/沥青/沥青数据项.xls b/aisenzhecode/沥青/沥青数据项.xls new file mode 100644 index 0000000..6ba4863 Binary files /dev/null and b/aisenzhecode/沥青/沥青数据项.xls differ diff --git a/aisenzhecode/液化石油气/液化气价格预测ytj.ipynb b/aisenzhecode/液化石油气/液化气价格预测ytj.ipynb new file mode 100644 index 0000000..a9af99c --- /dev/null +++ b/aisenzhecode/液化石油气/液化气价格预测ytj.ipynb @@ -0,0 +1,1240 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import requests\n", + "import json\n", + "import xlrd\n", + "import xlwt\n", + "from datetime import datetime\n", + "import time\n", + "# 变量定义\n", + "login_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n", + "search_url = \"http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos\"\n", + "\n", + "login_push_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n", + "upload_url = \"http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList\"\n", + "\n", + "login_data = {\n", + " \"data\": {\n", + " \"account\": \"api_dev\",\n", + " \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n", + " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n", + " \"terminal\": \"API\"\n", + " },\n", + " \"funcModule\": \"API\",\n", + " \"funcOperation\": \"获取token\"\n", + "}\n", + "\n", + "login_push_data = {\n", + " \"data\": {\n", + " \"account\": \"api_dev\",\n", + " \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n", + " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n", + " \"terminal\": \"API\"\n", + " },\n", + " \"funcModule\": \"API\",\n", + " \"funcOperation\": \"获取token\"\n", + "}\n", + "\n", + "read_file_path_name = \"液化气数据.xls\"\n", + "one_cols = []\n", + "two_cols = []\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import 
seaborn as sn\n", + "import random\n", + "import time\n", + "\n", + "\n", + "\n", + "\n", + "from plotly import __version__\n", + "from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot\n", + "\n", + "from sklearn import preprocessing\n", + "\n", + "from pandas import Series,DataFrame\n", + "\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import sklearn.datasets as datasets\n", + "\n", + "#导入机器学习算法模型\n", + "from sklearn.linear_model import Lasso\n", + "from xgboost import XGBRegressor\n", + "\n", + "import datetime\n", + "import statsmodels.api as sm\n", + "try:\n", + " from keras.preprocessing.sequence import TimeseriesGenerator\n", + "except:\n", + " from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator\n", + "\n", + "import plotly.express as px\n", + "import plotly.graph_objects as go\n", + "\n", + "import xgboost as xgb\n", + "from xgboost import plot_importance, plot_tree\n", + "from sklearn.metrics import mean_absolute_error\n", + "from statsmodels.tools.eval_measures import mse,rmse\n", + "from sklearn.model_selection import GridSearchCV\n", + "from xgboost import XGBRegressor\n", + "import warnings\n", + "import pickle\n", + "\n", + "from sklearn.metrics import mean_squared_error\n", + "\n", + "#切割训练数据和样本数据\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "#用于模型评分\n", + "from sklearn.metrics import r2_score\n", + "\n", + "le = preprocessing.LabelEncoder()\n", + "\n", + "# print(__version__) # requires version >= 1.9.0\n", + "\n", + "\n", + "import cufflinks as cf\n", + "cf.go_offline()\n", + "\n", + "random.seed(100)\n", + "\n", + "%matplotlib inline\n", + "\n", + "# 数据获取\n", + "\n", + "def get_head_auth():\n", + " login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5))\n", + " text = json.loads(login_res.text)\n", + " if text[\"status\"]:\n", + " token = text[\"data\"][\"accessToken\"]\n", + " return token\n", + " else:\n", + " print(\"获取认证失败\")\n", + " return 
None\n", + "\n", + "\n", + "def get_data_value(token, dataItemNoList,date):\n", + " search_data = {\n", + " \"data\": {\n", + " \"date\": get_cur_time(date)[0],\n", + " \"dataItemNoList\": dataItemNoList\n", + " },\n", + " \"funcModule\": \"数据项\",\n", + " \"funcOperation\": \"查询\"\n", + " }\n", + " headers = {\"Authorization\": token}\n", + " search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5))\n", + " search_value = json.loads(search_res.text)[\"data\"]\n", + " if search_value:\n", + " return search_value\n", + " else:\n", + " print(\"今天没有新数据\")\n", + " return search_value\n", + "\n", + "\n", + "\n", + "# xls文件处理\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "def write_xls(data):\n", + " # 创建一个Workbook对象\n", + " workbook = xlwt.Workbook()\n", + "\n", + " # 创建一个Sheet对象,可指定名称\n", + " sheet = workbook.load('Sheet1')\n", + "\n", + " # 写入数据行\n", + " for row_index, row_data in enumerate(data):\n", + " for col_index, cell_data in enumerate(row_data):\n", + " sheet.write(row_index, col_index, cell_data)\n", + "\n", + " # 保存Workbook到文件\n", + " workbook.save(get_cur_time()[0] + '.xls')\n", + "\n", + "\n", + "def get_cur_time(date = ''):\n", + " if date == '':\n", + " import datetime\n", + " now = datetime.datetime.now()\n", + " else:\n", + " now = date\n", + " year = now.year\n", + " month = now.month\n", + " day = now.day\n", + "\n", + " if month < 10:\n", + " month = \"0\" + str(month)\n", + " if day < 10:\n", + " day = \"0\" + str(day)\n", + " cur_time = str(year) + str(month) + str(day)\n", + " cur_time2 = str(year) + \"-\" + str(month) + \"-\" + str(day)\n", + "# cur_time = '20231011'\n", + "# cur_time2 = '2023-10-11'\n", + " return cur_time, cur_time2\n", + "\n", + "\n", + "\n", + "def get_head_push_auth():\n", + " login_res = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5))\n", + " text = json.loads(login_res.text)\n", + " if text[\"status\"]:\n", + " token = text[\"data\"][\"accessToken\"]\n", + " 
return token\n", + " else:\n", + " print(\"获取认证失败\")\n", + " return None\n", + "\n", + "\n", + "\n", + "def upload_data_to_system(token_push,date):\n", + " data = {\n", + " \"funcModule\": \"数据表信息列表\",\n", + " \"funcOperation\": \"新增\",\n", + " \"data\": [\n", + " {\"dataItemNo\": \"250855713|Forecast_Price|ACN\",\n", + " \"dataDate\": get_cur_time(date)[0],\n", + " \"dataStatus\": \"add\",\n", + " \"dataValue\": forecast_price()\n", + " }\n", + "\n", + " ]\n", + " }\n", + " headers = {\"Authorization\": token_push}\n", + " res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))\n", + " print(res.text)\n", + "\n", + " \n", + "# def upload_data_to_system(token):\n", + "# data = {\n", + "# \"funcModule\": \"数据表信息列表\",\n", + "# \"funcOperation\": \"新增\",\n", + "# \"data\": [\n", + "# {\"dataItemNo\": \"C01100036|Forecast_ Price|ACN\",\n", + "# \"dataDate\": '20230706',\n", + "# \"dataStatus\": \"add\",\n", + "# \"dataValue\": 3780.0\n", + "# }\n", + "\n", + "# ]\n", + "# }\n", + "# headers = {\"Authorization\": token}\n", + "# res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))\n", + "# print(res.text)\n", + "\n", + " \n", + " \n", + "def forecast_price():\n", + " # df_test = pd.read_csv('定价模型数据收集0212.csv')\n", + " df_test = pd.read_excel('液化气数据.xls')\n", + " df_test.drop([0],inplace=True)\n", + " try:\n", + " df_test['Date']=pd.to_datetime(df_test['Date'], format='%m/%d/%Y',infer_datetime_format=True)\n", + " except:\n", + " df_test['Date']=pd.to_datetime(df_test['Date'], format=r'%Y-%m-%d',infer_datetime_format=True)\n", + "\n", + "\n", + " df_test_1 = df_test\n", + " df_test_1=df_test_1.fillna(df_test.ffill())\n", + " df_test_1=df_test_1.fillna(df_test_1.bfill())\n", + "\n", + " # 选择用于模型训练的列名称\n", + " col_for_training = df_test_1.columns\n", + "\n", + "\n", + "\n", + "\n", + " import joblib\n", + " Best_model_DalyLGPrice = joblib.load(\"日度价格预测_液化气最佳模型.pkl\")\n", + " # 最新的一天为最后一行的数据\n", + " \n", + " df_test_1_Day = 
df_test_1.tail(1)\n", + " # 移除不需要的列\n", + " df_test_1_Day.index = df_test_1_Day[\"Date\"]\n", + " df_test_1_Day = df_test_1_Day.drop([\"Date\"], axis= 1)\n", + " df_test_1_Day=df_test_1_Day.drop('Price',axis=1)\n", + " df_test_1_Day=df_test_1_Day.dropna()\n", + "\n", + " for col in df_test_1_Day.columns:\n", + " df_test_1_Day[col] = pd.to_numeric(df_test_1_Day[col],errors='coerce')\n", + " #预测今日价格,显示至小数点后两位\n", + " Ypredict_Today=Best_model_DalyLGPrice.predict(df_test_1_Day)\n", + "\n", + " df_test_1_Day['日度预测价格']=Ypredict_Today\n", + " print(df_test_1_Day['日度预测价格'])\n", + " a = df_test_1_Day['日度预测价格']\n", + " a = a[0]\n", + " a = float(a)\n", + " a = round(a,2)\n", + " return a\n", + "def optimize_Model():\n", + " from sklearn.model_selection import train_test_split\n", + " from sklearn.impute import SimpleImputer\n", + " from sklearn.preprocessing import OrdinalEncoder\n", + " from sklearn.feature_selection import SelectFromModel\n", + " from sklearn.metrics import mean_squared_error, r2_score\n", + " import pandas as pd\n", + "\n", + " pd.set_option('display.max_rows',40) \n", + " pd.set_option('display.max_columns',40) \n", + " df_test = pd.read_excel('液化气数据.xls')\n", + " df_test.drop([0],inplace=True)\n", + " try:\n", + " df_test['Date']=pd.to_datetime(df_test['Date'], format='%m/%d/%Y',infer_datetime_format=True)\n", + " except:\n", + " df_test['Date']=pd.to_datetime(df_test['Date'], format=r'%Y-%m-%d',infer_datetime_format=True)\n", + "\n", + " \n", + " #将缺失值补为前一个或者后一个数值\n", + " df_test_1 = df_test\n", + " df_test_1=df_test_1.fillna(df_test.ffill())\n", + " df_test_1=df_test_1.fillna(df_test_1.bfill())\n", + " df_test_1[\"Date\"] = pd.to_datetime(df_test_1[\"Date\"])\n", + " df_test_1.index = df_test_1[\"Date\"]\n", + " df_test_1 = df_test_1.drop([\"Date\"], axis= 1)\n", + " df_test_1 = df_test_1.astype('float')\n", + " \n", + " \n", + " import numpy as np\n", + " import pandas as pd\n", + " from pandas import Series,DataFrame\n", + "\n", + " import 
matplotlib.pyplot as plt\n", + "\n", + " import sklearn.datasets as datasets\n", + "\n", + " #导入机器学习算法模型\n", + " from sklearn.linear_model import Lasso\n", + " from xgboost import XGBRegressor\n", + "\n", + " from datetime import datetime\n", + " import statsmodels.api as sm\n", + " try:\n", + " from keras.preprocessing.sequence import TimeseriesGenerator\n", + " except:\n", + " from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator\n", + "\n", + " import plotly.express as px\n", + " import plotly.graph_objects as go\n", + "\n", + " import xgboost as xgb\n", + " from xgboost import plot_importance, plot_tree\n", + " from sklearn.metrics import mean_absolute_error\n", + " from statsmodels.tools.eval_measures import mse,rmse\n", + " from sklearn.model_selection import GridSearchCV\n", + " from xgboost import XGBRegressor\n", + " import warnings\n", + " import pickle\n", + "\n", + " from sklearn.metrics import mean_squared_error\n", + "\n", + " #切割训练数据和样本数据\n", + " from sklearn.model_selection import train_test_split\n", + "\n", + " #用于模型评分\n", + " from sklearn.metrics import r2_score\n", + "\n", + " dataset1=df_test_1.drop('Price',axis=1)#.astype(float)\n", + "\n", + " y=df_test_1['Price']\n", + "\n", + " x=dataset1 \n", + "\n", + " train = x\n", + " target = y\n", + "\n", + " #切割数据样本集合测试集\n", + " X_train,x_test,y_train,y_true = train_test_split(train,target,test_size=0.2,random_state=0)\n", + "\n", + " #模型缩写\n", + " Lasso = Lasso(random_state=0)\n", + " XGBR = XGBRegressor(random_state=0)\n", + " #训练模型\n", + " Lasso.fit(X_train,y_train)\n", + " XGBR.fit(X_train,y_train)\n", + " #模型拟合\n", + " y_pre_Lasso = Lasso.predict(x_test)\n", + " y_pre_XGBR = XGBR.predict(x_test)\n", + "\n", + " #计算Lasso、XGBR、RandomForestR、AdaBoostR、GradientBoostingR、BaggingRegressor各模型的R²\n", + " Lasso_score = r2_score(y_true,y_pre_Lasso)\n", + " XGBR_score=r2_score(y_true,y_pre_XGBR)\n", + "\n", + " #计算Lasso、XGBR的MSE和RMSE\n", + " Lasso_MSE=mean_squared_error(y_true, 
y_pre_Lasso)\n", + " XGBR_MSE=mean_squared_error(y_true, y_pre_XGBR)\n", + "\n", + " Lasso_RMSE=np.sqrt(Lasso_MSE)\n", + " XGBR_RMSE=np.sqrt(XGBR_MSE)\n", + " # 将不同模型的不同误差值整合成一个表格\n", + " model_results = pd.DataFrame([['Lasso', Lasso_RMSE, Lasso_score],\n", + " ['XgBoost', XGBR_RMSE, XGBR_score]],\n", + " columns = ['模型(Model)','均方根误差(RMSE)', 'R^2 score'])\n", + " #将模型名称(Model)列设置为索引\n", + " model_results1=model_results.set_index('模型(Model)')\n", + "\n", + " model_results1\n", + " #定义plot_feature_importance函数,该函数用于计算特征重要性。此部分代码无需调整\n", + " def plot_feature_importance(importance,names,model_type):\n", + " feature_importance = np.array(importance)\n", + " feature_names = np.array(names)\n", + "\n", + " data={'feature_names':feature_names,'feature_importance':feature_importance}\n", + " fi_df = pd.DataFrame(data)\n", + "\n", + " fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True)\n", + "\n", + " plt.figure(figsize=(10,8))\n", + " sn.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names'])\n", + "\n", + " plt.title(model_type + \" \"+'FEATURE IMPORTANCE')\n", + " plt.xlabel('FEATURE IMPORTANCE')\n", + " plt.ylabel('FEATURE NAMES')\n", + " from pylab import mpl\n", + " %pylab\n", + " mpl.rcParams['font.sans-serif'] = ['SimHei']\n", + " ## Xgboost 模型参数优化-初步\n", + " #参考: https://juejin.im/post/6844903661013827598 \n", + " #每次调参时,备选参数数值以同数量级的1、3、10设置即可(比如设置1、3、10,或0.1、0.3、1.0,或0.01,0.03,0.10即可)\n", + "\n", + " from xgboost import XGBRegressor\n", + " from sklearn.model_selection import GridSearchCV\n", + "\n", + " estimator = XGBRegressor(random_state=0,\n", + " nthread=4,\n", + " seed=0\n", + " )\n", + " parameters = {\n", + " 'max_depth': range (2, 11, 2), # 树的最大深度\n", + " 'n_estimators': range (50, 101, 10), # 迭代次数\n", + " 'learning_rate': [0.01, 0.03, 0.1, 0.3, 0.5, 1]\n", + " }\n", + "\n", + " grid_search_XGB = GridSearchCV(\n", + " estimator=estimator,\n", + " param_grid=parameters,\n", + " # n_jobs = 10,\n", + " cv = 3,\n", + " 
verbose=True\n", + " )\n", + "\n", + " grid_search_XGB.fit(X_train, y_train)\n", + " #如果电脑在此步骤报错,可能是因为计算量太大,超过硬件可支持程度,可注释掉“n_jobs=10”一行\n", + "\n", + " best_parameters = grid_search_XGB.best_estimator_.get_params()\n", + " y_pred = grid_search_XGB.predict(x_test)\n", + "\n", + " op_XGBR_score = r2_score(y_true,y_pred)\n", + " op_XGBR_MSE= mean_squared_error(y_true, y_pred)\n", + " op_XGBR_RMSE= np.sqrt(op_XGBR_MSE)\n", + "\n", + " model_results2 = pd.DataFrame([['Optimized_Xgboost', op_XGBR_RMSE, op_XGBR_score]],\n", + " columns = ['模型(Model)', '均方根误差(RMSE)', 'R^2 score'])\n", + " model_results2=model_results2.set_index('模型(Model)')\n", + "\n", + " try:\n", + " results = model_results1.append(model_results2, ignore_index = False)\n", + " except:\n", + " results = pd.concat([model_results1,model_results2],ignore_index=True)\n", + " import pickle\n", + "\n", + " Pkl_Filename = \"日度价格预测_液化气最佳模型.pkl\" \n", + "\n", + " with open(Pkl_Filename, 'wb') as file: \n", + " pickle.dump(grid_search_XGB, file)\n", + "\n", + "\n", + " \n", + "\n", + "def read_xls_data():\n", + " global one_cols, two_cols\n", + " # 打开 XLS 文件\n", + " workbook = xlrd.open_workbook(read_file_path_name)\n", + "\n", + " # 获取所有表格名称\n", + " # sheet_names = workbook.sheet_names()\n", + "\n", + " # 选择第一个表格\n", + " sheet = workbook.sheet_by_index(0)\n", + "\n", + " # 获取行数和列数\n", + " num_rows = sheet.nrows\n", + " # num_cols = sheet.ncols\n", + "\n", + " # 遍历每一行,获取单元格数据\n", + " # for i in range(num_rows):\n", + " # row_data = sheet.row_values(i)\n", + " # one_cols.append(row_data)\n", + " # two_cols.append(row_data[1])\n", + "\n", + " row_data = sheet.row_values(1)\n", + " one_cols = row_data\n", + "\n", + " # 关闭 XLS 文件\n", + " # workbook.close()\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "def start(date):\n", + " read_xls_data()\n", + "\n", + " token = get_head_auth()\n", + " if not token:\n", + " return\n", + " token_push = get_head_push_auth()\n", + " if not token_push:\n", + " return\n", + "\n", + " 
datas = get_data_value(token, one_cols[1:],date)\n", + "# if not datas:\n", + "# return\n", + "\n", + " # data_list = [two_cols, one_cols]\n", + " append_rows = [get_cur_time(date)[1]]\n", + " dataItemNo_dataValue = {}\n", + " for data_value in datas:\n", + " if \"dataValue\" not in data_value:\n", + " print(data_value)\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", + " else:\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", + " \n", + " for value in one_cols[1:]:\n", + " if value in dataItemNo_dataValue:\n", + " append_rows.append(dataItemNo_dataValue[value])\n", + " else:\n", + " append_rows.append(\"\")\n", + " save_xls(append_rows)\n", + " optimize_Model()\n", + " upload_data_to_system(token_push,date)\n", + " # data_list.append(three_cols)\n", + " # write_xls(data_list)\n", + "\n", + "\n", + "def start_1():\n", + " read_xls_data()\n", + "\n", + " token = get_head_auth()\n", + " if not token:\n", + " return\n", + " \n", + "\n", + " datas = get_data_value(token, one_cols[1:])\n", + "# if not datas:\n", + "# return\n", + "\n", + " # data_list = [two_cols, one_cols]\n", + " append_rows = [get_cur_time()[1]]\n", + " dataItemNo_dataValue = {}\n", + " for data_value in datas:\n", + " if \"dataValue\" not in data_value:\n", + " print(data_value)\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", + " else:\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", + " \n", + " for value in one_cols[1:]:\n", + " if value in dataItemNo_dataValue:\n", + " append_rows.append(dataItemNo_dataValue[value])\n", + " else:\n", + " append_rows.append(\"\")\n", + " save_xls_1(append_rows)\n", + "\n", + " \n", + " # data_list.append(three_cols)\n", + " # write_xls(data_list)\n", + " \n", + "def save_xls_1(append_rows):\n", + "\n", + " # 打开xls文件\n", + " workbook = xlrd.open_workbook('液化气数据.xls')\n", + "\n", + " # 获取所有sheet的个数\n", + " sheet_count = 
len(workbook.sheet_names())\n", + "\n", + " # 获取所有sheet的名称\n", + " sheet_names = workbook.sheet_names()\n", + "\n", + " new_workbook = xlwt.Workbook()\n", + " for i in range(sheet_count):\n", + " # 获取当前sheet\n", + " sheet = workbook.sheet_by_index(i)\n", + "\n", + " # 获取sheet的行数和列数\n", + " row_count = sheet.nrows - 1\n", + " col_count = sheet.ncols\n", + " # 获取原有数据\n", + " data = []\n", + " for row in range(row_count):\n", + " row_data = []\n", + " for col in range(col_count):\n", + " row_data.append(sheet.cell_value(row, col))\n", + " data.append(row_data)\n", + " # 创建xlwt的Workbook对象\n", + " # 创建sheet\n", + " new_sheet = new_workbook.add_sheet(sheet_names[i])\n", + "\n", + " # 将原有的数据写入新的sheet\n", + " for row in range(row_count):\n", + " for col in range(col_count):\n", + " new_sheet.write(row, col, data[row][col])\n", + "\n", + " if i == 0:\n", + " # 在新的sheet中添加数据\n", + " for col in range(col_count):\n", + " new_sheet.write(row_count, col, append_rows[col])\n", + "\n", + " # 保存新的xls文件\n", + " new_workbook.save(\"液化气数据.xls\") \n", + "\n", + " \n", + " \n", + " \n", + "def check_data(dataItemNo):\n", + " token = get_head_auth()\n", + " if not token:\n", + " return\n", + "\n", + " datas = get_data_value(token, dataItemNo)\n", + " if not datas:\n", + " return\n", + "\n", + "\n", + "def save_xls(append_rows):\n", + "\n", + " # 打开xls文件\n", + " workbook = xlrd.open_workbook('液化气数据.xls')\n", + "\n", + " # 获取所有sheet的个数\n", + " sheet_count = len(workbook.sheet_names())\n", + "\n", + " # 获取所有sheet的名称\n", + " sheet_names = workbook.sheet_names()\n", + "\n", + " new_workbook = xlwt.Workbook()\n", + " for i in range(sheet_count):\n", + " # 获取当前sheet\n", + " sheet = workbook.sheet_by_index(i)\n", + "\n", + " # 获取sheet的行数和列数\n", + " row_count = sheet.nrows\n", + " col_count = sheet.ncols\n", + " # 获取原有数据\n", + " data = []\n", + " for row in range(row_count):\n", + " row_data = []\n", + " for col in range(col_count):\n", + " row_data.append(sheet.cell_value(row, col))\n", + " 
data.append(row_data)\n", + " # 创建xlwt的Workbook对象\n", + " # 创建sheet\n", + " new_sheet = new_workbook.add_sheet(sheet_names[i])\n", + "\n", + " # 将原有的数据写入新的sheet\n", + " for row in range(row_count):\n", + " for col in range(col_count):\n", + " new_sheet.write(row, col, data[row][col])\n", + "\n", + " if i == 0:\n", + " # 在新的sheet中添加数据\n", + " for col in range(col_count):\n", + " new_sheet.write(row_count, col, append_rows[col])\n", + "\n", + " # 保存新的xls文件\n", + " new_workbook.save(\"液化气数据.xls\")\n", + "\n", + "\n", + "if __name__ == \"__main__\":\n", + " pass\n", + " # 需要单独运行放开\n", + "# start()\n", + "\n", + " # 每天定时12点运行\n", + " # while True:\n", + " # # 获取当前时间\n", + " # current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n", + " # current_time_1 = time.strftime(\"%H:%M:%S\", time.localtime())\n", + "\n", + " # # 判断当前时间是否为执行任务的时间点\n", + " # if current_time == \"09:15:00\":\n", + " # print(\"执行定时任务\")\n", + " # start()\n", + "\n", + " # # 休眠1秒钟,避免过多占用CPU资源\n", + " # time.sleep(1)\n", + " \n", + " # elif current_time_1 == \"20:00:00\":\n", + " # print(\"更新数据\")\n", + " # start_1()\n", + " # time.sleep(1)\n", + "\n", + "\n", + "# # 检测数据准确性, 需要检测放开\n", + "# # check_data(\"100028098|LISTING_PRICE\")\n", + "# # check_data(\"9137070016544622XB|DAY_Yield\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20241018\n", + "{'dataDate': '20241018', 'dataItemNo': 'C01100008|AUCTION_MAX_PRICE'}\n", + "{'dataDate': '20241018', 'dataItemNo': 'C01100008|PLAN_SALE'}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:286: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. 
You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:288: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "d:\\ProgramData\\anaconda3\\Lib\\site-packages\\sklearn\\linear_model\\_coordinate_descent.py:631: ConvergenceWarning:\n", + "\n", + "Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 5.751e+05, tolerance: 3.460e+03\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: QtAgg\n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "d:\\ProgramData\\anaconda3\\Lib\\site-packages\\IPython\\core\\magics\\pylab.py:162: UserWarning:\n", + "\n", + "pylab import has clobbered these variables: ['datetime', '__version__', 'random', 'plot']\n", + "`%matplotlib` prevents importing * from pylab and numpy\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:235: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. 
You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:237: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:269: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date\n", + "2024-10-18 5041.442871\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "20241019\n", + "{'dataDate': '20241019', 'dataItemNo': 'C01100008|AUCTION_MAX_PRICE'}\n", + "{'dataDate': '20241019', 'dataItemNo': 'C01100008|PLAN_SALE'}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:286: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:288: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. 
You can safely remove this argument.\n", + "\n", + "d:\\ProgramData\\anaconda3\\Lib\\site-packages\\sklearn\\linear_model\\_coordinate_descent.py:631: ConvergenceWarning:\n", + "\n", + "Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 5.757e+05, tolerance: 3.455e+03\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: QtAgg\n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:235: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:237: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:269: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). 
To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date\n", + "2024-10-19 5035.606934\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "20241020\n", + "{'dataDate': '20241020', 'dataItemNo': 'C01100008|AUCTION_MAX_PRICE'}\n", + "{'dataDate': '20241020', 'dataItemNo': 'C01100008|PLAN_SALE'}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:286: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:288: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "d:\\ProgramData\\anaconda3\\Lib\\site-packages\\sklearn\\linear_model\\_coordinate_descent.py:631: ConvergenceWarning:\n", + "\n", + "Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. 
Duality gap: 5.154e+05, tolerance: 3.456e+03\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: QtAgg\n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:235: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:237: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:269: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). 
To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date\n", + "2024-10-20 5035.100586\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "20241021\n", + "{'dataDate': '20241021', 'dataItemNo': 'C01100008|AUCTION_MAX_PRICE'}\n", + "{'dataDate': '20241021', 'dataItemNo': 'C01100008|PLAN_SALE'}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:286: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:288: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "d:\\ProgramData\\anaconda3\\Lib\\site-packages\\sklearn\\linear_model\\_coordinate_descent.py:631: ConvergenceWarning:\n", + "\n", + "Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. 
Duality gap: 5.367e+05, tolerance: 3.463e+03\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: QtAgg\n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:235: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:237: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:269: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). 
To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date\n", + "2024-10-21 5039.185059\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "20241022\n", + "{'dataDate': '20241022', 'dataItemNo': 'C01100008|AUCTION_MAX_PRICE'}\n", + "{'dataDate': '20241022', 'dataItemNo': 'C01100008|PLAN_SALE'}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:286: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:288: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "d:\\ProgramData\\anaconda3\\Lib\\site-packages\\sklearn\\linear_model\\_coordinate_descent.py:631: ConvergenceWarning:\n", + "\n", + "Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. 
Duality gap: 5.408e+05, tolerance: 3.515e+03\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: QtAgg\n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:235: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:237: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:269: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). 
To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date\n", + "2024-10-22 5064.530762\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "20241023\n", + "{'dataDate': '20241023', 'dataItemNo': 'C01100008|AUCTION_MAX_PRICE'}\n", + "{'dataDate': '20241023', 'dataItemNo': 'C01100008|PLAN_SALE'}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:286: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:288: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "d:\\ProgramData\\anaconda3\\Lib\\site-packages\\sklearn\\linear_model\\_coordinate_descent.py:631: ConvergenceWarning:\n", + "\n", + "Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. 
Duality gap: 6.883e+05, tolerance: 3.467e+03\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: QtAgg\n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:235: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:237: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:269: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). 
To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date\n", + "2024-10-23 5041.791016\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "20241024\n", + "{'dataDate': '20241024', 'dataItemNo': 'C01100008|AUCTION_MAX_PRICE'}\n", + "{'dataDate': '20241024', 'dataItemNo': 'C01100008|PLAN_SALE'}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:286: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:288: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "d:\\ProgramData\\anaconda3\\Lib\\site-packages\\sklearn\\linear_model\\_coordinate_descent.py:631: ConvergenceWarning:\n", + "\n", + "Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. 
Duality gap: 6.806e+05, tolerance: 3.479e+03\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: QtAgg\n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:235: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:237: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:269: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). 
To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date\n", + "2024-10-24 5062.01709\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "20241025\n", + "{'dataDate': '20241025', 'dataItemNo': 'C01100008|AUCTION_MAX_PRICE'}\n", + "{'dataDate': '20241025', 'dataItemNo': 'C01100008|PLAN_SALE'}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:286: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:288: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "d:\\ProgramData\\anaconda3\\Lib\\site-packages\\sklearn\\linear_model\\_coordinate_descent.py:631: ConvergenceWarning:\n", + "\n", + "Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. 
Duality gap: 6.352e+05, tolerance: 3.465e+03\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: QtAgg\n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:235: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:237: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_10536\\402792680.py:269: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). 
To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date\n", + "2024-10-25 5034.200684\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n" + ] + } + ], + "source": [ + "from datetime import datetime, timedelta\n", + "\n", + "start_date = datetime(2024, 10, 18)\n", + "end_date = datetime(2024, 10, 26)\n", + "\n", + "while start_date < end_date:\n", + " print(start_date.strftime('%Y%m%d'))\n", + " start(start_date)\n", + " # time.sleep(1)\n", + " # start_2(start_date)\n", + " start_date += timedelta(days=1)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/aisenzhecode/液化石油气/液化气数据.xls b/aisenzhecode/液化石油气/液化气数据.xls new file mode 100644 index 0000000..748c015 Binary files /dev/null and b/aisenzhecode/液化石油气/液化气数据.xls differ diff --git a/aisenzhecode/石油苯/纯苯价格预测-自定义日期ytj.ipynb b/aisenzhecode/石油苯/纯苯价格预测-自定义日期ytj.ipynb new file mode 100644 index 0000000..131ade0 --- /dev/null +++ b/aisenzhecode/石油苯/纯苯价格预测-自定义日期ytj.ipynb @@ -0,0 +1,1210 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import requests\n", + "import json\n", + "import xlrd\n", + "import xlwt\n", + "from datetime import datetime\n", + "import time\n", + "# 变量定义\n", + "login_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n", + "search_url = \"http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos\"\n", + 
"\n", + "login_push_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n", + "upload_url = \"http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList\"\n", + "\n", + "login_data = {\n", + " \"data\": {\n", + " \"account\": \"api_dev\",\n", + " \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n", + " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n", + " \"terminal\": \"API\"\n", + " },\n", + " \"funcModule\": \"API\",\n", + " \"funcOperation\": \"获取token\"\n", + "}\n", + "\n", + "login_push_data = {\n", + " \"data\": {\n", + " \"account\": \"api_dev\",\n", + " \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n", + " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n", + " \"terminal\": \"API\"\n", + " },\n", + " \"funcModule\": \"API\",\n", + " \"funcOperation\": \"获取token\"\n", + "}\n", + "\n", + "read_file_path_name = \"纯苯数据项.xls\"\n", + "one_cols = []\n", + "two_cols = []\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sn\n", + "import random\n", + "import time\n", + "\n", + "\n", + "\n", + "\n", + "from plotly import __version__\n", + "from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot\n", + "\n", + "from sklearn import preprocessing\n", + "\n", + "from pandas import Series,DataFrame\n", + "\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import sklearn.datasets as datasets\n", + "\n", + "#导入机器学习算法模型\n", + "from sklearn.linear_model import Lasso\n", + "from xgboost import XGBRegressor\n", + "\n", + "import datetime\n", + "import statsmodels.api as sm\n", + "try:\n", + " from keras.preprocessing.sequence import TimeseriesGenerator\n", + "except:\n", + " from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator\n", + "\n", + "import plotly.express as px\n", + "import plotly.graph_objects as go\n", + "\n", + "import xgboost as xgb\n", + "from xgboost import plot_importance, 
def _request_token(url, payload):
    """POST a login payload and return the access token.

    Args:
        url: Login endpoint.
        payload: JSON-serialisable login body.

    Returns:
        The ``accessToken`` string from the response, or ``None`` when the
        response does not report a truthy ``status``.
    """
    # timeout is (connect, read) in seconds.
    login_res = requests.post(url=url, json=payload, timeout=(3, 5))
    text = json.loads(login_res.text)
    # .get() so an error payload without "status" is reported as a failed
    # login instead of surfacing as a KeyError.
    if text.get("status"):
        return text["data"]["accessToken"]
    print("获取认证失败")
    return None


def get_head_auth():
    """Log in with the query credentials and return the token (or None)."""
    return _request_token(login_url, login_data)


def get_data_value(token, dataItemNoList, date=''):
    """Query the values of the given data items for one day.

    Args:
        token: Value sent in the ``Authorization`` header.
        dataItemNoList: Data item numbers to query.
        date: Day to query, forwarded to ``get_cur_time``; the default ``''``
            means "today". The default keeps callers that omit the date
            working.

    Returns:
        Whatever the service returned under ``"data"``; a falsy value
        (``None`` or empty) when there is nothing for that day.
    """
    search_data = {
        "data": {
            "date": get_cur_time(date)[0],
            "dataItemNoList": dataItemNoList
        },
        "funcModule": "数据项",
        "funcOperation": "查询"
    }
    headers = {"Authorization": token}
    search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5))
    search_value = json.loads(search_res.text).get("data")
    if not search_value:
        print("今天没有新数据")
    return search_value


# xls file handling

def write_xls(data, date):
    """Write a 2-D iterable to ``<YYYYMMDD>.xls`` on a sheet named ``Sheet1``.

    Args:
        data: Iterable of rows, each row an iterable of cell values.
        date: Day used to build the file name (see ``get_cur_time``).
    """
    workbook = xlwt.Workbook()
    # xlwt creates sheets with add_sheet(); Workbook has no load() method, so
    # the previous call raised AttributeError before anything was written.
    sheet = workbook.add_sheet('Sheet1')

    for row_index, row_data in enumerate(data):
        for col_index, cell_data in enumerate(row_data):
            sheet.write(row_index, col_index, cell_data)

    workbook.save(get_cur_time(date)[0] + '.xls')


def get_cur_time(date=''):
    """Format a day as ``('YYYYMMDD', 'YYYY-MM-DD')``.

    Args:
        date: ``''`` for the current local time, otherwise any object with
            ``year``, ``month`` and ``day`` attributes.

    Returns:
        Tuple of the compact and the dashed date string, month and day
        zero-padded to two digits.
    """
    if date == '':
        # Imported here on purpose: the name ``datetime`` is rebound several
        # times at module level in this notebook.
        import datetime
        now = datetime.datetime.now()
    else:
        now = date

    year = str(now.year)
    month = f"{now.month:02d}"
    day = f"{now.day:02d}"
    return year + month + day, f"{year}-{month}-{day}"


def get_head_push_auth():
    """Log in with the push credentials and return the token (or None)."""
    return _request_token(login_push_url, login_push_data)


def upload_data_to_system(token_push, date):
    """Compute today's forecast and push it to the data platform.

    Args:
        token_push: Value sent in the ``Authorization`` header.
        date: Day the forecast is filed under (see ``get_cur_time``).
    """
    datavalue = forecast_price()
    data = {
        "funcModule": "数据表信息列表",
        "funcOperation": "新增",
        "data": [
            {"dataItemNo": "C01100047|FORECAST_PRICE",
             "dataDate": get_cur_time(date)[0],
             "dataStatus": "add",
             "dataValue": datavalue
             }
        ]
    }
    print(data)
    headers = {"Authorization": token_push}
    res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))
    print(res.text)
# Workbook that stores the history and the pickled model produced from it.
_DATA_FILE = "纯苯数据项.xls"
_MODEL_FILE = "日度价格预测_最佳模型.pkl"


def _load_history(sheet_name=0):
    """Read the history workbook and fill gaps forward, then backward.

    Args:
        sheet_name: Sheet passed to ``pandas.read_excel``.

    Returns:
        DataFrame with a parsed ``Date`` column and gaps filled.
    """
    frame = pd.read_excel(_DATA_FILE, sheet_name=sheet_name)
    # The first row under the header holds the data item numbers
    # (read_xls_data reads it as sheet row 1), not observations.
    frame.drop([0], inplace=True)
    # An explicit format is given, so the deprecated infer_datetime_format
    # flag is not needed.
    frame['Date'] = pd.to_datetime(frame['Date'], format='%Y-%m-%d')
    return frame.ffill().bfill()


def forecast_price():
    """Predict the price for the most recent row of the history workbook.

    Returns:
        The prediction as a float rounded to two decimals.

    Raises:
        ValueError: if the latest row still has missing values after filling.
    """
    import joblib

    history = _load_history(sheet_name='Sheet1')

    # NOTE(review): unpickling executes code; only load a model file that this
    # notebook wrote itself.
    model = joblib.load(_MODEL_FILE)

    # The newest day is the last row; copy so the edits below do not act on a
    # slice of ``history``.
    latest = history.tail(1).copy()
    latest.index = latest["Date"]
    latest = latest.drop(["Date", "Price"], axis=1).dropna()
    if latest.empty:
        raise ValueError("latest row has missing values; cannot forecast")

    for col in latest.columns:
        latest[col] = pd.to_numeric(latest[col], errors='coerce')

    latest['日度预测价格'] = model.predict(latest)
    print(latest['日度预测价格'])
    # .iloc: the Series is indexed by date, so [0] would be a label lookup.
    return round(float(latest['日度预测价格'].iloc[0]), 2)


def plot_feature_importance(importance, names, model_type):
    """Draw a bar chart of feature importances, largest first.

    Args:
        importance: Importance value per feature.
        names: Feature names, same order as ``importance``.
        model_type: Label prepended to the chart title.
    """
    fi_df = pd.DataFrame({'feature_names': np.array(names),
                          'feature_importance': np.array(importance)})
    fi_df.sort_values(by=['feature_importance'], ascending=False, inplace=True)

    plt.figure(figsize=(10, 8))
    sn.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names'])
    plt.title(model_type + " " + 'FEATURE IMPORTANCE')
    plt.xlabel('FEATURE IMPORTANCE')
    plt.ylabel('FEATURE NAMES')


def optimize_Model():
    """Retrain the price model on the full history and pickle the best one.

    Fits a Lasso and a default XGBoost baseline, then grid-searches XGBoost
    and writes the fitted ``GridSearchCV`` object to the model file.

    Returns:
        DataFrame of RMSE and R^2 per model, indexed by model name.
    """
    import pickle

    import matplotlib
    from sklearn.linear_model import Lasso
    from sklearn.metrics import mean_squared_error, r2_score
    from sklearn.model_selection import GridSearchCV, train_test_split
    from xgboost import XGBRegressor

    pd.set_option('display.max_rows', 40)
    pd.set_option('display.max_columns', 40)
    # Font with CJK glyphs for chart labels. Set directly instead of through
    # the %pylab magic, which is not valid Python outside IPython and
    # overwrote the globals ``datetime`` and ``random``.
    matplotlib.rcParams['font.sans-serif'] = ['SimHei']

    history = _load_history()
    history.index = history["Date"]
    history = history.drop(["Date"], axis=1).astype('float')

    features = history.drop('Price', axis=1)
    target = history['Price']
    X_train, x_test, y_train, y_true = train_test_split(
        features, target, test_size=0.2, random_state=0)

    def score_row(label, predicted):
        """Return [label, RMSE, R^2] for predictions on the held-out split."""
        rmse_value = np.sqrt(mean_squared_error(y_true, predicted))
        return [label, rmse_value, r2_score(y_true, predicted)]

    rows = []
    baselines = (('Lasso', Lasso(random_state=0)),
                 ('XgBoost', XGBRegressor(random_state=0)))
    for label, model in baselines:
        model.fit(X_train, y_train)
        rows.append(score_row(label, model.predict(x_test)))

    # Candidate values follow a 1 / 3 / 10 spacing within each magnitude.
    estimator = XGBRegressor(random_state=0,
                             nthread=4,
                             seed=0
                             )
    parameters = {
        'max_depth': range(2, 11, 2),        # maximum tree depth
        'n_estimators': range(50, 101, 10),  # number of boosting rounds
        'learning_rate': [0.01, 0.03, 0.1, 0.3, 0.5, 1]
    }
    grid_search_XGB = GridSearchCV(
        estimator=estimator,
        param_grid=parameters,
        cv=3,
        verbose=True
    )
    grid_search_XGB.fit(X_train, y_train)
    rows.append(score_row('Optimized_Xgboost', grid_search_XGB.predict(x_test)))

    # Keep the model names as the index so each row stays identifiable.
    results = pd.DataFrame(
        rows, columns=['模型(Model)', '均方根误差(RMSE)', 'R^2 score']
    ).set_index('模型(Model)')

    with open(_MODEL_FILE, 'wb') as file:
        pickle.dump(grid_search_XGB, file)

    return results


def read_xls_data():
    """Load the data item numbers (second row of the first sheet) into
    the module-level ``one_cols``."""
    global one_cols
    workbook = xlrd.open_workbook(read_file_path_name)
    sheet = workbook.sheet_by_index(0)
    one_cols = sheet.row_values(1)


def build_append_row(date_label, item_nos, datas):
    """Build one worksheet row from a query result.

    Args:
        date_label: Value for the first cell.
        item_nos: Data item numbers, in column order.
        datas: Iterable of dicts with ``dataItemNo`` and, when a value
            exists, ``dataValue``; ``None`` is treated as empty.

    Returns:
        ``[date_label, value_1, ...]`` with ``""`` for every item that has no
        value.
    """
    values = {}
    for entry in datas or []:
        if "dataValue" not in entry:
            print(entry)
            values[entry["dataItemNo"]] = ""
        else:
            values[entry["dataItemNo"]] = entry["dataValue"]
    return [date_label] + [values.get(item_no, "") for item_no in item_nos]


def start(date=''):
    """Fetch the day's data, append it, retrain, and push the forecast.

    Args:
        date: Day to process (see ``get_cur_time``); ``''`` means today.
    """
    read_xls_data()

    token = get_head_auth()
    if not token:
        return
    token_push = get_head_push_auth()
    if not token_push:
        return

    # An empty result is not an early exit: a row of blanks is still appended
    # and the forecast still runs.
    datas = get_data_value(token, one_cols[1:], date)
    append_rows = build_append_row(get_cur_time(date)[1], one_cols[1:], datas)
    save_xls(append_rows)
    optimize_Model()
    upload_data_to_system(token_push, date)


def start_1(date=''):
    """Re-fetch the day's data and overwrite the last row of the workbook.

    Args:
        date: Day to process (see ``get_cur_time``); ``''`` means today.
    """
    read_xls_data()

    token = get_head_auth()
    if not token:
        return

    # The date must be forwarded; the query was previously called without it.
    datas = get_data_value(token, one_cols[1:], date)
    append_rows = build_append_row(get_cur_time(date)[1], one_cols[1:], datas)
    save_xls_1(append_rows)


def _rewrite_workbook(append_rows, file_path, replace_last_row):
    """Copy every sheet of ``file_path`` and add ``append_rows`` to sheet 0.

    Args:
        append_rows: Cell values for the new row, in column order.
        file_path: Workbook to read and then overwrite.
        replace_last_row: When true, the new row replaces the last row of the
            first sheet instead of following it.
    """
    source = xlrd.open_workbook(file_path)
    target = xlwt.Workbook()

    for index, name in enumerate(source.sheet_names()):
        sheet = source.sheet_by_index(index)
        keep = sheet.nrows
        # Only the first sheet receives the new row, so only there may the
        # last row be dropped; other sheets are copied whole.
        if index == 0 and replace_last_row:
            keep = max(sheet.nrows - 1, 0)

        new_sheet = target.add_sheet(name)
        for row in range(keep):
            for col, value in enumerate(sheet.row_values(row)):
                new_sheet.write(row, col, value)

        if index == 0:
            for col in range(sheet.ncols):
                value = append_rows[col] if col < len(append_rows) else ""
                new_sheet.write(keep, col, value)

    target.save(file_path)


def save_xls_1(append_rows, file_path=_DATA_FILE):
    """Replace the last row of the first sheet with ``append_rows``."""
    _rewrite_workbook(append_rows, file_path, replace_last_row=True)


def check_data(dataItemNo, date=''):
    """Query data items by hand to check what the service returns.

    Args:
        dataItemNo: Forwarded unchanged as the item list of the query.
        date: Day to query; ``''`` means today.

    Returns:
        The query result, or ``None`` when login fails or nothing came back.
    """
    token = get_head_auth()
    if not token:
        return None

    datas = get_data_value(token, dataItemNo, date)
    if not datas:
        return None
    return datas


def save_xls(append_rows, file_path=_DATA_FILE):
    """Append ``append_rows`` after the last row of the first sheet."""
    _rewrite_workbook(append_rows, file_path, replace_last_row=False)


if __name__ == "__main__":
    # Nothing runs on its own; call start() / start_1() by hand or from a
    # scheduler.
    pass
+ " \n", + " # elif current_time_1 == \"20:00:00\":\n", + " # print(\"更新数据\")\n", + " # start_1()\n", + " # time.sleep(1)\n", + "\n", + "\n", + "# # 检测数据准确性, 需要检测放开\n", + "# # check_data(\"100028098|LISTING_PRICE\")\n", + "# # check_data(\"9137070016544622XB|DAY_Yield\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20241017\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13372\\3095856616.py:280: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "d:\\ProgramData\\anaconda3\\Lib\\site-packages\\sklearn\\linear_model\\_coordinate_descent.py:631: ConvergenceWarning:\n", + "\n", + "Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. 
Duality gap: 4.602e+05, tolerance: 3.845e+04\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: QtAgg\n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "d:\\ProgramData\\anaconda3\\Lib\\site-packages\\IPython\\core\\magics\\pylab.py:162: UserWarning:\n", + "\n", + "pylab import has clobbered these variables: ['datetime', '__version__', 'plot', 'random']\n", + "`%matplotlib` prevents importing * from pylab and numpy\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13372\\3095856616.py:233: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13372\\3095856616.py:263: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). 
To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date\n", + "2024-10-17 7541.753418\n", + "Name: 日度预测价格, dtype: float32\n", + "{'funcModule': '数据表信息列表', 'funcOperation': '新增', 'data': [{'dataItemNo': 'C01100047|FORECAST_PRICE', 'dataDate': '20241017', 'dataStatus': 'add', 'dataValue': 7541.75}]}\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "20241018\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13372\\3095856616.py:280: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "d:\\ProgramData\\anaconda3\\Lib\\site-packages\\sklearn\\linear_model\\_coordinate_descent.py:631: ConvergenceWarning:\n", + "\n", + "Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 4.716e+05, tolerance: 3.895e+04\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: QtAgg\n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13372\\3095856616.py:233: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. 
You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13372\\3095856616.py:263: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date\n", + "2024-10-18 7399.281738\n", + "Name: 日度预测价格, dtype: float32\n", + "{'funcModule': '数据表信息列表', 'funcOperation': '新增', 'data': [{'dataItemNo': 'C01100047|FORECAST_PRICE', 'dataDate': '20241018', 'dataStatus': 'add', 'dataValue': 7399.28}]}\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "20241019\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13372\\3095856616.py:280: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "d:\\ProgramData\\anaconda3\\Lib\\site-packages\\sklearn\\linear_model\\_coordinate_descent.py:631: ConvergenceWarning:\n", + "\n", + "Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. 
Duality gap: 4.669e+05, tolerance: 3.913e+04\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: QtAgg\n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13372\\3095856616.py:233: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13372\\3095856616.py:263: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date\n", + "2024-10-19 7404.584473\n", + "Name: 日度预测价格, dtype: float32\n", + "{'funcModule': '数据表信息列表', 'funcOperation': '新增', 'data': [{'dataItemNo': 'C01100047|FORECAST_PRICE', 'dataDate': '20241019', 'dataStatus': 'add', 'dataValue': 7404.58}]}\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "20241020\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13372\\3095856616.py:280: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. 
You can safely remove this argument.\n", + "\n", + "d:\\ProgramData\\anaconda3\\Lib\\site-packages\\sklearn\\linear_model\\_coordinate_descent.py:631: ConvergenceWarning:\n", + "\n", + "Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 4.833e+05, tolerance: 3.773e+04\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: QtAgg\n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13372\\3095856616.py:233: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13372\\3095856616.py:263: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). 
To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date\n", + "2024-10-20 7375.245605\n", + "Name: 日度预测价格, dtype: float32\n", + "{'funcModule': '数据表信息列表', 'funcOperation': '新增', 'data': [{'dataItemNo': 'C01100047|FORECAST_PRICE', 'dataDate': '20241020', 'dataStatus': 'add', 'dataValue': 7375.25}]}\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "20241021\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13372\\3095856616.py:280: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "d:\\ProgramData\\anaconda3\\Lib\\site-packages\\sklearn\\linear_model\\_coordinate_descent.py:631: ConvergenceWarning:\n", + "\n", + "Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 4.912e+05, tolerance: 3.684e+04\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: QtAgg\n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13372\\3095856616.py:233: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. 
You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13372\\3095856616.py:263: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date\n", + "2024-10-21 7272.15332\n", + "Name: 日度预测价格, dtype: float32\n", + "{'funcModule': '数据表信息列表', 'funcOperation': '新增', 'data': [{'dataItemNo': 'C01100047|FORECAST_PRICE', 'dataDate': '20241021', 'dataStatus': 'add', 'dataValue': 7272.15}]}\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "20241022\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13372\\3095856616.py:280: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "d:\\ProgramData\\anaconda3\\Lib\\site-packages\\sklearn\\linear_model\\_coordinate_descent.py:631: ConvergenceWarning:\n", + "\n", + "Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. 
Duality gap: 4.991e+05, tolerance: 3.700e+04\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: QtAgg\n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13372\\3095856616.py:233: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13372\\3095856616.py:263: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date\n", + "2024-10-22 7265.592773\n", + "Name: 日度预测价格, dtype: float32\n", + "{'funcModule': '数据表信息列表', 'funcOperation': '新增', 'data': [{'dataItemNo': 'C01100047|FORECAST_PRICE', 'dataDate': '20241022', 'dataStatus': 'add', 'dataValue': 7265.59}]}\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "20241023\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13372\\3095856616.py:280: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. 
You can safely remove this argument.\n", + "\n", + "d:\\ProgramData\\anaconda3\\Lib\\site-packages\\sklearn\\linear_model\\_coordinate_descent.py:631: ConvergenceWarning:\n", + "\n", + "Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 4.866e+05, tolerance: 3.682e+04\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: QtAgg\n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13372\\3095856616.py:233: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13372\\3095856616.py:263: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). 
To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date\n", + "2024-10-23 7314.694336\n", + "Name: 日度预测价格, dtype: float32\n", + "{'funcModule': '数据表信息列表', 'funcOperation': '新增', 'data': [{'dataItemNo': 'C01100047|FORECAST_PRICE', 'dataDate': '20241023', 'dataStatus': 'add', 'dataValue': 7314.69}]}\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "20241024\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13372\\3095856616.py:280: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "d:\\ProgramData\\anaconda3\\Lib\\site-packages\\sklearn\\linear_model\\_coordinate_descent.py:631: ConvergenceWarning:\n", + "\n", + "Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 4.886e+05, tolerance: 3.690e+04\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: QtAgg\n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13372\\3095856616.py:233: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. 
You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13372\\3095856616.py:263: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date\n", + "2024-10-24 7340.938477\n", + "Name: 日度预测价格, dtype: float32\n", + "{'funcModule': '数据表信息列表', 'funcOperation': '新增', 'data': [{'dataItemNo': 'C01100047|FORECAST_PRICE', 'dataDate': '20241024', 'dataStatus': 'add', 'dataValue': 7340.94}]}\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "20241025\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13372\\3095856616.py:280: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "d:\\ProgramData\\anaconda3\\Lib\\site-packages\\sklearn\\linear_model\\_coordinate_descent.py:631: ConvergenceWarning:\n", + "\n", + "Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. 
Duality gap: 4.843e+05, tolerance: 3.691e+04\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: QtAgg\n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13372\\3095856616.py:233: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13372\\3095856616.py:263: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). 
To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date\n", + "2024-10-25 7299.914062\n", + "Name: 日度预测价格, dtype: float32\n", + "{'funcModule': '数据表信息列表', 'funcOperation': '新增', 'data': [{'dataItemNo': 'C01100047|FORECAST_PRICE', 'dataDate': '20241025', 'dataStatus': 'add', 'dataValue': 7299.91}]}\n", + "{\"confirmFlg\":false,\"status\":true}\n" + ] + } + ], + "source": [ + "from datetime import datetime, timedelta\n", + "\n", + "start_date = datetime(2024, 10, 17)\n", + "end_date = datetime(2024, 10, 26)\n", + "\n", + "while start_date < end_date:\n", + " print(start_date.strftime('%Y%m%d'))\n", + " start(start_date)\n", + " # time.sleep(1)\n", + " # start_1(start_date)\n", + " start_date += timedelta(days=1)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/aisenzhecode/石油苯/纯苯数据项.xls b/aisenzhecode/石油苯/纯苯数据项.xls new file mode 100644 index 0000000..5c50fdf Binary files /dev/null and b/aisenzhecode/石油苯/纯苯数据项.xls differ diff --git a/aisenzhecode/聚合级丙烯/丙烯价格预测yij.ipynb b/aisenzhecode/聚合级丙烯/丙烯价格预测yij.ipynb new file mode 100644 index 0000000..5460fd4 --- /dev/null +++ b/aisenzhecode/聚合级丙烯/丙烯价格预测yij.ipynb @@ -0,0 +1,1367 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import requests\n", + "import json\n", + "import xlrd\n", + "import xlwt\n", + "from datetime import datetime\n", + "import time\n", + "# 变量定义\n", + 
"login_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n", + "search_url = \"http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos\"\n", + "\n", + "login_push_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n", + "upload_url = \"http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList\"\n", + "\n", + "login_data = {\n", + " \"data\": {\n", + " \"account\": \"api_dev\",\n", + " \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n", + " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n", + " \"terminal\": \"API\"\n", + " },\n", + " \"funcModule\": \"API\",\n", + " \"funcOperation\": \"获取token\"\n", + "}\n", + "\n", + "login_push_data = {\n", + " \"data\": {\n", + " \"account\": \"api_dev\",\n", + " \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n", + " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n", + " \"terminal\": \"API\"\n", + " },\n", + " \"funcModule\": \"API\",\n", + " \"funcOperation\": \"获取token\"\n", + "}\n", + "\n", + "read_file_path_name = \"丙烯基础数据收集表.xls\"\n", + "one_cols = []\n", + "two_cols = []\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sn\n", + "import random\n", + "import time\n", + "\n", + "\n", + "\n", + "\n", + "from plotly import __version__\n", + "from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot\n", + "\n", + "from sklearn import preprocessing\n", + "\n", + "from pandas import Series,DataFrame\n", + "\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import sklearn.datasets as datasets\n", + "\n", + "#导入机器学习算法模型\n", + "from sklearn.linear_model import Lasso\n", + "from xgboost import XGBRegressor\n", + "\n", + "import datetime\n", + "import statsmodels.api as sm\n", + "try:\n", + " from keras.preprocessing.sequence import TimeseriesGenerator\n", + "except:\n", + " from tensorflow.keras.preprocessing.sequence import 
TimeseriesGenerator\n", + "\n", + "import plotly.express as px\n", + "import plotly.graph_objects as go\n", + "\n", + "import xgboost as xgb\n", + "from xgboost import plot_importance, plot_tree\n", + "from sklearn.metrics import mean_absolute_error\n", + "from statsmodels.tools.eval_measures import mse,rmse\n", + "from sklearn.model_selection import GridSearchCV\n", + "from xgboost import XGBRegressor\n", + "import warnings\n", + "import pickle\n", + "\n", + "from sklearn.metrics import mean_squared_error\n", + "\n", + "#切割训练数据和样本数据\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "#用于模型评分\n", + "from sklearn.metrics import r2_score\n", + "\n", + "le = preprocessing.LabelEncoder()\n", + "\n", + "# print(__version__) # requires version >= 1.9.0\n", + "\n", + "\n", + "import cufflinks as cf\n", + "cf.go_offline()\n", + "\n", + "random.seed(100)\n", + "\n", + "%matplotlib inline\n", + "\n", + "# 数据获取\n", + "\n", + "def get_head_auth():\n", + " login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5))\n", + " text = json.loads(login_res.text)\n", + " if text[\"status\"]:\n", + " token = text[\"data\"][\"accessToken\"]\n", + " return token\n", + " else:\n", + " print(\"获取认证失败\")\n", + " return None\n", + "\n", + "\n", + "def get_data_value(token, dataItemNoList,date):\n", + " search_data = {\n", + " \"data\": {\n", + " \"date\": get_cur_time(date)[0],\n", + " \"dataItemNoList\": dataItemNoList\n", + " },\n", + " \"funcModule\": \"数据项\",\n", + " \"funcOperation\": \"查询\"\n", + " }\n", + " headers = {\"Authorization\": token}\n", + " search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5))\n", + " search_value = json.loads(search_res.text)[\"data\"]\n", + " if search_value:\n", + " return search_value\n", + " else:\n", + " print(\"今天没有新数据\")\n", + " return search_value\n", + "\n", + "\n", + "# xls文件处理\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "def write_xls(data):\n", + " # 
创建一个Workbook对象\n", + " workbook = xlwt.Workbook()\n", + "\n", + " # 创建一个Sheet对象,可指定名称\n", + " sheet = workbook.load('Sheet1')\n", + "\n", + " # 写入数据行\n", + " for row_index, row_data in enumerate(data):\n", + " for col_index, cell_data in enumerate(row_data):\n", + " sheet.write(row_index, col_index, cell_data)\n", + "\n", + " # 保存Workbook到文件\n", + " workbook.save(get_cur_time()[0] + '.xls')\n", + "\n", + "\n", + "def get_cur_time(date = ''):\n", + " if date == '':\n", + " import datetime\n", + " now = datetime.datetime.now()\n", + " else:\n", + " now = date\n", + " year = now.year\n", + " month = now.month\n", + " day = now.day\n", + "\n", + " if month < 10:\n", + " month = \"0\" + str(month)\n", + " if day < 10:\n", + " day = \"0\" + str(day)\n", + " cur_time = str(year) + str(month) + str(day)\n", + " cur_time2 = str(year) + \"-\" + str(month) + \"-\" + str(day)\n", + "# cur_time = '20231011'\n", + "# cur_time2 = '2023-10-11'\n", + " return cur_time, cur_time2\n", + "\n", + "\n", + "def get_head_push_auth():\n", + " login_res = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5))\n", + " text = json.loads(login_res.text)\n", + " if text[\"status\"]:\n", + " token = text[\"data\"][\"accessToken\"]\n", + " return token\n", + " else:\n", + " print(\"获取认证失败\")\n", + " return None\n", + "\n", + "\n", + "\n", + "def upload_data_to_system(token_push,date):\n", + " data = {\n", + " \"funcModule\": \"数据表信息列表\",\n", + " \"funcOperation\": \"新增\",\n", + " \"data\": [\n", + " {\"dataItemNo\": \"C01100007|Forecast_Price|ACN\",\n", + " \"dataDate\": get_cur_time(date)[0],\n", + " \"dataStatus\": \"add\",\n", + "# \"dataValue\": 7100\n", + " \"dataValue\": forecast_price()\n", + " }\n", + "\n", + " ]\n", + " }\n", + " headers = {\"Authorization\": token_push}\n", + " res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))\n", + " print(res.text)\n", + "\n", + " \n", + "# def upload_data_to_system(token):\n", + "# data = {\n", + "# 
\"funcModule\": \"数据表信息列表\",\n", + "# \"funcOperation\": \"新增\",\n", + "# \"data\": [\n", + "# {\"dataItemNo\": \"C01100036|Forecast_ Price|ACN\",\n", + "# \"dataDate\": '20230706',\n", + "# \"dataStatus\": \"add\",\n", + "# \"dataValue\": 3780.0\n", + "# }\n", + "\n", + "# ]\n", + "# }\n", + "# headers = {\"Authorization\": token}\n", + "# res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))\n", + "# print(res.text)\n", + "\n", + " \n", + " \n", + "def forecast_price():\n", + " # df_test = pd.read_csv('定价模型数据收集0212.csv')\n", + " df_test = pd.read_excel('丙烯基础数据收集表.xls')\n", + " df_test.drop([0],inplace=True)\n", + " try:\n", + " df_test['Date']=pd.to_datetime(df_test['Date'], format='%m/%d/%Y',infer_datetime_format=True)\n", + " except:\n", + " df_test['Date']=pd.to_datetime(df_test['Date'], format=r'%Y-%m-%d',infer_datetime_format=True)\n", + "\n", + " #将缺失值补为前一个或者后一个数值\n", + " df_test_1 = df_test\n", + " df_test_1=df_test_1.fillna(df_test.ffill())\n", + " df_test_1=df_test_1.fillna(df_test_1.bfill())\n", + "\n", + " # 选择用于模型训练的列名称\n", + " col_for_training = df_test_1.columns\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " import joblib\n", + " Best_model_DalyLGPrice = joblib.load(\"日度价格预测_丙烯最佳模型.pkl\")\n", + " # 最新的一天为最后一行的数据\n", + " \n", + " df_test_1_Day = df_test_1.tail(1)\n", + " # 移除不需要的列\n", + " df_test_1_Day.index = df_test_1_Day[\"Date\"]\n", + " df_test_1_Day = df_test_1_Day.drop([\"Date\"], axis= 1)\n", + " df_test_1_Day=df_test_1_Day.drop('Price',axis=1)\n", + " df_test_1_Day=df_test_1_Day.dropna()\n", + "\n", + " \n", + " for col in df_test_1_Day.columns:\n", + " df_test_1_Day[col] = pd.to_numeric(df_test_1_Day[col],errors='coerce')\n", + " #预测今日价格,显示至小数点后两位\n", + " Ypredict_Today=Best_model_DalyLGPrice.predict(df_test_1_Day)\n", + "\n", + " df_test_1_Day['日度预测价格']=Ypredict_Today\n", + " print(df_test_1_Day['日度预测价格'])\n", + " a = df_test_1_Day['日度预测价格']\n", + " a = a[0]\n", + " a = float(a)\n", + " a = round(a,2)\n", + " 
return a\n", + "def optimize_Model():\n", + " from sklearn.model_selection import train_test_split\n", + " from sklearn.impute import SimpleImputer\n", + " from sklearn.preprocessing import OrdinalEncoder\n", + " from sklearn.feature_selection import SelectFromModel\n", + " from sklearn.metrics import mean_squared_error, r2_score\n", + " import pandas as pd\n", + "\n", + " pd.set_option('display.max_rows',40) \n", + " pd.set_option('display.max_columns',40) \n", + " df_test = pd.read_excel('丙烯基础数据收集表.xls')\n", + " df_test.drop([0],inplace=True)\n", + " try:\n", + " df_test['Date']=pd.to_datetime(df_test['Date'], format='%m/%d/%Y',infer_datetime_format=True)\n", + " except:\n", + " df_test['Date']=pd.to_datetime(df_test['Date'], format=r'%Y-%m-%d',infer_datetime_format=True)\n", + "\n", + " #查看每个特征缺失值数量\n", + " MisVal_Check=df_test.isnull().sum().sort_values(ascending=False)\n", + " #去掉缺失值百分比>0.4的特征,去掉这些特征后的新表格命名为df_test_1\n", + " df_MisVal_Check = pd.DataFrame(MisVal_Check,)#\n", + " df_MisVal_Check_1=df_MisVal_Check.reset_index()\n", + " df_MisVal_Check_1.columns=['Variable_Name','Missing_Number'] \n", + " df_MisVal_Check_1['Missing_Number']=df_MisVal_Check_1['Missing_Number']/len(df_test)\n", + " df_test_1=df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number']>0.4].Variable_Name,axis = 1)\n", + " #将缺失值补为前一个或者后一个数值\n", + " df_test_1 = df_test\n", + " df_test_1=df_test_1.fillna(df_test.ffill())\n", + " df_test_1=df_test_1.fillna(df_test_1.bfill())\n", + " df_test_1[\"Date\"] = pd.to_datetime(df_test_1[\"Date\"])\n", + " df_test_1.index = df_test_1[\"Date\"]\n", + " df_test_1 = df_test_1.drop([\"Date\"], axis= 1)\n", + " df_test_1 = df_test_1.astype('float')\n", + " import numpy as np\n", + " import pandas as pd\n", + " from pandas import Series,DataFrame\n", + "\n", + " import matplotlib.pyplot as plt\n", + "\n", + " import sklearn.datasets as datasets\n", + "\n", + " #导入机器学习算法模型\n", + " from sklearn.linear_model import Lasso\n", + " from xgboost import 
XGBRegressor\n", + "\n", + " from datetime import datetime\n", + " import statsmodels.api as sm\n", + " try:\n", + " from keras.preprocessing.sequence import TimeseriesGenerator\n", + " except:\n", + " from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator\n", + "\n", + " import plotly.express as px\n", + " import plotly.graph_objects as go\n", + "\n", + " import xgboost as xgb\n", + " from xgboost import plot_importance, plot_tree\n", + " from sklearn.metrics import mean_absolute_error\n", + " from statsmodels.tools.eval_measures import mse,rmse\n", + " from sklearn.model_selection import GridSearchCV\n", + " from xgboost import XGBRegressor\n", + " import warnings\n", + " import pickle\n", + "\n", + " from sklearn.metrics import mean_squared_error\n", + "\n", + " #切割训练数据和样本数据\n", + " from sklearn.model_selection import train_test_split\n", + "\n", + " #用于模型评分\n", + " from sklearn.metrics import r2_score\n", + "\n", + " dataset1=df_test_1.drop('Price',axis=1)#.astype(float)\n", + "\n", + " y=df_test_1['Price']\n", + "\n", + " x=dataset1 \n", + "\n", + " train = x\n", + " target = y\n", + "\n", + " #切割数据样本集合测试集\n", + " X_train,x_test,y_train,y_true = train_test_split(train,target,test_size=0.2,random_state=0)\n", + "\n", + " #模型缩写\n", + " Lasso = Lasso(random_state=0)\n", + " XGBR = XGBRegressor(random_state=0)\n", + " #训练模型\n", + " Lasso.fit(X_train,y_train)\n", + " XGBR.fit(X_train,y_train)\n", + " #模型拟合\n", + " y_pre_Lasso = Lasso.predict(x_test)\n", + " y_pre_XGBR = XGBR.predict(x_test)\n", + "\n", + " #计算Lasso、XGBR、RandomForestR、AdaBoostR、GradientBoostingR、BaggingRegressor各模型的R²\n", + " Lasso_score = r2_score(y_true,y_pre_Lasso)\n", + " XGBR_score=r2_score(y_true,y_pre_XGBR)\n", + "\n", + " #计算Lasso、XGBR的MSE和RMSE\n", + " Lasso_MSE=mean_squared_error(y_true, y_pre_Lasso)\n", + " XGBR_MSE=mean_squared_error(y_true, y_pre_XGBR)\n", + "\n", + " Lasso_RMSE=np.sqrt(Lasso_MSE)\n", + " XGBR_RMSE=np.sqrt(XGBR_MSE)\n", + " # 将不同模型的不同误差值整合成一个表格\n", + " 
model_results = pd.DataFrame([['Lasso', Lasso_RMSE, Lasso_score],\n", + " ['XgBoost', XGBR_RMSE, XGBR_score]],\n", + " columns = ['模型(Model)','均方根误差(RMSE)', 'R^2 score'])\n", + " #将模型名称(Model)列设置为索引\n", + " model_results1=model_results.set_index('模型(Model)')\n", + "\n", + " model_results1\n", + " #定义plot_feature_importance函数,该函数用于计算特征重要性。此部分代码无需调整\n", + " def plot_feature_importance(importance,names,model_type):\n", + " feature_importance = np.array(importance)\n", + " feature_names = np.array(names)\n", + "\n", + " data={'feature_names':feature_names,'feature_importance':feature_importance}\n", + " fi_df = pd.DataFrame(data)\n", + "\n", + " fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True)\n", + "\n", + " plt.figure(figsize=(10,8))\n", + " sn.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names'])\n", + "\n", + " plt.title(model_type + \" \"+'FEATURE IMPORTANCE')\n", + " plt.xlabel('FEATURE IMPORTANCE')\n", + " plt.ylabel('FEATURE NAMES')\n", + " from pylab import mpl\n", + " %pylab\n", + " mpl.rcParams['font.sans-serif'] = ['SimHei']\n", + " ## Xgboost 模型参数优化-初步\n", + " #参考: https://juejin.im/post/6844903661013827598 \n", + " #每次调参时,备选参数数值以同数量级的1、3、10设置即可(比如设置1、3、10,或0.1、0.3、1.0,或0.01,0.03,0.10即可)\n", + "\n", + " from xgboost import XGBRegressor\n", + " from sklearn.model_selection import GridSearchCV\n", + "\n", + " estimator = XGBRegressor(random_state=0,\n", + " nthread=4,\n", + " seed=0\n", + " )\n", + " parameters = {\n", + " 'max_depth': range (2, 11, 2), # 树的最大深度\n", + " 'n_estimators': range (50, 101, 10), # 迭代次数\n", + " 'learning_rate': [0.01, 0.03, 0.1, 0.3, 0.5, 1]\n", + " }\n", + "\n", + " grid_search_XGB = GridSearchCV(\n", + " estimator=estimator,\n", + " param_grid=parameters,\n", + " # n_jobs = 10,\n", + " cv = 3,\n", + " verbose=True\n", + " )\n", + "\n", + " grid_search_XGB.fit(X_train, y_train)\n", + " #如果电脑在此步骤报错,可能是因为计算量太大,超过硬件可支持程度,可注释掉“n_jobs=10”一行\n", + "\n", + " best_parameters = 
grid_search_XGB.best_estimator_.get_params()\n", + " y_pred = grid_search_XGB.predict(x_test)\n", + "\n", + " op_XGBR_score = r2_score(y_true,y_pred)\n", + " op_XGBR_MSE= mean_squared_error(y_true, y_pred)\n", + " op_XGBR_RMSE= np.sqrt(op_XGBR_MSE)\n", + "\n", + " model_results2 = pd.DataFrame([['Optimized_Xgboost', op_XGBR_RMSE, op_XGBR_score]],\n", + " columns = ['模型(Model)', '均方根误差(RMSE)', 'R^2 score'])\n", + " model_results2=model_results2.set_index('模型(Model)')\n", + "\n", + " try:\n", + " results = model_results1.append(model_results2, ignore_index = False)\n", + " except:\n", + " results = pd.concat([model_results1,model_results2],ignore_index= True)\n", + " import pickle\n", + "\n", + " Pkl_Filename = \"日度价格预测_丙烯最佳模型.pkl\" \n", + "\n", + " with open(Pkl_Filename, 'wb') as file: \n", + " pickle.dump(grid_search_XGB, file)\n", + "\n", + "\n", + " \n", + "\n", + "def read_xls_data():\n", + " global one_cols, two_cols\n", + " # 打开 XLS 文件\n", + " workbook = xlrd.open_workbook(read_file_path_name)\n", + "\n", + " # 获取所有表格名称\n", + " # sheet_names = workbook.sheet_names()\n", + "\n", + " # 选择第一个表格\n", + " sheet = workbook.sheet_by_index(0)\n", + "\n", + " # 获取行数和列数\n", + " num_rows = sheet.nrows\n", + " # num_cols = sheet.ncols\n", + "\n", + " # 遍历每一行,获取单元格数据\n", + " # for i in range(num_rows):\n", + " # row_data = sheet.row_values(i)\n", + " # one_cols.append(row_data)\n", + " # two_cols.append(row_data[1])\n", + "\n", + " row_data = sheet.row_values(1)\n", + " one_cols = row_data\n", + "\n", + " # 关闭 XLS 文件\n", + " # workbook.close()\n", + "\n", + "\n", + "\n", + "\n", + "def start(date):\n", + " read_xls_data()\n", + "\n", + " token = get_head_auth()\n", + " if not token:\n", + " return\n", + " token_push = get_head_push_auth()\n", + " if not token_push:\n", + " return\n", + "\n", + " datas = get_data_value(token, one_cols[1:],date)\n", + "# if not datas:\n", + "# return\n", + "\n", + " # data_list = [two_cols, one_cols]\n", + " append_rows = 
[get_cur_time(date)[1]]\n", + " dataItemNo_dataValue = {}\n", + " for data_value in datas:\n", + " if \"dataValue\" not in data_value:\n", + " print(data_value)\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", + " else:\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", + " \n", + " for value in one_cols[1:]:\n", + " if value in dataItemNo_dataValue:\n", + " append_rows.append(dataItemNo_dataValue[value])\n", + " else:\n", + " append_rows.append(\"\")\n", + " save_xls(append_rows)\n", + " optimize_Model()\n", + " upload_data_to_system(token_push,date)\n", + " # data_list.append(three_cols)\n", + " # write_xls(data_list)\n", + "\n", + "\n", + "def start_1(date=''):\n", + " '''更新数据'''\n", + " read_xls_data()\n", + "\n", + " token = get_head_auth()\n", + " if not token:\n", + " return\n", + " \n", + "\n", + " datas = get_data_value(token, one_cols[1:],date)\n", + " if not datas:\n", + " return\n", + "\n", + " # data_list = [two_cols, one_cols]\n", + " append_rows = [get_cur_time(date=date)[1]]\n", + " dataItemNo_dataValue = {}\n", + " for data_value in datas:\n", + " if \"dataValue\" not in data_value:\n", + " print(data_value)\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", + " else:\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", + " \n", + " for value in one_cols[1:]:\n", + " if value in dataItemNo_dataValue:\n", + " append_rows.append(dataItemNo_dataValue[value])\n", + " else:\n", + " append_rows.append(\"\")\n", + " print('新增数据:',append_rows)\n", + " save_xls_1(append_rows)\n", + "\n", + " \n", + " # data_list.append(three_cols)\n", + " # write_xls(data_list)\n", + " \n", + "\n", + "\n", + "def start_2(date):\n", + " workbook = xlrd.open_workbook(read_file_path_name)\n", + "\n", + "\n", + "\n", + " # 选择第一个表格\n", + " sheet = workbook.sheet_by_index(0)\n", + "\n", + " # 获取行数和列数\n", + " num_rows = sheet.nrows\n", + "\n", + "\n", + "\n", + " row_data 
= sheet.row_values(1)\n", + " one_cols = row_data\n", + "\n", + "\n", + " login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5))\n", + " text = json.loads(login_res.text)\n", + " if text[\"status\"]:\n", + " token = text[\"data\"][\"accessToken\"]\n", + " else:\n", + " print(\"获取认证失败\")\n", + " token = None\n", + "\n", + "\n", + " now = date\n", + " year = now.year\n", + " month = now.month\n", + " day = now.day\n", + "\n", + " if month < 10:\n", + " month = \"0\" + str(month)\n", + " if day < 10:\n", + " day = \"0\" + str(day)\n", + " cur_time = str(year) + str(month) + str(day)\n", + " cur_time2 = str(year) + \"-\" + str(month) + \"-\" + str(day)\n", + " search_data = {\n", + " \"data\": {\n", + " \"date\": cur_time,\n", + " \"dataItemNoList\": one_cols[1:]\n", + " },\n", + " \"funcModule\": \"数据项\",\n", + " \"funcOperation\": \"查询\"\n", + " }\n", + " headers = {\"Authorization\": token}\n", + " search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5))\n", + " search_value = json.loads(search_res.text)[\"data\"]\n", + "# datas = search_value\n", + " if search_value:\n", + " datas = search_value\n", + " else :\n", + " datas = None\n", + " \n", + "\n", + " append_rows = [cur_time2]\n", + " dataItemNo_dataValue = {}\n", + "# for data_value in datas:\n", + "# dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", + " for data_value in datas:\n", + " if \"dataValue\" not in data_value:\n", + " print(data_value)\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", + " else:\n", + " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", + " for value in one_cols[1:]:\n", + " if value in dataItemNo_dataValue:\n", + " append_rows.append(dataItemNo_dataValue[value])\n", + " else:\n", + " append_rows.append(\"\")\n", + "\n", + " workbook = xlrd.open_workbook(read_file_path_name)\n", + "\n", + " # 获取所有sheet的个数\n", + " sheet_count = 
len(workbook.sheet_names())\n", + "\n", + " # 获取所有sheet的名称\n", + " sheet_names = workbook.sheet_names()\n", + "\n", + " new_workbook = xlwt.Workbook()\n", + " for i in range(sheet_count):\n", + " # 获取当前sheet\n", + " sheet = workbook.sheet_by_index(i)\n", + "\n", + " # 获取sheet的行数和列数\n", + " row_count = sheet.nrows\n", + " col_count = sheet.ncols\n", + " # 获取原有数据\n", + " data = []\n", + " for row in range(row_count):\n", + " row_data = []\n", + " for col in range(col_count):\n", + " row_data.append(sheet.cell_value(row, col))\n", + " data.append(row_data)\n", + " # 创建xlwt的Workbook对象\n", + " # 创建sheet\n", + " new_sheet = new_workbook.add_sheet(sheet_names[i])\n", + "\n", + " # 将原有的数据写入新的sheet\n", + " for row in range(row_count):\n", + " for col in range(col_count):\n", + " new_sheet.write(row, col, data[row][col])\n", + "\n", + " if i == 0:\n", + " # 在新的sheet中添加数据\n", + " for col in range(col_count):\n", + " new_sheet.write(row_count, col, append_rows[col])\n", + "\n", + " # 保存新的xls文件\n", + " new_workbook.save(read_file_path_name)\n", + " print('关闭文件')\n", + "\n", + "\n", + "def save_xls_1(append_rows):\n", + "\n", + " # 打开xls文件\n", + " workbook = xlrd.open_workbook('丙烯基础数据收集表.xls')\n", + "\n", + " # 获取所有sheet的个数\n", + " sheet_count = len(workbook.sheet_names())\n", + "\n", + " # 获取所有sheet的名称\n", + " sheet_names = workbook.sheet_names()\n", + "\n", + " new_workbook = xlwt.Workbook()\n", + " for i in range(sheet_count):\n", + " # 获取当前sheet\n", + " sheet = workbook.sheet_by_index(i)\n", + "\n", + " # 获取sheet的行数和列数\n", + " row_count = sheet.nrows - 1\n", + " col_count = sheet.ncols\n", + " # 获取原有数据\n", + " data = []\n", + " for row in range(row_count):\n", + " row_data = []\n", + " for col in range(col_count):\n", + " row_data.append(sheet.cell_value(row, col))\n", + " data.append(row_data)\n", + " # 创建xlwt的Workbook对象\n", + " # 创建sheet\n", + " new_sheet = new_workbook.add_sheet(sheet_names[i])\n", + "\n", + " # 将原有的数据写入新的sheet\n", + " for row in range(row_count):\n", + " 
for col in range(col_count):\n", + " new_sheet.write(row, col, data[row][col])\n", + "\n", + " if i == 0:\n", + " # 在新的sheet中添加数据\n", + " for col in range(col_count):\n", + " new_sheet.write(row_count, col, append_rows[col])\n", + "\n", + " # 保存新的xls文件\n", + " new_workbook.save(\"丙烯基础数据收集表.xls\") \n", + "\n", + " \n", + " \n", + " \n", + "def check_data(dataItemNo):\n", + " token = get_head_auth()\n", + " if not token:\n", + " return\n", + "\n", + " datas = get_data_value(token, dataItemNo)\n", + " if not datas:\n", + " return\n", + "\n", + "\n", + "def save_xls(append_rows):\n", + "\n", + " # 打开xls文件\n", + " workbook = xlrd.open_workbook('丙烯基础数据收集表.xls')\n", + "\n", + " # 获取所有sheet的个数\n", + " sheet_count = len(workbook.sheet_names())\n", + "\n", + " # 获取所有sheet的名称\n", + " sheet_names = workbook.sheet_names()\n", + "\n", + " new_workbook = xlwt.Workbook()\n", + " for i in range(sheet_count):\n", + " # 获取当前sheet\n", + " sheet = workbook.sheet_by_index(i)\n", + "\n", + " # 获取sheet的行数和列数\n", + " row_count = sheet.nrows\n", + " col_count = sheet.ncols\n", + " # 获取原有数据\n", + " data = []\n", + " for row in range(row_count):\n", + " row_data = []\n", + " for col in range(col_count):\n", + " row_data.append(sheet.cell_value(row, col))\n", + " data.append(row_data)\n", + " # 创建xlwt的Workbook对象\n", + " # 创建sheet\n", + " new_sheet = new_workbook.add_sheet(sheet_names[i])\n", + "\n", + " # 将原有的数据写入新的sheet\n", + " for row in range(row_count):\n", + " for col in range(col_count):\n", + " new_sheet.write(row, col, data[row][col])\n", + "\n", + " if i == 0:\n", + " # 在新的sheet中添加数据\n", + " for col in range(col_count):\n", + " new_sheet.write(row_count, col, append_rows[col])\n", + "\n", + " # 保存新的xls文件\n", + " new_workbook.save(\"丙烯基础数据收集表.xls\")\n", + "\n", + "\n", + "if __name__ == \"__main__\":\n", + " pass\n", + "# # 需要单独运行放开\n", + "# start()\n", + "\n", + "# 每天定时12点运行\n", + " # while True:\n", + " # # 获取当前时间\n", + " # current_time = time.strftime(\"%H:%M:%S\", 
time.localtime())\n", + " # current_time_1 = time.strftime(\"%H:%M:%S\", time.localtime())\n", + "\n", + " # # 判断当前时间是否为执行任务的时间点\n", + " # if current_time == \"09:20:00\":\n", + " # print(\"执行定时任务\")\n", + " # start()\n", + "\n", + " # # 休眠1秒钟,避免过多占用CPU资源\n", + " # time.sleep(1)\n", + " \n", + " # elif current_time_1 == \"20:00:00\":\n", + " # print(\"更新数据\")\n", + " # start_1()\n", + " # time.sleep(1)\n", + "\n", + "\n", + " # 检测数据准确性, 需要检测放开\n", + " # check_data(\"100028098|LISTING_PRICE\")\n", + " # check_data(\"9137070016544622XB|DAY_Yield\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20241017\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:287: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:289: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. 
You can safely remove this argument.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: QtAgg\n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "d:\\ProgramData\\anaconda3\\Lib\\site-packages\\IPython\\core\\magics\\pylab.py:162: UserWarning:\n", + "\n", + "pylab import has clobbered these variables: ['plot', 'datetime', 'random', '__version__']\n", + "`%matplotlib` prevents importing * from pylab and numpy\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:234: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:236: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:270: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). 
To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date\n", + "2024-10-17 6953.115234\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "20241018\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:287: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:289: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: QtAgg\n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:234: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. 
You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:236: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:270: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date\n", + "2024-10-18 6949.200684\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "20241019\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:287: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:289: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. 
You can safely remove this argument.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: QtAgg\n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:234: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:236: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:270: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). 
To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date\n", + "2024-10-19 6949.064941\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "20241020\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:287: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:289: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: QtAgg\n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:234: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. 
You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:236: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:270: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date\n", + "2024-10-20 6949.524414\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "20241021\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:287: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:289: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. 
You can safely remove this argument.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: QtAgg\n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:234: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:236: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:270: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). 
To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date\n", + "2024-10-21 6951.338867\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "20241022\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:287: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:289: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: QtAgg\n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:234: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. 
You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:236: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:270: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date\n", + "2024-10-22 6999.318848\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "20241023\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:287: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:289: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. 
You can safely remove this argument.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: QtAgg\n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:234: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:236: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:270: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). 
To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date\n", + "2024-10-23 7048.455566\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "20241024\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:287: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:289: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: QtAgg\n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:234: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. 
You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:236: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:270: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date\n", + "2024-10-24 7098.654297\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n", + "20241025\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:287: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:289: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. 
You can safely remove this argument.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: QtAgg\n", + "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", + "Populating the interactive namespace from numpy and matplotlib\n", + "Fitting 3 folds for each of 180 candidates, totalling 540 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:234: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:236: UserWarning:\n", + "\n", + "The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n", + "\n", + "C:\\Users\\Hello\\AppData\\Local\\Temp\\ipykernel_13904\\1257534769.py:270: FutureWarning:\n", + "\n", + "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). 
To access a value by position, use `ser.iloc[pos]`\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date\n", + "2024-10-25 7098.378906\n", + "Name: 日度预测价格, dtype: float32\n", + "{\"confirmFlg\":false,\"status\":true}\n" + ] + } + ], + "source": [ + "from datetime import datetime, timedelta\n", + "\n", + "start_date = datetime(2024, 10, 17)\n", + "end_date = datetime(2024, 10, 26)\n", + "\n", + "while start_date < end_date:\n", + " print(start_date.strftime('%Y%m%d'))\n", + " start(start_date)\n", + " # time.sleep(1)\n", + " # start_2(start_date)\n", + " start_date += timedelta(days=1)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/aisenzhecode/聚合级丙烯/丙烯基础数据收集表.xls b/aisenzhecode/聚合级丙烯/丙烯基础数据收集表.xls new file mode 100644 index 0000000..0bdfb5a Binary files /dev/null and b/aisenzhecode/聚合级丙烯/丙烯基础数据收集表.xls differ