964 lines
38 KiB
Plaintext
964 lines
38 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
" <script type=\"text/javascript\">\n",
|
||
" window.PlotlyConfig = {MathJaxConfig: 'local'};\n",
|
||
" if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
|
||
" if (typeof require !== 'undefined') {\n",
|
||
" require.undef(\"plotly\");\n",
|
||
" requirejs.config({\n",
|
||
" paths: {\n",
|
||
" 'plotly': ['https://cdn.plot.ly/plotly-2.12.1.min']\n",
|
||
" }\n",
|
||
" });\n",
|
||
" require(['plotly'], function(Plotly) {\n",
|
||
" window._Plotly = Plotly;\n",
|
||
" });\n",
|
||
" }\n",
|
||
" </script>\n",
|
||
" "
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"import requests\n",
|
||
"import json\n",
|
||
"import xlrd\n",
|
||
"import xlwt\n",
|
||
"from datetime import datetime\n",
|
||
"import time\n",
|
||
"# Connection settings for the data-warehouse API.\n",
"# NOTE(review): the account and password are hard-coded in this notebook;\n",
"# consider loading them from environment variables or a secrets store.\n",
"login_url = 'http://10.200.32.39/jingbo-api/api/server/login'\n",
"search_url = 'http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos'\n",
"\n",
"# The push side logs in at the same endpoint with the same payload.\n",
"login_push_url = login_url\n",
"upload_url = 'http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList'\n",
"\n",
"login_data = {\n",
"    'data': {\n",
"        'account': 'api_dev',\n",
"        'password': 'ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=',\n",
"        'tenantHashCode': '8a4577dbd919675758d57999a1e891fe',\n",
"        'terminal': 'API',\n",
"    },\n",
"    'funcModule': 'API',\n",
"    'funcOperation': '获取token',\n",
"}\n",
"\n",
"# Separate dict objects (outer and inner) with the same contents as login_data.\n",
"login_push_data = {**login_data, 'data': dict(login_data['data'])}\n",
"\n",
"# Workbook that holds the data-item codes and the history rows.\n",
"read_file_path_name = '沥青数据项.xls'\n",
"# one_cols is filled by read_xls_data(); two_cols is never assigned in this cell.\n",
"one_cols = []\n",
"two_cols = []\n",
|
||
"\n",
|
||
"import pandas as pd\n",
|
||
"import numpy as np\n",
|
||
"import matplotlib.pyplot as plt\n",
|
||
"import seaborn as sn\n",
|
||
"import random\n",
|
||
"import time\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"from plotly import __version__\n",
|
||
"from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot\n",
|
||
"\n",
|
||
"from sklearn import preprocessing\n",
|
||
"\n",
|
||
"from pandas import Series,DataFrame\n",
|
||
"\n",
|
||
"import matplotlib.pyplot as plt\n",
|
||
"\n",
|
||
"import sklearn.datasets as datasets\n",
|
||
"\n",
|
||
"#导入机器学习算法模型\n",
|
||
"from sklearn.linear_model import Lasso\n",
|
||
"from xgboost import XGBRegressor\n",
|
||
"\n",
|
||
"import datetime\n",
|
||
"import statsmodels.api as sm\n",
|
||
"# from keras.preprocessing.sequence import TimeseriesGenerator\n",
|
||
"from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator\n",
|
||
"\n",
|
||
"import plotly.express as px\n",
|
||
"import plotly.graph_objects as go\n",
|
||
"\n",
|
||
"import xgboost as xgb\n",
|
||
"from xgboost import plot_importance, plot_tree\n",
|
||
"from sklearn.metrics import mean_absolute_error\n",
|
||
"from statsmodels.tools.eval_measures import mse,rmse\n",
|
||
"from sklearn.model_selection import GridSearchCV\n",
|
||
"from xgboost import XGBRegressor\n",
|
||
"import warnings\n",
|
||
"import pickle\n",
|
||
"\n",
|
||
"from sklearn.metrics import mean_squared_error\n",
|
||
"\n",
|
||
"#切割训练数据和样本数据\n",
|
||
"from sklearn.model_selection import train_test_split\n",
|
||
"\n",
|
||
"#用于模型评分\n",
|
||
"from sklearn.metrics import r2_score\n",
|
||
"\n",
|
||
"le = preprocessing.LabelEncoder()\n",
|
||
"\n",
|
||
"# print(__version__) # requires version >= 1.9.0\n",
|
||
"\n",
|
||
"\n",
|
||
"import cufflinks as cf\n",
|
||
"cf.go_offline()\n",
|
||
"\n",
|
||
"random.seed(100)\n",
|
||
"\n",
|
||
"%matplotlib inline\n",
|
||
"\n",
|
||
"# 数据获取\n",
|
||
"\n",
|
||
"def get_head_auth():\n",
"    '''Log in to the query API and return its access token.\n",
"\n",
"    Posts ``login_data`` to ``login_url``. Returns the ``accessToken`` from the\n",
"    response, or ``None`` (after printing a failure message) when the\n",
"    ``status`` field of the response is falsy.\n",
"    '''\n",
"    response = requests.post(url=login_url, json=login_data, timeout=(3, 5))\n",
"    payload = json.loads(response.text)\n",
"    if not payload['status']:\n",
"        print('获取认证失败')\n",
"        return None\n",
"    return payload['data']['accessToken']\n",
|
||
"\n",
|
||
"\n",
|
||
"def get_data_value(token, dataItemNoList,date=''):\n",
"    '''Query the values of the given data items for one day.\n",
"\n",
"    Args:\n",
"        token: Access token sent in the ``Authorization`` header.\n",
"        dataItemNoList: Data item numbers to request.\n",
"        date: Object with year/month/day attributes; an empty string means\n",
"            today (resolved by get_cur_time).\n",
"\n",
"    Returns:\n",
"        The ``data`` field of the JSON response, or ``None`` (after printing a\n",
"        notice) when that field is empty or missing a value.\n",
"    '''\n",
"    query_day = get_cur_time(date)[0]\n",
"    request_body = {\n",
"        'data': {'date': query_day, 'dataItemNoList': dataItemNoList},\n",
"        'funcModule': '数据项',\n",
"        'funcOperation': '查询',\n",
"    }\n",
"    response = requests.post(\n",
"        url=search_url,\n",
"        headers={'Authorization': token},\n",
"        json=request_body,\n",
"        timeout=(3, 5),\n",
"    )\n",
"    values = json.loads(response.text)['data']\n",
"    if values:\n",
"        return values\n",
"    print('今天没有新数据')\n",
"    return None\n",
|
||
"\n",
|
||
"\n",
|
||
"# xls文件处理\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"def write_xls(data,date):\n",
"    '''Write rows of cell values to a new workbook named after ``date``.\n",
"\n",
"    Args:\n",
"        data: Iterable of rows; each row is an iterable of cell values.\n",
"        date: Passed to get_cur_time(); an empty string means today. The\n",
"            output file is ``<YYYYMMDD>.xls`` in the working directory.\n",
"    '''\n",
"    workbook = xlwt.Workbook()\n",
"\n",
"    # xlwt.Workbook has no load() method; a sheet is created with add_sheet().\n",
"    sheet = workbook.add_sheet('Sheet1')\n",
"\n",
"    for row_index, row_data in enumerate(data):\n",
"        for col_index, cell_data in enumerate(row_data):\n",
"            sheet.write(row_index, col_index, cell_data)\n",
"\n",
"    workbook.save(get_cur_time(date)[0] + '.xls')\n",
|
||
"\n",
|
||
"\n",
|
||
"def get_cur_time(date = ''):\n",
"    '''Format a day as ``YYYYMMDD`` and ``YYYY-MM-DD``.\n",
"\n",
"    Args:\n",
"        date: Object exposing integer ``year``, ``month`` and ``day``\n",
"            attributes; an empty string (the default) means the current\n",
"            local time.\n",
"\n",
"    Returns:\n",
"        Tuple ``(compact, dashed)`` with month and day zero-padded to two digits.\n",
"    '''\n",
"    if date == '':\n",
"        # Local import: the module-level name `datetime` is rebound more than\n",
"        # once by the imports at the top of this cell.\n",
"        import datetime\n",
"        moment = datetime.datetime.now()\n",
"    else:\n",
"        moment = date\n",
"\n",
"    year_text = str(moment.year)\n",
"    month_text = '%02d' % moment.month\n",
"    day_text = '%02d' % moment.day\n",
"    compact = year_text + month_text + day_text\n",
"    dashed = '-'.join((year_text, month_text, day_text))\n",
"    return compact, dashed\n",
|
||
"\n",
|
||
"\n",
|
||
"def get_head_push_auth():\n",
"    '''Log in for the upload (push) API and return its access token.\n",
"\n",
"    Posts ``login_push_data`` to ``login_push_url``. Returns the\n",
"    ``accessToken`` string, or ``None`` after printing a failure message when\n",
"    the ``status`` field of the response is falsy.\n",
"    '''\n",
"    login_res = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5))\n",
"    body = json.loads(login_res.text)\n",
"    if body['status']:\n",
"        return body['data']['accessToken']\n",
"    print('获取认证失败')\n",
"    return None\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"def upload_data_to_system(token_push,date=''):\n",
"    '''Upload the forecast price for one day to the data-warehouse API.\n",
"\n",
"    Args:\n",
"        token_push: Access token sent in the ``Authorization`` header.\n",
"        date: Day the value is filed under (object with year/month/day).\n",
"            Defaults to an empty string, which get_cur_time resolves to today;\n",
"            the default keeps the one-argument call in start() from raising\n",
"            TypeError.\n",
"\n",
"    The value uploaded is whatever forecast_price() returns; the raw response\n",
"    text is printed.\n",
"    '''\n",
"    payload = {\n",
"        'funcModule': '数据表信息列表',\n",
"        'funcOperation': '新增',\n",
"        'data': [\n",
"            {\n",
"                'dataItemNo': 'C01100036|Forecast_Price|ACN',\n",
"                'dataDate': get_cur_time(date)[0],\n",
"                'dataStatus': 'add',\n",
"                'dataValue': forecast_price(),\n",
"            }\n",
"        ],\n",
"    }\n",
"    headers = {'Authorization': token_push}\n",
"    res = requests.post(url=upload_url, headers=headers, json=payload, timeout=(3, 5))\n",
"    print(res.text)\n",
|
||
"\n",
|
||
" \n",
|
||
"# def upload_data_to_system(token):\n",
|
||
"# data = {\n",
|
||
"# \"funcModule\": \"数据表信息列表\",\n",
|
||
"# \"funcOperation\": \"新增\",\n",
|
||
"# \"data\": [\n",
|
||
"# {\"dataItemNo\": \"C01100036|Forecast_ Price|ACN\",\n",
|
||
"# \"dataDate\": '20230706',\n",
|
||
"# \"dataStatus\": \"add\",\n",
|
||
"# \"dataValue\": 3780.0\n",
|
||
"# }\n",
|
||
"\n",
|
||
"# ]\n",
|
||
"# }\n",
|
||
"# headers = {\"Authorization\": token}\n",
|
||
"# res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))\n",
|
||
"# print(res.text)\n",
|
||
"\n",
|
||
"def forecast_price():\n",
"    '''Predict the guide price for the most recent row of the history sheet.\n",
"\n",
"    Reads sheet '数据项历史数据' of '沥青数据项.xls', casts the numeric columns to\n",
"    float, drops columns with more than 40% missing values, fills remaining\n",
"    gaps forward then backward, and passes the last row (without '日期' and\n",
"    '京博指导价') to the model stored in '日度价格预测_最佳模型.pkl'.\n",
"\n",
"    Returns:\n",
"        The predicted price as a float rounded to two decimals.\n",
"\n",
"    Raises:\n",
"        ValueError: if the latest row still has missing values after filling,\n",
"            so there is nothing to predict on.\n",
"    '''\n",
"    import joblib\n",
"\n",
"    numeric_cols = [\n",
"        '汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n",
"        '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n",
"        '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n",
"        '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n",
"        '京博签单量','京博库存量','京博产量','加权平均成交价',\n",
"    ]\n",
"\n",
"    df_test = pd.read_excel('沥青数据项.xls',sheet_name='数据项历史数据')\n",
"    # NOTE(review): row 0 presumably holds the data-item codes rather than\n",
"    # values (read_xls_data reads sheet row 1 as codes) - confirm.\n",
"    df_test.drop([0],inplace=True)\n",
"    df_test[numeric_cols] = df_test[numeric_cols].astype('float')\n",
"    # infer_datetime_format is deprecated and has no effect once format is given.\n",
"    df_test['日期'] = pd.to_datetime(df_test['日期'], format='%Y-%m-%d')\n",
"\n",
"    # Drop features whose share of missing values exceeds 40%.\n",
"    missing_ratio = df_test.isnull().sum() / len(df_test)\n",
"    sparse_cols = missing_ratio[missing_ratio > 0.4].index\n",
"    df_test_1 = df_test.drop(sparse_cols, axis=1)\n",
"\n",
"    # Fill gaps with the previous value, then with the next one.\n",
"    df_test_1 = df_test_1.ffill().bfill()\n",
"\n",
"    # NOTE(review): this file is written with pickle.dump by optimize_Model;\n",
"    # unpickling runs arbitrary code, so only load it from a trusted location.\n",
"    Best_model_DalyLGPrice = joblib.load('日度价格预测_最佳模型.pkl')\n",
"\n",
"    # The newest day is the last row; index it by date and drop the target.\n",
"    latest = df_test_1.tail(1).set_index('日期')\n",
"    latest = latest.drop('京博指导价', axis=1).dropna()\n",
"    if latest.empty:\n",
"        raise ValueError('latest row has missing values; cannot forecast')\n",
"\n",
"    latest['日度预测价格'] = Best_model_DalyLGPrice.predict(latest)\n",
"    print(latest['日度预测价格'])\n",
"\n",
"    # The Series is indexed by date, so take the value by position.\n",
"    return round(float(latest['日度预测价格'].iloc[0]), 2)\n",
|
||
"def optimize_Model():\n",
"    '''Retrain the daily price model and save the tuned estimator.\n",
"\n",
"    Loads sheet '数据项历史数据' of '沥青数据项.xls', cleans it the same way as\n",
"    forecast_price, fits a baseline Lasso and a baseline XGBoost regressor on\n",
"    an 80/20 split, grid-searches XGBoost hyper-parameters with 3-fold CV and\n",
"    pickles the fitted GridSearchCV object to '日度价格预测_最佳模型.pkl'.\n",
"\n",
"    Returns:\n",
"        DataFrame indexed by model name with the hold-out RMSE and R^2 of the\n",
"        two baselines and the tuned model. (Previously nothing was returned\n",
"        and the model names were dropped from the table.)\n",
"    '''\n",
"    import pickle\n",
"\n",
"    from sklearn.linear_model import Lasso\n",
"    from sklearn.metrics import mean_squared_error, r2_score\n",
"    from sklearn.model_selection import GridSearchCV, train_test_split\n",
"    from xgboost import XGBRegressor\n",
"\n",
"    pd.set_option('display.max_rows',40)\n",
"    pd.set_option('display.max_columns',40)\n",
"\n",
"    numeric_cols = [\n",
"        '汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n",
"        '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n",
"        '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n",
"        '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n",
"        '京博签单量','京博库存量','京博产量','加权平均成交价',\n",
"    ]\n",
"\n",
"    df_test = pd.read_excel('沥青数据项.xls',sheet_name='数据项历史数据')\n",
"    df_test.drop([0],inplace=True)\n",
"    df_test[numeric_cols] = df_test[numeric_cols].astype('float')\n",
"    # infer_datetime_format is deprecated and has no effect once format is given.\n",
"    df_test['日期'] = pd.to_datetime(df_test['日期'], format='%Y-%m-%d')\n",
"\n",
"    # Drop features whose share of missing values exceeds 40%.\n",
"    missing_ratio = df_test.isnull().sum() / len(df_test)\n",
"    sparse_cols = missing_ratio[missing_ratio > 0.4].index\n",
"    df_test_1 = df_test.drop(sparse_cols, axis=1)\n",
"\n",
"    # Fill gaps with the previous value, then with the next one.\n",
"    df_test_1 = df_test_1.ffill().bfill()\n",
"    df_test_1 = df_test_1.set_index('日期')\n",
"\n",
"    # Features are every remaining column except the target price.\n",
"    x = df_test_1.drop('京博指导价',axis=1)\n",
"    y = df_test_1['京博指导价']\n",
"    X_train,x_test,y_train,y_true = train_test_split(x,y,test_size=0.2,random_state=0)\n",
"\n",
"    def _rmse_and_r2(y_pred):\n",
"        # Hold-out RMSE and R^2 for one set of predictions.\n",
"        return np.sqrt(mean_squared_error(y_true, y_pred)), r2_score(y_true, y_pred)\n",
"\n",
"    # Baselines with default hyper-parameters.\n",
"    lasso_model = Lasso(random_state=0)\n",
"    xgb_model = XGBRegressor(random_state=0)\n",
"    lasso_model.fit(X_train,y_train)\n",
"    xgb_model.fit(X_train,y_train)\n",
"    Lasso_RMSE, Lasso_score = _rmse_and_r2(lasso_model.predict(x_test))\n",
"    XGBR_RMSE, XGBR_score = _rmse_and_r2(xgb_model.predict(x_test))\n",
"\n",
"    def plot_feature_importance(importance,names,model_type):\n",
"        # Bar chart of feature importances, largest first (currently not called).\n",
"        fi_df = pd.DataFrame({'feature_names': np.array(names),\n",
"                              'feature_importance': np.array(importance)})\n",
"        fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True)\n",
"\n",
"        plt.figure(figsize=(10,8))\n",
"        sn.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names'])\n",
"        plt.title(model_type + ' '+'FEATURE IMPORTANCE')\n",
"        plt.xlabel('FEATURE IMPORTANCE')\n",
"        plt.ylabel('FEATURE NAMES')\n",
"\n",
"    # Font able to render the Chinese feature names. This replaces the former\n",
"    # `%pylab` call, which is deprecated, switched the matplotlib backend and\n",
"    # overwrote notebook globals such as `datetime` and `random`.\n",
"    plt.rcParams['font.sans-serif'] = ['SimHei']\n",
"\n",
"    estimator = XGBRegressor(random_state=0,\n",
"                             nthread=4,\n",
"                             seed=0\n",
"                             )\n",
"    parameters = {\n",
"        'max_depth': range (2, 11, 2),  # maximum tree depth\n",
"        'n_estimators': range (50, 101, 10),  # number of boosting rounds\n",
"        'learning_rate': [0.01, 0.03, 0.1, 0.3, 0.5, 1]\n",
"    }\n",
"\n",
"    grid_search_XGB = GridSearchCV(\n",
"        estimator=estimator,\n",
"        param_grid=parameters,\n",
"        cv = 3,\n",
"        verbose=True\n",
"    )\n",
"\n",
"    grid_search_XGB.fit(X_train, y_train)\n",
"    print('Best score: %0.3f' % grid_search_XGB.best_score_)\n",
"    print('Best parameters set:')\n",
"    best_parameters = grid_search_XGB.best_estimator_.get_params()\n",
"    for param_name in sorted(parameters.keys()):\n",
"        print('\\t%s: %r' % (param_name, best_parameters[param_name]))\n",
"\n",
"    op_XGBR_RMSE, op_XGBR_score = _rmse_and_r2(grid_search_XGB.predict(x_test))\n",
"\n",
"    # One table, indexed by model name, for all three models.\n",
"    results = pd.DataFrame(\n",
"        [['Lasso', Lasso_RMSE, Lasso_score],\n",
"         ['XgBoost', XGBR_RMSE, XGBR_score],\n",
"         ['Optimized_Xgboost', op_XGBR_RMSE, op_XGBR_score]],\n",
"        columns = ['模型(Model)','均方根误差(RMSE)', 'R^2 score']\n",
"    ).set_index('模型(Model)')\n",
"\n",
"    Pkl_Filename = '日度价格预测_最佳模型.pkl'\n",
"    with open(Pkl_Filename, 'wb') as file:\n",
"        pickle.dump(grid_search_XGB, file)\n",
"\n",
"    return results\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
" \n",
|
||
"\n",
|
||
"\n",
|
||
" \n",
|
||
"\n",
|
||
"def read_xls_data():\n",
"    '''Load one row of the workbook into the module-level ``one_cols``.\n",
"\n",
"    Opens ``read_file_path_name`` with xlrd and stores the values of the row\n",
"    at index 1 of the first sheet in ``one_cols``. The callers in this cell\n",
"    pass ``one_cols[1:]`` to get_data_value as the data item numbers.\n",
"    '''\n",
"    global one_cols, two_cols\n",
"    first_sheet = xlrd.open_workbook(read_file_path_name).sheet_by_index(0)\n",
"    one_cols = first_sheet.row_values(1)\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"def start():\n",
"    '''Run the full pipeline for today: fetch, append, retrain, upload.\n",
"\n",
"    Delegates to start_3 with an empty date, which get_cur_time resolves to\n",
"    the current day. The previous body repeated start_3 line for line but\n",
"    called upload_data_to_system without its ``date`` argument, which raised\n",
"    TypeError at the final step.\n",
"    '''\n",
"    start_3('')\n",
|
||
"\n",
|
||
"\n",
|
||
"def start_3(date):\n",
"    '''Fetch the data for ``date``, append it, retrain and upload the forecast.\n",
"\n",
"    Returns early (None) when either login fails or no data comes back.\n",
"    '''\n",
"    read_xls_data()\n",
"\n",
"    token = get_head_auth()\n",
"    if not token:\n",
"        return\n",
"    token_push = get_head_push_auth()\n",
"    if not token_push:\n",
"        return\n",
"\n",
"    item_codes = one_cols[1:]\n",
"    datas = get_data_value(token, item_codes, date)\n",
"    if not datas:\n",
"        return\n",
"\n",
"    row_label = get_cur_time(date)[1]\n",
"\n",
"    # Map every returned item number to its value; records without a value\n",
"    # are printed and stored as an empty string.\n",
"    value_by_item = {}\n",
"    for record in datas:\n",
"        has_value = 'dataValue' in record\n",
"        if not has_value:\n",
"            print(record)\n",
"        value_by_item[record['dataItemNo']] = record['dataValue'] if has_value else ''\n",
"\n",
"    # First cell is the date, then one cell per item code in workbook order.\n",
"    new_row = [row_label] + [value_by_item.get(code, '') for code in item_codes]\n",
"    save_xls(new_row)\n",
"    optimize_Model()\n",
"    upload_data_to_system(token_push,date)\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"def start_1():\n",
"    '''Fetch today's values and write them over the last row of the first sheet.\n",
"\n",
"    No retraining or upload happens here. Returns early (None) when the login\n",
"    fails or no data comes back.\n",
"    '''\n",
"    read_xls_data()\n",
"\n",
"    token = get_head_auth()\n",
"    if not token:\n",
"        return\n",
"\n",
"    datas = get_data_value(token, one_cols[1:])\n",
"    if not datas:\n",
"        return\n",
"\n",
"    append_rows = [get_cur_time()[1]]\n",
"\n",
"    # Item number -> value; records lacking a value are printed and kept as ''.\n",
"    lookup = {}\n",
"    for item in datas:\n",
"        if 'dataValue' in item:\n",
"            lookup[item['dataItemNo']] = item['dataValue']\n",
"            continue\n",
"        print(item)\n",
"        lookup[item['dataItemNo']] = ''\n",
"\n",
"    append_rows.extend(lookup.get(code, '') for code in one_cols[1:])\n",
"    save_xls_1(append_rows)\n",
|
||
"\n",
|
||
"\n",
|
||
"def start_2(date):\n",
"    '''Fetch the values for ``date`` and write them over the last row of the first sheet.\n",
"\n",
"    Same flow as start_1 but for an explicit day, and the new row is printed\n",
"    before it is saved. Returns early (None) when the login fails or no data\n",
"    comes back.\n",
"    '''\n",
"    read_xls_data()\n",
"\n",
"    token = get_head_auth()\n",
"    if not token:\n",
"        return\n",
"\n",
"    wanted_codes = one_cols[1:]\n",
"    datas = get_data_value(token, wanted_codes, date)\n",
"    if not datas:\n",
"        return\n",
"\n",
"    append_rows = [get_cur_time(date=date)[1]]\n",
"\n",
"    fetched = {}\n",
"    for entry in datas:\n",
"        missing = 'dataValue' not in entry\n",
"        if missing:\n",
"            print(entry)\n",
"        fetched[entry['dataItemNo']] = '' if missing else entry['dataValue']\n",
"\n",
"    for code in wanted_codes:\n",
"        append_rows.append(fetched[code] if code in fetched else '')\n",
"\n",
"    print('新增数据:',append_rows)\n",
"    save_xls_1(append_rows)\n",
|
||
" \n",
|
||
"def save_xls_1(append_rows):\n",
"    '''Rewrite the workbook, replacing the last row of the first sheet.\n",
"\n",
"    Every sheet of '沥青数据项.xls' is copied cell by cell into a fresh xlwt\n",
"    workbook. On the first sheet the final row is replaced by ``append_rows``;\n",
"    all other sheets are copied in full. The result overwrites the file.\n",
"\n",
"    Args:\n",
"        append_rows: Cell values for the replacement row; must provide at\n",
"            least as many entries as the first sheet has columns.\n",
"    '''\n",
"    workbook = xlrd.open_workbook('沥青数据项.xls')\n",
"    new_workbook = xlwt.Workbook()\n",
"\n",
"    for i, sheet_name in enumerate(workbook.sheet_names()):\n",
"        sheet = workbook.sheet_by_index(i)\n",
"        col_count = sheet.ncols\n",
"\n",
"        # Only the first sheet gives up its last row, which is rewritten\n",
"        # below. Previously every sheet lost its last row on each call.\n",
"        # max() keeps the target row index valid for an empty first sheet.\n",
"        if i == 0:\n",
"            row_count = max(sheet.nrows - 1, 0)\n",
"        else:\n",
"            row_count = sheet.nrows\n",
"\n",
"        new_sheet = new_workbook.add_sheet(sheet_name)\n",
"        for row in range(row_count):\n",
"            for col in range(col_count):\n",
"                new_sheet.write(row, col, sheet.cell_value(row, col))\n",
"\n",
"        if i == 0:\n",
"            for col in range(col_count):\n",
"                new_sheet.write(row_count, col, append_rows[col])\n",
"\n",
"    new_workbook.save('沥青数据项.xls')\n",
|
||
"\n",
|
||
" \n",
|
||
" \n",
|
||
" \n",
|
||
"def check_data(dataItemNo):\n",
"    '''Log in and request today's data for ``dataItemNo``.\n",
"\n",
"    The fetched data is neither inspected nor returned; the only visible\n",
"    effect is whatever get_head_auth and get_data_value print. Always\n",
"    returns None.\n",
"    '''\n",
"    token = get_head_auth()\n",
"    if token:\n",
"        get_data_value(token, dataItemNo)\n",
|
||
"\n",
|
||
"\n",
|
||
"def save_xls(append_rows):\n",
"    '''Rewrite the workbook with ``append_rows`` added below the first sheet.\n",
"\n",
"    Every sheet of '沥青数据项.xls' is copied cell by cell into a fresh xlwt\n",
"    workbook; on the first sheet ``append_rows`` is written as one extra row\n",
"    after the existing ones. The result overwrites the original file.\n",
"\n",
"    Args:\n",
"        append_rows: Cell values for the new row, indexed by column; must\n",
"            provide at least as many entries as the first sheet has columns.\n",
"    '''\n",
"    source_book = xlrd.open_workbook('沥青数据项.xls')\n",
"    names = source_book.sheet_names()\n",
"    target_book = xlwt.Workbook()\n",
"\n",
"    for sheet_index in range(len(names)):\n",
"        source_sheet = source_book.sheet_by_index(sheet_index)\n",
"        n_rows, n_cols = source_sheet.nrows, source_sheet.ncols\n",
"\n",
"        # Snapshot the existing cells before writing them out.\n",
"        cells = [[source_sheet.cell_value(r, c) for c in range(n_cols)]\n",
"                 for r in range(n_rows)]\n",
"\n",
"        target_sheet = target_book.add_sheet(names[sheet_index])\n",
"        for r, row_values in enumerate(cells):\n",
"            for c, value in enumerate(row_values):\n",
"                target_sheet.write(r, c, value)\n",
"\n",
"        if sheet_index == 0:\n",
"            # The new row goes directly after the last existing row.\n",
"            for c in range(n_cols):\n",
"                target_sheet.write(n_rows, c, append_rows[c])\n",
"\n",
"    target_book.save('沥青数据项.xls')\n",
|
||
"\n",
|
||
"\n",
|
||
"if __name__ == '__main__':\n",
"    # Nothing runs automatically; uncomment the call below for a single run.\n",
"    pass\n",
"#     start()\n",
|
||
"\n",
|
||
" # 每天定时12点运行\n",
|
||
" # while True:\n",
|
||
" # # 获取当前时间\n",
|
||
" # current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n",
|
||
" # current_time_1 = time.strftime(\"%H:%M:%S\", time.localtime())\n",
|
||
"\n",
|
||
" # # 判断当前时间是否为执行任务的时间点\n",
|
||
" # if current_time == \"12:00:00\":\n",
|
||
" # print(\"执行定时任务\")\n",
|
||
" # start()\n",
|
||
"\n",
|
||
" # # 休眠1秒钟,避免过多占用CPU资源\n",
|
||
" # time.sleep(1)\n",
|
||
" \n",
|
||
" # elif current_time_1 == \"20:00:00\":\n",
|
||
" # print(\"更新数据\")\n",
|
||
" # start_1()\n",
|
||
" # time.sleep(1)\n",
|
||
"\n",
|
||
"\n",
|
||
"# # 检测数据准确性, 需要检测放开\n",
|
||
"# # check_data(\"100028098|LISTING_PRICE\")\n",
|
||
"# # check_data(\"9137070016544622XB|DAY_Yield\")\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"20241223\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_2496\\2239815117.py:299: UserWarning:\n",
|
||
"\n",
|
||
"The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Using matplotlib backend: <object object at 0x0000027F8B613090>\n",
|
||
"%pylab is deprecated, use %matplotlib inline and import the required libraries.\n",
|
||
"Populating the interactive namespace from numpy and matplotlib\n",
|
||
"Fitting 3 folds for each of 180 candidates, totalling 540 fits\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"d:\\ProgramData\\anaconda3\\Lib\\site-packages\\IPython\\core\\magics\\pylab.py:162: UserWarning:\n",
|
||
"\n",
|
||
"pylab import has clobbered these variables: ['__version__', 'random', 'datetime', 'plot']\n",
|
||
"`%matplotlib` prevents importing * from pylab and numpy\n",
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Best score: 0.997\n",
|
||
"Best parameters set:\n",
|
||
"\tlearning_rate: 0.1\n",
|
||
"\tmax_depth: 8\n",
|
||
"\tn_estimators: 90\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_2496\\2239815117.py:239: UserWarning:\n",
|
||
"\n",
|
||
"The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
|
||
"\n",
|
||
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_2496\\2239815117.py:273: FutureWarning:\n",
|
||
"\n",
|
||
"Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"日期\n",
|
||
"2024-12-23 3503.160645\n",
|
||
"Name: 日度预测价格, dtype: float32\n",
|
||
"{\"confirmFlg\":false,\"status\":true}\n",
|
||
"新增数据: ['2024-12-23', 7957.0, 6904.0, 0.08, 0.25, 3650.0, 1.54, 0.0, 0.0, 3500.0, 7.9, 0.1, 0.2, 3500.0, 1.05, '', 3500.0, 72.6, '', '', 3538.0, 27.0525, '', '', '', '', 229522.1, 8639.74, 3463.8854, '', '', 40121.2216621, 7423.12, '']\n",
|
||
"20241224\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_2496\\2239815117.py:299: UserWarning:\n",
|
||
"\n",
|
||
"The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Using matplotlib backend: QtAgg\n",
|
||
"%pylab is deprecated, use %matplotlib inline and import the required libraries.\n",
|
||
"Populating the interactive namespace from numpy and matplotlib\n",
|
||
"Fitting 3 folds for each of 180 candidates, totalling 540 fits\n",
|
||
"Best score: 0.997\n",
|
||
"Best parameters set:\n",
|
||
"\tlearning_rate: 0.1\n",
|
||
"\tmax_depth: 10\n",
|
||
"\tn_estimators: 100\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_2496\\2239815117.py:239: UserWarning:\n",
|
||
"\n",
|
||
"The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
|
||
"\n",
|
||
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_2496\\2239815117.py:273: FutureWarning:\n",
|
||
"\n",
|
||
"Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"日期\n",
|
||
"2024-12-24 3499.874512\n",
|
||
"Name: 日度预测价格, dtype: float32\n",
|
||
"{\"confirmFlg\":false,\"status\":true}\n",
|
||
"新增数据: ['2024-12-24', 7984.0, 6904.0, 0.08, 0.25, 3650.0, 1.71, 0.0, 0.0, 3500.0, 7.9, 0.1, 0.2, 3500.0, 1.15, '', 3500.0, 72.6, 72.67, '', 3521.0, 25.6158, '', '', '', 13.33799789, 229522.1, 5417.02, 3427.8064, '', 1000.0, 44319.2299367, '', 3650.0]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"from datetime import datetime, timedelta\n",
"\n",
"# First day (inclusive) and last day (exclusive) of the back-fill window.\n",
"start_date = datetime(2024, 12, 23)\n",
"end_date = datetime(2024, 12, 25)\n",
"one_day = timedelta(days=1)\n",
"\n",
"# `time` and the start_* helpers are defined in the first cell.\n",
"while start_date < end_date:\n",
"    day_label = start_date.strftime('%Y%m%d')\n",
"    print(day_label)\n",
"    start_3(start_date)  # append the day's row, retrain, upload the forecast\n",
"    time.sleep(1)\n",
"    start_2(start_date)  # rewrite that row with freshly fetched values\n",
"    start_date = start_date + one_day"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "base",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.7"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 4
|
||
}
|