PriceForecast/aisenzhecode/沥青/沥青定量价格预测每日推送-ytj.ipynb
2024-12-27 14:15:20 +08:00

964 lines
38 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" <script type=\"text/javascript\">\n",
" window.PlotlyConfig = {MathJaxConfig: 'local'};\n",
" if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
" if (typeof require !== 'undefined') {\n",
" require.undef(\"plotly\");\n",
" requirejs.config({\n",
" paths: {\n",
" 'plotly': ['https://cdn.plot.ly/plotly-2.12.1.min']\n",
" }\n",
" });\n",
" require(['plotly'], function(Plotly) {\n",
" window._Plotly = Plotly;\n",
" });\n",
" }\n",
" </script>\n",
" "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import requests\n",
"import json\n",
"import xlrd\n",
"import xlwt\n",
"from datetime import datetime\n",
"import time\n",
"# 变量定义\n",
"login_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n",
"search_url = \"http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos\"\n",
"\n",
"login_push_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n",
"upload_url = \"http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList\"\n",
"\n",
"login_data = {\n",
" \"data\": {\n",
" \"account\": \"api_dev\",\n",
" \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n",
" \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n",
" \"terminal\": \"API\"\n",
" },\n",
" \"funcModule\": \"API\",\n",
" \"funcOperation\": \"获取token\"\n",
"}\n",
"\n",
"login_push_data = {\n",
" \"data\": {\n",
" \"account\": \"api_dev\",\n",
" \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n",
" \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n",
" \"terminal\": \"API\"\n",
" },\n",
" \"funcModule\": \"API\",\n",
" \"funcOperation\": \"获取token\"\n",
"}\n",
"\n",
"read_file_path_name = \"沥青数据项.xls\"\n",
"one_cols = []\n",
"two_cols = []\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sn\n",
"import random\n",
"import time\n",
"\n",
"\n",
"\n",
"\n",
"from plotly import __version__\n",
"from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot\n",
"\n",
"from sklearn import preprocessing\n",
"\n",
"from pandas import Series,DataFrame\n",
"\n",
"import matplotlib.pyplot as plt\n",
"\n",
"import sklearn.datasets as datasets\n",
"\n",
"#导入机器学习算法模型\n",
"from sklearn.linear_model import Lasso\n",
"from xgboost import XGBRegressor\n",
"\n",
"import datetime\n",
"import statsmodels.api as sm\n",
"# from keras.preprocessing.sequence import TimeseriesGenerator\n",
"from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator\n",
"\n",
"import plotly.express as px\n",
"import plotly.graph_objects as go\n",
"\n",
"import xgboost as xgb\n",
"from xgboost import plot_importance, plot_tree\n",
"from sklearn.metrics import mean_absolute_error\n",
"from statsmodels.tools.eval_measures import mse,rmse\n",
"from sklearn.model_selection import GridSearchCV\n",
"from xgboost import XGBRegressor\n",
"import warnings\n",
"import pickle\n",
"\n",
"from sklearn.metrics import mean_squared_error\n",
"\n",
"#切割训练数据和样本数据\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"#用于模型评分\n",
"from sklearn.metrics import r2_score\n",
"\n",
"le = preprocessing.LabelEncoder()\n",
"\n",
"# print(__version__) # requires version >= 1.9.0\n",
"\n",
"\n",
"import cufflinks as cf\n",
"cf.go_offline()\n",
"\n",
"random.seed(100)\n",
"\n",
"%matplotlib inline\n",
"\n",
"# 数据获取\n",
"\n",
"def get_head_auth():\n",
"    \"\"\"Log in with login_data and return the access token, or None when the login is rejected.\"\"\"\n",
"    response = requests.post(url=login_url, json=login_data, timeout=(3, 5))\n",
"    payload = json.loads(response.text)\n",
"    # Guard clause: a falsy status means the login was not accepted.\n",
"    if not payload[\"status\"]:\n",
"        print(\"获取认证失败\")\n",
"        return None\n",
"    return payload[\"data\"][\"accessToken\"]\n",
"\n",
"\n",
"def get_data_value(token, dataItemNoList,date=''):\n",
"    \"\"\"Query the values of the given data item codes for one day.\n",
"\n",
"    Args:\n",
"        token: value sent in the Authorization header.\n",
"        dataItemNoList: data item codes to query.\n",
"        date: object with year/month/day attributes, or '' for the current day.\n",
"\n",
"    Returns:\n",
"        The \"data\" field of the response, or None when it is empty.\n",
"    \"\"\"\n",
"    query_day = get_cur_time(date)[0]\n",
"    request_body = {\n",
"        \"data\": {\"date\": query_day, \"dataItemNoList\": dataItemNoList},\n",
"        \"funcModule\": \"数据项\",\n",
"        \"funcOperation\": \"查询\"\n",
"    }\n",
"    response = requests.post(\n",
"        url=search_url,\n",
"        headers={\"Authorization\": token},\n",
"        json=request_body,\n",
"        timeout=(3, 5),\n",
"    )\n",
"    items = json.loads(response.text)[\"data\"]\n",
"    if not items:\n",
"        print(\"今天没有新数据\")\n",
"        return None\n",
"    return items\n",
"\n",
"\n",
"# xls文件处理\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"def write_xls(data,date):\n",
"    \"\"\"Write rows to a new workbook named '<YYYYMMDD>.xls'.\n",
"\n",
"    Args:\n",
"        data: iterable of rows, each an iterable of cell values.\n",
"        date: passed to get_cur_time() to build the file name.\n",
"    \"\"\"\n",
"    workbook = xlwt.Workbook()\n",
"\n",
"    # xlwt.Workbook has no load(); sheets are created with add_sheet().\n",
"    sheet = workbook.add_sheet('Sheet1')\n",
"\n",
"    for row_index, row_data in enumerate(data):\n",
"        for col_index, cell_data in enumerate(row_data):\n",
"            sheet.write(row_index, col_index, cell_data)\n",
"\n",
"    workbook.save(get_cur_time(date)[0] + '.xls')\n",
"\n",
"\n",
"def get_cur_time(date = ''):\n",
" if date == '':\n",
" import datetime\n",
" now = datetime.datetime.now()\n",
" else:\n",
" now = date\n",
" year = now.year\n",
" month = now.month\n",
" day = now.day\n",
"\n",
" if month < 10:\n",
" month = \"0\" + str(month)\n",
" if day < 10:\n",
" day = \"0\" + str(day)\n",
" cur_time = str(year) + str(month) + str(day)\n",
" cur_time2 = str(year) + \"-\" + str(month) + \"-\" + str(day)\n",
"# cur_time = '20231007'\n",
"# cur_time2 = '2023-10-07'\n",
" return cur_time, cur_time2\n",
"\n",
"\n",
"def get_head_push_auth():\n",
"    \"\"\"Log in with login_push_data and return the access token used for uploads, or None on failure.\"\"\"\n",
"    reply = json.loads(\n",
"        requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5)).text\n",
"    )\n",
"    if reply[\"status\"]:\n",
"        return reply[\"data\"][\"accessToken\"]\n",
"    print(\"获取认证失败\")\n",
"    return None\n",
"\n",
"\n",
"\n",
"def upload_data_to_system(token_push,date=''):\n",
"    \"\"\"Push the forecast price for one day to the data platform.\n",
"\n",
"    Args:\n",
"        token_push: value sent in the Authorization header.\n",
"        date: object with year/month/day attributes; defaults to '' (the\n",
"            current day) so that one-argument callers such as start() work.\n",
"    \"\"\"\n",
"    data = {\n",
"        \"funcModule\": \"数据表信息列表\",\n",
"        \"funcOperation\": \"新增\",\n",
"        \"data\": [\n",
"            {\"dataItemNo\": \"C01100036|Forecast_Price|ACN\",\n",
"             \"dataDate\": get_cur_time(date)[0],\n",
"             \"dataStatus\": \"add\",\n",
"             # forecast_price() reads the workbook and the pickled model on every call.\n",
"             \"dataValue\": forecast_price()\n",
"             }\n",
"\n",
"        ]\n",
"    }\n",
"    headers = {\"Authorization\": token_push}\n",
"    res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))\n",
"    print(res.text)\n",
"\n",
" \n",
"# def upload_data_to_system(token):\n",
"# data = {\n",
"# \"funcModule\": \"数据表信息列表\",\n",
"# \"funcOperation\": \"新增\",\n",
"# \"data\": [\n",
"# {\"dataItemNo\": \"C01100036|Forecast_ Price|ACN\",\n",
"# \"dataDate\": '20230706',\n",
"# \"dataStatus\": \"add\",\n",
"# \"dataValue\": 3780.0\n",
"# }\n",
"\n",
"# ]\n",
"# }\n",
"# headers = {\"Authorization\": token}\n",
"# res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))\n",
"# print(res.text)\n",
"\n",
"def forecast_price():\n",
"    \"\"\"Predict the price for the latest row of the history sheet.\n",
"\n",
"    Reads sheet '数据项历史数据' of '沥青数据项.xls', drops columns with more than\n",
"    40% missing values, fills the remaining gaps forward then backward, and\n",
"    feeds the last row (without '日期' and '京博指导价') to the model stored in\n",
"    '日度价格预测_最佳模型.pkl'.\n",
"\n",
"    Returns:\n",
"        float: the prediction rounded to two decimals.\n",
"\n",
"    Raises:\n",
"        ValueError: if the latest row still contains missing values after filling.\n",
"    \"\"\"\n",
"    import joblib\n",
"\n",
"    numeric_cols = [\n",
"        '汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n",
"        '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n",
"        '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n",
"        '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n",
"        '京博签单量','京博库存量','京博产量','加权平均成交价',\n",
"    ]\n",
"\n",
"    df_test = pd.read_excel('沥青数据项.xls',sheet_name='数据项历史数据')\n",
"    # The first data row is discarded before numeric conversion\n",
"    # (presumably it holds the data item codes - TODO confirm).\n",
"    df_test.drop([0],inplace=True)\n",
"    df_test[numeric_cols] = df_test[numeric_cols].astype('float')\n",
"    # infer_datetime_format is deprecated; the explicit format is sufficient.\n",
"    df_test['日期']=pd.to_datetime(df_test['日期'], format='%Y-%m-%d')\n",
"\n",
"    # Drop every column whose share of missing values exceeds 40%.\n",
"    missing_ratio = df_test.isnull().sum() / len(df_test)\n",
"    sparse_cols = missing_ratio[missing_ratio > 0.4].index\n",
"    df_test_1 = df_test.drop(sparse_cols, axis=1)\n",
"\n",
"    # Fill remaining gaps with the previous value, then with the next one.\n",
"    df_test_1=df_test_1.fillna(df_test.ffill())\n",
"    df_test_1=df_test_1.fillna(df_test_1.bfill())\n",
"\n",
"    Best_model_DalyLGPrice = joblib.load(\"日度价格预测_最佳模型.pkl\")\n",
"\n",
"    # The most recent day is the last row; index it by date and remove the target column.\n",
"    df_test_1_Day = (\n",
"        df_test_1.tail(1)\n",
"        .set_index(\"日期\")\n",
"        .drop('京博指导价', axis=1)\n",
"        .dropna()\n",
"    )\n",
"    if df_test_1_Day.empty:\n",
"        raise ValueError(\"latest row of '数据项历史数据' has missing values; cannot forecast\")\n",
"\n",
"    Ypredict_Today=Best_model_DalyLGPrice.predict(df_test_1_Day)\n",
"\n",
"    df_test_1_Day['日度预测价格']=Ypredict_Today\n",
"    print(df_test_1_Day['日度预测价格'])\n",
"    # .iloc[0] selects by position; plain [0] on a date-indexed Series is deprecated.\n",
"    return round(float(df_test_1_Day['日度预测价格'].iloc[0]), 2)\n",
"def optimize_Model():\n",
"    \"\"\"Retrain the daily price model on the workbook history and save the tuned estimator.\n",
"\n",
"    Reads sheet '数据项历史数据' of '沥青数据项.xls', drops columns with more than\n",
"    40% missing values, fills the remaining gaps, fits baseline Lasso and\n",
"    XGBoost regressors, grid-searches an XGBoost regressor and pickles the\n",
"    fitted GridSearchCV object to '日度价格预测_最佳模型.pkl'.\n",
"\n",
"    Returns:\n",
"        None. Prints the best cross-validation score and parameters and the\n",
"        model comparison table.\n",
"    \"\"\"\n",
"    import pickle\n",
"\n",
"    import matplotlib as mpl\n",
"    from sklearn.linear_model import Lasso\n",
"    from sklearn.metrics import mean_squared_error, r2_score\n",
"    from sklearn.model_selection import GridSearchCV, train_test_split\n",
"    from xgboost import XGBRegressor\n",
"\n",
"    pd.set_option('display.max_rows',40)\n",
"    pd.set_option('display.max_columns',40)\n",
"\n",
"    numeric_cols = [\n",
"        '汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n",
"        '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n",
"        '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n",
"        '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n",
"        '京博签单量','京博库存量','京博产量','加权平均成交价',\n",
"    ]\n",
"\n",
"    df_test = pd.read_excel('沥青数据项.xls',sheet_name='数据项历史数据')\n",
"    # The first data row is discarded before numeric conversion\n",
"    # (presumably it holds the data item codes - TODO confirm).\n",
"    df_test.drop([0],inplace=True)\n",
"    df_test[numeric_cols] = df_test[numeric_cols].astype('float')\n",
"    # infer_datetime_format is deprecated; the explicit format is sufficient.\n",
"    df_test['日期']=pd.to_datetime(df_test['日期'], format='%Y-%m-%d')\n",
"\n",
"    # Drop every column whose share of missing values exceeds 40%.\n",
"    missing_ratio = df_test.isnull().sum() / len(df_test)\n",
"    sparse_cols = missing_ratio[missing_ratio > 0.4].index\n",
"    df_test_1 = df_test.drop(sparse_cols, axis=1)\n",
"\n",
"    # Fill remaining gaps with the previous value, then with the next one.\n",
"    df_test_1=df_test_1.fillna(df_test.ffill())\n",
"    df_test_1=df_test_1.fillna(df_test_1.bfill())\n",
"    df_test_1 = df_test_1.set_index(\"日期\")\n",
"\n",
"    x = df_test_1.drop('京博指导价',axis=1)\n",
"    y = df_test_1['京博指导价']\n",
"\n",
"    # NOTE(review): this is a shuffled split of date-indexed rows, so test rows\n",
"    # are interleaved with training rows - confirm this is intended.\n",
"    X_train,x_test,y_train,y_true = train_test_split(x,y,test_size=0.2,random_state=0)\n",
"\n",
"    # Baseline models; instance names differ from the class names so the classes are not shadowed.\n",
"    lasso_model = Lasso(random_state=0)\n",
"    xgbr_model = XGBRegressor(random_state=0)\n",
"    lasso_model.fit(X_train,y_train)\n",
"    xgbr_model.fit(X_train,y_train)\n",
"    y_pre_Lasso = lasso_model.predict(x_test)\n",
"    y_pre_XGBR = xgbr_model.predict(x_test)\n",
"\n",
"    # R^2 and RMSE of both baselines on the held-out rows.\n",
"    Lasso_score = r2_score(y_true,y_pre_Lasso)\n",
"    XGBR_score = r2_score(y_true,y_pre_XGBR)\n",
"    Lasso_RMSE = np.sqrt(mean_squared_error(y_true, y_pre_Lasso))\n",
"    XGBR_RMSE = np.sqrt(mean_squared_error(y_true, y_pre_XGBR))\n",
"    model_results = pd.DataFrame([['Lasso', Lasso_RMSE, Lasso_score],\n",
"                                  ['XgBoost', XGBR_RMSE, XGBR_score]],\n",
"                                 columns = ['模型(Model)','均方根误差(RMSE)', 'R^2 score'])\n",
"    model_results1=model_results.set_index('模型(Model)')\n",
"\n",
"    def plot_feature_importance(importance,names,model_type):\n",
"        \"\"\"Draw a horizontal bar chart of feature importances, largest first (not called here).\"\"\"\n",
"        feature_importance = np.array(importance)\n",
"        feature_names = np.array(names)\n",
"\n",
"        data={'feature_names':feature_names,'feature_importance':feature_importance}\n",
"        fi_df = pd.DataFrame(data)\n",
"\n",
"        fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True)\n",
"\n",
"        plt.figure(figsize=(10,8))\n",
"        sn.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names'])\n",
"\n",
"        plt.title(model_type + \" \"+'FEATURE IMPORTANCE')\n",
"        plt.xlabel('FEATURE IMPORTANCE')\n",
"        plt.ylabel('FEATURE NAMES')\n",
"\n",
"    # Set the sans-serif font directly; the former %pylab magic switched the\n",
"    # matplotlib backend and overwrote notebook globals such as random and datetime.\n",
"    mpl.rcParams['font.sans-serif'] = ['SimHei']\n",
"\n",
"    estimator = XGBRegressor(random_state=0,\n",
"                             nthread=4,\n",
"                             seed=0\n",
"                             )\n",
"    parameters = {\n",
"        'max_depth': range (2, 11, 2),  # maximum tree depth\n",
"        'n_estimators': range (50, 101, 10),  # number of boosting rounds\n",
"        'learning_rate': [0.01, 0.03, 0.1, 0.3, 0.5, 1]\n",
"    }\n",
"\n",
"    grid_search_XGB = GridSearchCV(\n",
"        estimator=estimator,\n",
"        param_grid=parameters,\n",
"        cv = 3,\n",
"        verbose=True\n",
"    )\n",
"\n",
"    grid_search_XGB.fit(X_train, y_train)\n",
"    print(\"Best score: %0.3f\" % grid_search_XGB.best_score_)\n",
"    print(\"Best parameters set:\")\n",
"    best_parameters = grid_search_XGB.best_estimator_.get_params()\n",
"    for param_name in sorted(parameters.keys()):\n",
"        print(\"\\t%s: %r\" % (param_name, best_parameters[param_name]))\n",
"    y_pred = grid_search_XGB.predict(x_test)\n",
"\n",
"    op_XGBR_score = r2_score(y_true,y_pred)\n",
"    op_XGBR_RMSE = np.sqrt(mean_squared_error(y_true, y_pred))\n",
"\n",
"    model_results2 = pd.DataFrame([['Optimized_Xgboost', op_XGBR_RMSE, op_XGBR_score]],\n",
"                                  columns = ['模型(Model)', '均方根误差(RMSE)', 'R^2 score'])\n",
"    model_results2=model_results2.set_index('模型(Model)')\n",
"\n",
"    # Keep the model-name index so each row stays labelled, and print the table:\n",
"    # a bare expression inside a function displays nothing.\n",
"    results = pd.concat([model_results1,model_results2])\n",
"    print(results)\n",
"\n",
"    Pkl_Filename = \"日度价格预测_最佳模型.pkl\"\n",
"\n",
"    with open(Pkl_Filename, 'wb') as file:\n",
"        pickle.dump(grid_search_XGB, file)\n",
"\n",
"\n",
"\n",
"\n",
" \n",
"\n",
"\n",
" \n",
"\n",
"def read_xls_data():\n",
"    \"\"\"Store the row at index 1 of the workbook's first sheet in the global one_cols.\"\"\"\n",
"    global one_cols, two_cols\n",
"    first_sheet = xlrd.open_workbook(read_file_path_name).sheet_by_index(0)\n",
"    # Callers treat one_cols[1:] as the data item codes to query.\n",
"    one_cols = first_sheet.row_values(1)\n",
"\n",
"\n",
"\n",
"\n",
"def start():\n",
"    '''Fetch today's data, append it to the workbook, retrain and upload the forecast.\n",
"\n",
"    Returns early (None) when either login fails or no data is returned.\n",
"    '''\n",
"    read_xls_data()\n",
"\n",
"    token = get_head_auth()\n",
"    if not token:\n",
"        return\n",
"    token_push = get_head_push_auth()\n",
"    if not token_push:\n",
"        return\n",
"\n",
"    datas = get_data_value(token, one_cols[1:])\n",
"    if not datas:\n",
"        return\n",
"\n",
"    # Map each returned item code to its value; items without a value are logged and stored as \"\".\n",
"    dataItemNo_dataValue = {}\n",
"    for data_value in datas:\n",
"        if \"dataValue\" not in data_value:\n",
"            print(data_value)\n",
"            dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n",
"        else:\n",
"            dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n",
"\n",
"    # New row: the date followed by one value per code, in workbook column order.\n",
"    append_rows = [get_cur_time()[1]]\n",
"    append_rows.extend(dataItemNo_dataValue.get(value, \"\") for value in one_cols[1:])\n",
"\n",
"    save_xls(append_rows)\n",
"    optimize_Model()\n",
"    # upload_data_to_system takes a date argument; '' means the current day.\n",
"    upload_data_to_system(token_push, '')\n",
"\n",
"\n",
"def start_3(date):\n",
"    '''Fetch the data for `date`, append it to the workbook, retrain and upload the forecast.'''\n",
"    read_xls_data()\n",
"\n",
"    token = get_head_auth()\n",
"    if not token:\n",
"        return\n",
"    token_push = get_head_push_auth()\n",
"    if not token_push:\n",
"        return\n",
"\n",
"    item_codes = one_cols[1:]\n",
"    datas = get_data_value(token, item_codes, date)\n",
"    if not datas:\n",
"        return\n",
"\n",
"    # Items that come back without a value are printed and recorded as \"\".\n",
"    values_by_code = {}\n",
"    for entry in datas:\n",
"        has_value = \"dataValue\" in entry\n",
"        if not has_value:\n",
"            print(entry)\n",
"        values_by_code[entry[\"dataItemNo\"]] = entry[\"dataValue\"] if has_value else \"\"\n",
"\n",
"    # Row layout: date first, then one cell per code in workbook column order.\n",
"    append_rows = [get_cur_time(date)[1]]\n",
"    append_rows.extend(values_by_code.get(code, \"\") for code in item_codes)\n",
"\n",
"    save_xls(append_rows)\n",
"    optimize_Model()\n",
"    upload_data_to_system(token_push,date)\n",
"\n",
"\n",
"\n",
"def start_1():\n",
"    '''Refresh today's row in the workbook with newly fetched data.'''\n",
"    read_xls_data()\n",
"\n",
"    token = get_head_auth()\n",
"    if not token:\n",
"        return\n",
"\n",
"    wanted_codes = one_cols[1:]\n",
"    datas = get_data_value(token, wanted_codes)\n",
"    if not datas:\n",
"        return\n",
"\n",
"    # Build code -> value; entries lacking \"dataValue\" are printed and kept as \"\".\n",
"    lookup = {}\n",
"    for record in datas:\n",
"        if \"dataValue\" in record:\n",
"            lookup[record[\"dataItemNo\"]] = record[\"dataValue\"]\n",
"        else:\n",
"            print(record)\n",
"            lookup[record[\"dataItemNo\"]] = \"\"\n",
"\n",
"    append_rows = [get_cur_time()[1]] + [lookup.get(code, \"\") for code in wanted_codes]\n",
"    save_xls_1(append_rows)\n",
"\n",
"\n",
"def start_2(date):\n",
"    '''Refresh the row for `date` in the workbook with newly fetched data.'''\n",
"    read_xls_data()\n",
"\n",
"    token = get_head_auth()\n",
"    if not token:\n",
"        return\n",
"\n",
"    codes = one_cols[1:]\n",
"    datas = get_data_value(token, codes,date)\n",
"    if not datas:\n",
"        return\n",
"\n",
"    # Collect the returned values; items without \"dataValue\" are printed and stored as \"\".\n",
"    value_of = {}\n",
"    for item in datas:\n",
"        missing = \"dataValue\" not in item\n",
"        if missing:\n",
"            print(item)\n",
"        value_of[item[\"dataItemNo\"]] = \"\" if missing else item[\"dataValue\"]\n",
"\n",
"    append_rows = [get_cur_time(date=date)[1]]\n",
"    for code in codes:\n",
"        append_rows.append(value_of.get(code, \"\"))\n",
"\n",
"    print('新增数据:',append_rows)\n",
"    save_xls_1(append_rows)\n",
" \n",
"def save_xls_1(append_rows):\n",
"    \"\"\"Rewrite '沥青数据项.xls', replacing the last row of the first sheet with append_rows.\n",
"\n",
"    All other sheets are copied unchanged. Previously the last row of every\n",
"    sheet was dropped although only the first sheet received the replacement.\n",
"\n",
"    Args:\n",
"        append_rows: cell values for the replacement row; needs at least as\n",
"            many entries as the first sheet has columns.\n",
"    \"\"\"\n",
"    workbook = xlrd.open_workbook('沥青数据项.xls')\n",
"    new_workbook = xlwt.Workbook()\n",
"\n",
"    for i, sheet_name in enumerate(workbook.sheet_names()):\n",
"        sheet = workbook.sheet_by_index(i)\n",
"        col_count = sheet.ncols\n",
"\n",
"        # Only the first sheet loses its last row; clamp at 0 for an empty sheet.\n",
"        if i == 0:\n",
"            row_count = max(sheet.nrows - 1, 0)\n",
"        else:\n",
"            row_count = sheet.nrows\n",
"\n",
"        new_sheet = new_workbook.add_sheet(sheet_name)\n",
"\n",
"        # Copy the retained rows cell by cell.\n",
"        for row in range(row_count):\n",
"            for col in range(col_count):\n",
"                new_sheet.write(row, col, sheet.cell_value(row, col))\n",
"\n",
"        if i == 0:\n",
"            # Write the replacement row directly below the retained rows.\n",
"            for col in range(col_count):\n",
"                new_sheet.write(row_count, col, append_rows[col])\n",
"\n",
"    # Overwrite the original file.\n",
"    new_workbook.save(\"沥青数据项.xls\")\n",
"\n",
" \n",
" \n",
" \n",
"def check_data(dataItemNo):\n",
"    \"\"\"Log in and query the given data item(s); the result is discarded and None is returned.\"\"\"\n",
"    token = get_head_auth()\n",
"    if token:\n",
"        # The result is not used; get_data_value prints a message when nothing is returned.\n",
"        get_data_value(token, dataItemNo)\n",
"\n",
"\n",
"def save_xls(append_rows):\n",
"    \"\"\"Rewrite '沥青数据项.xls' with append_rows added as a new last row of the first sheet.\n",
"\n",
"    Every sheet is copied cell by cell into a fresh xlwt workbook, which then\n",
"    overwrites the original file.\n",
"    \"\"\"\n",
"    source_book = xlrd.open_workbook('沥青数据项.xls')\n",
"    names = source_book.sheet_names()\n",
"    target_book = xlwt.Workbook()\n",
"\n",
"    for sheet_idx in range(len(names)):\n",
"        src = source_book.sheet_by_index(sheet_idx)\n",
"        n_rows, n_cols = src.nrows, src.ncols\n",
"\n",
"        # Snapshot the existing cells before writing them out.\n",
"        cells = [[src.cell_value(r, c) for c in range(n_cols)] for r in range(n_rows)]\n",
"\n",
"        dst = target_book.add_sheet(names[sheet_idx])\n",
"        for r, row_cells in enumerate(cells):\n",
"            for c, value in enumerate(row_cells):\n",
"                dst.write(r, c, value)\n",
"\n",
"        # Only the first sheet receives the new row, placed after the existing rows.\n",
"        if sheet_idx == 0:\n",
"            for c in range(n_cols):\n",
"                dst.write(n_rows, c, append_rows[c])\n",
"\n",
"    target_book.save(\"沥青数据项.xls\")\n",
"\n",
"\n",
"if __name__ == \"__main__\":\n",
" pass\n",
" # 需要单独运行放开\n",
"# start()\n",
"\n",
" # 每天定时12点运行\n",
" # while True:\n",
" # # 获取当前时间\n",
" # current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n",
" # current_time_1 = time.strftime(\"%H:%M:%S\", time.localtime())\n",
"\n",
" # # 判断当前时间是否为执行任务的时间点\n",
" # if current_time == \"12:00:00\":\n",
" # print(\"执行定时任务\")\n",
" # start()\n",
"\n",
" # # 休眠1秒钟避免过多占用CPU资源\n",
" # time.sleep(1)\n",
" \n",
" # elif current_time_1 == \"20:00:00\":\n",
" # print(\"更新数据\")\n",
" # start_1()\n",
" # time.sleep(1)\n",
"\n",
"\n",
"# # 检测数据准确性, 需要检测放开\n",
"# # check_data(\"100028098|LISTING_PRICE\")\n",
"# # check_data(\"9137070016544622XB|DAY_Yield\")\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"20241223\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_2496\\2239815117.py:299: UserWarning:\n",
"\n",
"The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Using matplotlib backend: <object object at 0x0000027F8B613090>\n",
"%pylab is deprecated, use %matplotlib inline and import the required libraries.\n",
"Populating the interactive namespace from numpy and matplotlib\n",
"Fitting 3 folds for each of 180 candidates, totalling 540 fits\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"d:\\ProgramData\\anaconda3\\Lib\\site-packages\\IPython\\core\\magics\\pylab.py:162: UserWarning:\n",
"\n",
"pylab import has clobbered these variables: ['__version__', 'random', 'datetime', 'plot']\n",
"`%matplotlib` prevents importing * from pylab and numpy\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Best score: 0.997\n",
"Best parameters set:\n",
"\tlearning_rate: 0.1\n",
"\tmax_depth: 8\n",
"\tn_estimators: 90\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_2496\\2239815117.py:239: UserWarning:\n",
"\n",
"The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
"\n",
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_2496\\2239815117.py:273: FutureWarning:\n",
"\n",
"Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"日期\n",
"2024-12-23 3503.160645\n",
"Name: 日度预测价格, dtype: float32\n",
"{\"confirmFlg\":false,\"status\":true}\n",
"新增数据: ['2024-12-23', 7957.0, 6904.0, 0.08, 0.25, 3650.0, 1.54, 0.0, 0.0, 3500.0, 7.9, 0.1, 0.2, 3500.0, 1.05, '', 3500.0, 72.6, '', '', 3538.0, 27.0525, '', '', '', '', 229522.1, 8639.74, 3463.8854, '', '', 40121.2216621, 7423.12, '']\n",
"20241224\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_2496\\2239815117.py:299: UserWarning:\n",
"\n",
"The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Using matplotlib backend: QtAgg\n",
"%pylab is deprecated, use %matplotlib inline and import the required libraries.\n",
"Populating the interactive namespace from numpy and matplotlib\n",
"Fitting 3 folds for each of 180 candidates, totalling 540 fits\n",
"Best score: 0.997\n",
"Best parameters set:\n",
"\tlearning_rate: 0.1\n",
"\tmax_depth: 10\n",
"\tn_estimators: 100\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_2496\\2239815117.py:239: UserWarning:\n",
"\n",
"The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
"\n",
"C:\\Users\\EDY\\AppData\\Local\\Temp\\ipykernel_2496\\2239815117.py:273: FutureWarning:\n",
"\n",
"Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"日期\n",
"2024-12-24 3499.874512\n",
"Name: 日度预测价格, dtype: float32\n",
"{\"confirmFlg\":false,\"status\":true}\n",
"新增数据: ['2024-12-24', 7984.0, 6904.0, 0.08, 0.25, 3650.0, 1.71, 0.0, 0.0, 3500.0, 7.9, 0.1, 0.2, 3500.0, 1.15, '', 3500.0, 72.6, 72.67, '', 3521.0, 25.6158, '', '', '', 13.33799789, 229522.1, 5417.02, 3427.8064, '', 1000.0, 44319.2299367, '', 3650.0]\n"
]
}
],
"source": [
"from datetime import datetime, timedelta\n",
"\n",
"# Backfill window: start_date is inclusive, end_date is exclusive.\n",
"start_date = datetime(2024, 12, 23)\n",
"end_date = datetime(2024, 12, 25)\n",
"\n",
"while end_date > start_date:\n",
"    print(f\"{start_date:%Y%m%d}\")\n",
"    # Append the day's data, retrain and upload the forecast ...\n",
"    start_3(start_date)\n",
"    time.sleep(1)\n",
"    # ... then rewrite that day's row with a fresh fetch.\n",
"    start_2(start_date)\n",
"    start_date = start_date + timedelta(days=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}