PriceForecast/aisenzhecode/液化石油气/液化气价格预测ytj.ipynb
2025-03-18 19:30:44 +08:00

893 lines
41 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" <script type=\"text/javascript\">\n",
" window.PlotlyConfig = {MathJaxConfig: 'local'};\n",
" if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
" if (typeof require !== 'undefined') {\n",
" require.undef(\"plotly\");\n",
" requirejs.config({\n",
" paths: {\n",
" 'plotly': ['https://cdn.plot.ly/plotly-2.12.1.min']\n",
" }\n",
" });\n",
" require(['plotly'], function(Plotly) {\n",
" window._Plotly = Plotly;\n",
" });\n",
" }\n",
" </script>\n",
" "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import requests\n",
"import json\n",
"import xlrd\n",
"import xlwt\n",
"from datetime import datetime,timedelta\n",
"import time\n",
"# Endpoint and credential configuration.\n",
"# Every value can be overridden through an environment variable so that secrets do not\n",
"# have to be edited into the notebook; the literals are kept only as backward-compatible\n",
"# defaults.\n",
"import os\n",
"\n",
"API_BASE_URL = os.environ.get(\"JINGBO_API_BASE_URL\", \"http://10.200.32.39/jingbo-api\")\n",
"\n",
"login_url = API_BASE_URL + \"/api/server/login\"\n",
"search_url = API_BASE_URL + \"/api/warehouse/dwDataItem/queryByItemNos\"\n",
"# search_url = API_BASE_URL + \"/api/warehouse/dwDataItem/queryDataListItemNos\"\n",
"\n",
"login_push_url = API_BASE_URL + \"/api/server/login\"\n",
"upload_url = API_BASE_URL + \"/api/dw/dataValue/pushDataValueList\"\n",
"\n",
"\n",
"def _build_login_payload():\n",
"    \"\"\"Return a fresh request body for the token endpoint.\n",
"\n",
"    A new dict is built on every call so the query and push logins never share\n",
"    (and accidentally mutate) the same object.\n",
"    \"\"\"\n",
"    return {\n",
"        \"data\": {\n",
"            \"account\": os.environ.get(\"JINGBO_API_ACCOUNT\", \"api_dev\"),\n",
"            \"password\": os.environ.get(\"JINGBO_API_PASSWORD\", \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\"),\n",
"            \"tenantHashCode\": os.environ.get(\"JINGBO_API_TENANT\", \"8a4577dbd919675758d57999a1e891fe\"),\n",
"            \"terminal\": \"API\"\n",
"        },\n",
"        \"funcModule\": \"API\",\n",
"        \"funcOperation\": \"获取token\"\n",
"    }\n",
"\n",
"\n",
"login_data = _build_login_payload()\n",
"login_push_data = _build_login_payload()\n",
"\n",
"# Workbook holding the training / forecasting data.\n",
"read_file_path_name = \"液化气数据.xls\"\n",
"# Filled by read_xls_data(): the data-item IDs taken from the workbook's second row.\n",
"one_cols = []\n",
"two_cols = []\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sn\n",
"import random\n",
"import time\n",
"\n",
"\n",
"\n",
"\n",
"from plotly import __version__\n",
"from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot\n",
"\n",
"from sklearn import preprocessing\n",
"\n",
"from pandas import Series,DataFrame\n",
"\n",
"import matplotlib.pyplot as plt\n",
"\n",
"import sklearn.datasets as datasets\n",
"\n",
"#导入机器学习算法模型\n",
"from sklearn.linear_model import Lasso\n",
"from xgboost import XGBRegressor\n",
"\n",
"import statsmodels.api as sm\n",
"try:\n",
" from keras.preprocessing.sequence import TimeseriesGenerator\n",
"except:\n",
" from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator\n",
"\n",
"import plotly.express as px\n",
"import plotly.graph_objects as go\n",
"\n",
"import xgboost as xgb\n",
"from xgboost import plot_importance, plot_tree\n",
"from sklearn.metrics import mean_absolute_error\n",
"from statsmodels.tools.eval_measures import mse,rmse\n",
"from sklearn.model_selection import GridSearchCV\n",
"from xgboost import XGBRegressor\n",
"import warnings\n",
"import pickle\n",
"\n",
"from sklearn.metrics import mean_squared_error\n",
"\n",
"#切割训练数据和样本数据\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"#用于模型评分\n",
"from sklearn.metrics import r2_score\n",
"\n",
"le = preprocessing.LabelEncoder()\n",
"\n",
"# print(__version__) # requires version >= 1.9.0\n",
"\n",
"\n",
"import cufflinks as cf\n",
"cf.go_offline()\n",
"\n",
"random.seed(100)\n",
"\n",
"%matplotlib inline\n",
"\n",
"# 数据获取\n",
"\n",
"def get_head_auth():\n",
"    \"\"\"Log in to the query API and return its access token.\n",
"\n",
"    Posts ``login_data`` to ``login_url``.\n",
"\n",
"    Returns:\n",
"        The ``accessToken`` string when the reply's ``status`` field is truthy,\n",
"        otherwise None (after printing a failure message).\n",
"    \"\"\"\n",
"    login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5))\n",
"    text = json.loads(login_res.text)\n",
"    # .get() so a reply without \"status\" is reported as a failed login, not a KeyError.\n",
"    if not text.get(\"status\"):\n",
"        print(\"获取认证失败\")\n",
"        return None\n",
"    token = text[\"data\"][\"accessToken\"]\n",
"    # Notebook outputs are saved with the file, so only a short prefix of the\n",
"    # bearer token is echoed instead of the full credential.\n",
"    print('获取的token:', str(token)[:8] + '...')\n",
"    return token\n",
"\n",
"\n",
"def get_data_value(token, dataItemNoList,date):\n",
"    \"\"\"Query the values of the given data items for one date.\n",
"\n",
"    Args:\n",
"        token: access token sent in the ``Authorization`` header.\n",
"        dataItemNoList: data item codes placed in the request's ``dataItemNoList`` field.\n",
"        date: value placed in the request's ``date`` field.\n",
"\n",
"    Returns:\n",
"        The ``data`` field of the JSON reply (empty or missing means nothing was\n",
"        returned), or None when the reply is not valid JSON.\n",
"    \"\"\"\n",
"    search_data = {\n",
"        \"data\": {\n",
"            \"date\": date,\n",
"            \"dataItemNoList\": dataItemNoList\n",
"        },\n",
"        \"funcModule\": \"数据项\",\n",
"        \"funcOperation\": \"查询\"\n",
"    }\n",
"\n",
"    # Alternative payload kept for reference (date range instead of a single date):\n",
"    # search_data = {\n",
"    #     \"funcModule\": \"数据项\",\n",
"    #     \"funcOperation\": \"查询\",\n",
"    #     \"data\": {\n",
"    #         \"dateStart\": date,\n",
"    #         \"dateEnd\": date,\n",
"    #         \"dataItemNoList\": dataItemNoList\n",
"    #     }\n",
"    # }\n",
"\n",
"    headers = {\"Authorization\": token}\n",
"    search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5))\n",
"    print('数据项查询参数search_data')\n",
"    print(search_data)\n",
"    print('数据项查询结果search_res')\n",
"    print(search_res.text)\n",
"\n",
"    try:\n",
"        response_body = json.loads(search_res.text)\n",
"    except json.JSONDecodeError as e:\n",
"        print(f\"Error decoding JSON: {e}\")\n",
"        print(\"Response content:\", search_res.text)\n",
"        return None\n",
"\n",
"    # A well-formed reply may still lack \"data\" (e.g. an error body); report that\n",
"    # as \"no data\" instead of letting a KeyError escape to the caller.\n",
"    search_value = response_body.get(\"data\") if isinstance(response_body, dict) else None\n",
"    print(\"数据项查询结果:\", search_value)\n",
"    if not search_value:\n",
"        print(\"今天没有新数据\")\n",
"    return search_value\n",
"\n",
"\n",
"\n",
"# xls文件处理\n",
"\n",
"def write_xls(data):\n",
"    \"\"\"Write a 2-D sequence of cell values to a new ``<YYYYMMDD>.xls`` workbook.\n",
"\n",
"    Args:\n",
"        data: iterable of rows, each row an iterable of cell values.\n",
"    \"\"\"\n",
"    workbook = xlwt.Workbook()\n",
"\n",
"    # xlwt workbooks create sheets with add_sheet(); Workbook has no load() method.\n",
"    sheet = workbook.add_sheet('Sheet1')\n",
"\n",
"    for row_index, row_data in enumerate(data):\n",
"        for col_index, cell_data in enumerate(row_data):\n",
"            sheet.write(row_index, col_index, cell_data)\n",
"\n",
"    # The file is named after today's date in compact form.\n",
"    workbook.save(get_cur_time()[0] + '.xls')\n",
"\n",
"\n",
"def get_cur_time(date = ''):\n",
"    \"\"\"Format a date both as ``YYYYMMDD`` and as ``YYYY-MM-DD``.\n",
"\n",
"    Args:\n",
"        date: object exposing ``year``/``month``/``day``; the empty string\n",
"            (default) means the current local time.\n",
"\n",
"    Returns:\n",
"        Tuple ``(compact, dashed)`` of the two formatted strings.\n",
"    \"\"\"\n",
"    moment = datetime.now() if date == '' else date\n",
"    year_txt = str(moment.year)\n",
"    # Month and day are zero-padded to two digits; the year is used as-is.\n",
"    month_txt = f\"{moment.month:02d}\"\n",
"    day_txt = f\"{moment.day:02d}\"\n",
"    compact = year_txt + month_txt + day_txt\n",
"    dashed = \"-\".join((year_txt, month_txt, day_txt))\n",
"    return compact, dashed\n",
"\n",
"\n",
"\n",
"def get_head_push_auth():\n",
"    \"\"\"Log in with the push credentials and return the access token.\n",
"\n",
"    Posts ``login_push_data`` to ``login_push_url``; returns the ``accessToken``\n",
"    when the reply's ``status`` is truthy, otherwise prints a failure message and\n",
"    returns None.\n",
"    \"\"\"\n",
"    response = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5))\n",
"    payload = json.loads(response.text)\n",
"    if not payload[\"status\"]:\n",
"        print(\"获取认证失败\")\n",
"        return None\n",
"    return payload[\"data\"][\"accessToken\"]\n",
"\n",
"\n",
"\n",
"def upload_data_to_system(token_push,date):\n",
"    \"\"\"Assemble the forecast upload payload for ``date`` and print the predicted price.\n",
"\n",
"    The POST to ``upload_url`` is commented out below, so ``token_push`` is\n",
"    currently unused and nothing is sent to the server.\n",
"    \"\"\"\n",
"    predicted_price = forecast_price()\n",
"    forecast_record = {\n",
"        \"dataItemNo\": \"250855713|Forecast_Price|ACN\",\n",
"        \"dataDate\": date,\n",
"        \"dataStatus\": \"add\",\n",
"        \"dataValue\": predicted_price\n",
"    }\n",
"    data = {\n",
"        \"funcModule\": \"数据表信息列表\",\n",
"        \"funcOperation\": \"新增\",\n",
"        \"data\": [forecast_record]\n",
"    }\n",
"    # headers = {\"Authorization\": token_push}\n",
"    # res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))\n",
"    # print(res.text)\n",
"    print('预测值:',data['data'][0]['dataValue'])\n",
"\n",
" \n",
"# def upload_data_to_system(token):\n",
"# data = {\n",
"# \"funcModule\": \"数据表信息列表\",\n",
"# \"funcOperation\": \"新增\",\n",
"# \"data\": [\n",
"# {\"dataItemNo\": \"C01100036|Forecast_ Price|ACN\",\n",
"# \"dataDate\": '20230706',\n",
"# \"dataStatus\": \"add\",\n",
"# \"dataValue\": 3780.0\n",
"# }\n",
"\n",
"# ]\n",
"# }\n",
"# headers = {\"Authorization\": token}\n",
"# res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))\n",
"# print(res.text)\n",
"\n",
"price_list = []\n",
" \n",
"def forecast_price():\n",
"    \"\"\"Predict the price for the most recent row of ``液化气数据.xls``.\n",
"\n",
"    Loads the workbook, fills gaps forward then backward, feeds the last row\n",
"    (without ``Date`` and ``Price``) to the model stored in\n",
"    ``日度价格预测_液化气最佳模型.pkl`` and records the result in ``price_list``.\n",
"\n",
"    Returns:\n",
"        The predicted price as a float rounded to two decimals.\n",
"    \"\"\"\n",
"    import joblib\n",
"\n",
"    df_test = pd.read_excel('液化气数据.xls')\n",
"    # The first row under the header holds data-item IDs, not observations.\n",
"    df_test = df_test.drop([0])\n",
"    try:\n",
"        df_test['Date'] = pd.to_datetime(df_test['Date'], format='%m/%d/%Y')\n",
"    except (ValueError, TypeError):\n",
"        # Fall back to ISO dates when the month/day/year parse is rejected.\n",
"        df_test['Date'] = pd.to_datetime(df_test['Date'], format=r'%Y-%m-%d')\n",
"\n",
"    # Fill missing values with the previous observation, then the next one.\n",
"    df_test_1 = df_test.ffill().bfill()\n",
"\n",
"    # NOTE: the model file is a pickle; only load files produced by this project.\n",
"    Best_model_DalyLGPrice = joblib.load(\"日度价格预测_液化气最佳模型.pkl\")\n",
"\n",
"    # The latest day is the last row; copy() so the edits below act on an\n",
"    # independent frame rather than a slice of df_test_1.\n",
"    df_test_1_Day = df_test_1.tail(1).copy()\n",
"    df_test_1_Day.index = df_test_1_Day[\"Date\"]\n",
"    df_test_1_Day = df_test_1_Day.drop([\"Date\", 'Price'], axis=1)\n",
"    df_test_1_Day = df_test_1_Day.dropna()\n",
"\n",
"    for col in df_test_1_Day.columns:\n",
"        df_test_1_Day[col] = pd.to_numeric(df_test_1_Day[col], errors='coerce')\n",
"\n",
"    Ypredict_Today = Best_model_DalyLGPrice.predict(df_test_1_Day)\n",
"\n",
"    df_test_1_Day['日度预测价格'] = Ypredict_Today\n",
"    print(df_test_1_Day['日度预测价格'])\n",
"    # The Series is indexed by date, so take the first element by position\n",
"    # explicitly; [0] would be looked up as a label.\n",
"    a = round(float(df_test_1_Day['日度预测价格'].iloc[0]), 2)\n",
"    price_list.append(a)\n",
"    return a\n",
"def optimize_Model():\n",
"    \"\"\"Retrain the daily price models and save the tuned XGBoost grid search.\n",
"\n",
"    Reads ``液化气数据.xls``, fills gaps forward then backward, fits a Lasso and a\n",
"    default XGBRegressor as baselines, grid-searches XGBRegressor hyper-parameters,\n",
"    prints the RMSE / R^2 comparison table and pickles the fitted ``GridSearchCV``\n",
"    object to ``日度价格预测_液化气最佳模型.pkl``.\n",
"    \"\"\"\n",
"    import pickle\n",
"\n",
"    import matplotlib as mpl\n",
"    import matplotlib.pyplot as plt\n",
"    import numpy as np\n",
"    import pandas as pd\n",
"    from sklearn.linear_model import Lasso\n",
"    from sklearn.metrics import mean_squared_error, r2_score\n",
"    from sklearn.model_selection import GridSearchCV, train_test_split\n",
"    from xgboost import XGBRegressor\n",
"\n",
"    pd.set_option('display.max_rows',40)\n",
"    pd.set_option('display.max_columns',40)\n",
"\n",
"    df_test = pd.read_excel('液化气数据.xls')\n",
"    # The first row under the header holds data-item IDs, not observations.\n",
"    df_test = df_test.drop([0])\n",
"    try:\n",
"        df_test['Date'] = pd.to_datetime(df_test['Date'], format='%m/%d/%Y')\n",
"    except (ValueError, TypeError):\n",
"        df_test['Date'] = pd.to_datetime(df_test['Date'], format=r'%Y-%m-%d')\n",
"\n",
"    # Fill missing values with the previous observation, then the next one.\n",
"    df_test_1 = df_test.ffill().bfill()\n",
"    df_test_1 = df_test_1.set_index(\"Date\").astype('float')\n",
"\n",
"    x = df_test_1.drop('Price', axis=1)\n",
"    y = df_test_1['Price']\n",
"\n",
"    # Hold out 20% of the rows for evaluation.\n",
"    X_train, x_test, y_train, y_true = train_test_split(x, y, test_size=0.2, random_state=0)\n",
"\n",
"    # Baselines. Distinct names are used so the Lasso class is not rebound to an instance.\n",
"    lasso_model = Lasso(random_state=0)\n",
"    xgbr_model = XGBRegressor(random_state=0)\n",
"    lasso_model.fit(X_train, y_train)\n",
"    xgbr_model.fit(X_train, y_train)\n",
"    y_pre_Lasso = lasso_model.predict(x_test)\n",
"    y_pre_XGBR = xgbr_model.predict(x_test)\n",
"\n",
"    Lasso_score = r2_score(y_true, y_pre_Lasso)\n",
"    XGBR_score = r2_score(y_true, y_pre_XGBR)\n",
"    Lasso_RMSE = np.sqrt(mean_squared_error(y_true, y_pre_Lasso))\n",
"    XGBR_RMSE = np.sqrt(mean_squared_error(y_true, y_pre_XGBR))\n",
"\n",
"    model_results = pd.DataFrame([['Lasso', Lasso_RMSE, Lasso_score],\n",
"                                  ['XgBoost', XGBR_RMSE, XGBR_score]],\n",
"                                 columns = ['模型(Model)','均方根误差(RMSE)', 'R^2 score'])\n",
"    model_results1 = model_results.set_index('模型(Model)')\n",
"\n",
"    def plot_feature_importance(importance,names,model_type):\n",
"        \"\"\"Draw a horizontal bar chart of feature importances, largest first.\"\"\"\n",
"        fi_df = pd.DataFrame({'feature_names': np.array(names),\n",
"                              'feature_importance': np.array(importance)})\n",
"        fi_df = fi_df.sort_values(by=['feature_importance'], ascending=False)\n",
"\n",
"        plt.figure(figsize=(10,8))\n",
"        sn.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names'])\n",
"\n",
"        plt.title(model_type + \" \"+'FEATURE IMPORTANCE')\n",
"        plt.xlabel('FEATURE IMPORTANCE')\n",
"        plt.ylabel('FEATURE NAMES')\n",
"\n",
"    # Font able to render the Chinese labels. The former %pylab magic is dropped:\n",
"    # it is IPython-only syntax and star-imports numpy/matplotlib names.\n",
"    # NOTE(review): the original nesting of this setting was lost — confirm it\n",
"    # belongs at function level rather than inside plot_feature_importance.\n",
"    mpl.rcParams['font.sans-serif'] = ['SimHei']\n",
"\n",
"    ## Xgboost hyper-parameter search (first pass)\n",
"    # Reference: https://juejin.im/post/6844903661013827598\n",
"    # Candidate values are spaced at 1 / 3 / 10 within an order of magnitude.\n",
"    estimator = XGBRegressor(random_state=0,\n",
"                             nthread=4,\n",
"                             seed=0\n",
"                             )\n",
"    parameters = {\n",
"        'max_depth': range (2, 11, 2),  # maximum tree depth\n",
"        'n_estimators': range (50, 101, 10),  # number of boosting rounds\n",
"        'learning_rate': [0.01, 0.03, 0.1, 0.3, 0.5, 1]\n",
"    }\n",
"\n",
"    grid_search_XGB = GridSearchCV(\n",
"        estimator=estimator,\n",
"        param_grid=parameters,\n",
"        # n_jobs = 10,\n",
"        cv = 3,\n",
"        verbose=True\n",
"    )\n",
"    grid_search_XGB.fit(X_train, y_train)\n",
"\n",
"    y_pred = grid_search_XGB.predict(x_test)\n",
"    op_XGBR_score = r2_score(y_true, y_pred)\n",
"    op_XGBR_RMSE = np.sqrt(mean_squared_error(y_true, y_pred))\n",
"\n",
"    model_results2 = pd.DataFrame([['Optimized_Xgboost', op_XGBR_RMSE, op_XGBR_score]],\n",
"                                  columns = ['模型(Model)', '均方根误差(RMSE)', 'R^2 score'])\n",
"    model_results2 = model_results2.set_index('模型(Model)')\n",
"\n",
"    # DataFrame.append no longer exists in pandas 2; concat keeps the model-name index.\n",
"    results = pd.concat([model_results1, model_results2])\n",
"    print(results)\n",
"\n",
"    Pkl_Filename = \"日度价格预测_液化气最佳模型.pkl\"\n",
"    with open(Pkl_Filename, 'wb') as file:\n",
"        pickle.dump(grid_search_XGB, file)\n",
"\n",
"\n",
" \n",
"\n",
"def read_xls_data_bak():\n",
"    \"\"\"Legacy xlrd reader: store the workbook's entire second row in ``one_cols``.\n",
"\n",
"    Opens ``read_file_path_name``, takes the first sheet and assigns the values\n",
"    of the row at index 1 to the module-level ``one_cols``.\n",
"    \"\"\"\n",
"    global one_cols, two_cols\n",
"    first_sheet = xlrd.open_workbook(read_file_path_name).sheet_by_index(0)\n",
"    one_cols = first_sheet.row_values(1)\n",
"\n",
"def read_xls_data():\n",
"    \"\"\"Load the data-item IDs from the workbook into the global ``one_cols``.\n",
"\n",
"    The sheet is read without a header row; the row at index 1 is taken and its\n",
"    first cell is dropped before the rest is stored and printed.\n",
"    \"\"\"\n",
"    global one_cols, two_cols\n",
"    raw_sheet = pd.read_excel(read_file_path_name, header=None)\n",
"    id_row = raw_sheet.iloc[1].tolist()\n",
"    one_cols = id_row[1:]\n",
"    print(f'获取到的数据项ID{one_cols}')\n",
"\n",
"def start(date=''):\n",
"    \"\"\"Fetch one day's data-item values and append them as a new workbook row.\n",
"\n",
"    Args:\n",
"        date: day to fetch, in any form ``getNow`` accepts; the empty string\n",
"            (default) means today.\n",
"    \"\"\"\n",
"    read_xls_data()\n",
"\n",
"    token = get_head_auth()\n",
"    if not token:\n",
"        return\n",
"    token_push = get_head_push_auth()\n",
"    if not token_push:\n",
"        return\n",
"    cur_time,cur_time2 = getNow(date)\n",
"    # NOTE(review): one_cols already excludes the Date column, so this slice also\n",
"    # leaves the first item (the Price column's ID) out of the query — confirm intended.\n",
"    datas = get_data_value(token, one_cols[1:],cur_time)\n",
"    if datas is None:\n",
"        # The reply could not be decoded; do not write a row built from nothing.\n",
"        return\n",
"\n",
"    # Items returned without a value are stored as empty cells.\n",
"    dataItemNo_dataValue = {\n",
"        data_value[\"dataItemNo\"]: data_value.get(\"dataValue\", \"\")\n",
"        for data_value in datas\n",
"    }\n",
"\n",
"    # One cell per entry of one_cols (i.e. per non-Date column) keeps the row\n",
"    # aligned with the sheet; iterating one_cols[1:] produced a row one cell\n",
"    # short and shifted left, which made save_xls fail on the last column.\n",
"    append_rows = [cur_time2] + [dataItemNo_dataValue.get(value, \"\") for value in one_cols]\n",
"    save_xls(append_rows)\n",
"    # optimize_Model()\n",
"    # upload_data_to_system(token_push,cur_time)\n",
" \n",
" \n",
" \n",
" # data_list.append(three_cols)\n",
" # write_xls(data_list)\n",
"\n",
"def getNow(date='',offset=0):\n",
"    \"\"\"Resolve ``date`` minus ``offset`` days and format it two ways.\n",
"\n",
"    Args:\n",
"        date: empty string for the current local time, a ``datetime``, or a\n",
"            value whose string form is ``YYYY-MM-DD`` or ``YYYYMMDD``.\n",
"        offset: number of days to subtract from the resolved date.\n",
"\n",
"    Returns:\n",
"        Tuple ``(YYYYMMDD, YYYY-MM-DD)``.\n",
"\n",
"    Raises:\n",
"        ValueError: if ``date`` matches neither supported string format.\n",
"    \"\"\"\n",
"    if date == '':\n",
"        base = datetime.now()\n",
"    elif isinstance(date, datetime):\n",
"        base = date\n",
"    else:\n",
"        date_text = str(date)\n",
"        try:\n",
"            # Dashed form first, then the compact form without separators.\n",
"            base = datetime.strptime(date_text, \"%Y-%m-%d\")\n",
"        except ValueError:\n",
"            base = datetime.strptime(date_text, \"%Y%m%d\")\n",
"\n",
"    # The offset is applied exactly once, whichever branch produced the base date.\n",
"    target = base - timedelta(days=offset)\n",
"    cur_time = f\"{target.year}{target.month:02d}{target.day:02d}\"\n",
"    cur_time2 = f\"{target.year}-{target.month:02d}-{target.day:02d}\"\n",
"    return cur_time,cur_time2\n",
" \n",
"def start_1(date=''):\n",
"    \"\"\"Backfill the previous day's values: fetch them and upsert the workbook row.\n",
"\n",
"    Args:\n",
"        date: reference day in any form ``getNow`` accepts (empty string means\n",
"            today); the day actually fetched is one day earlier.\n",
"    \"\"\"\n",
"    read_xls_data()\n",
"    token = get_head_auth()\n",
"    if not token:\n",
"        return\n",
"\n",
"    cur_time,cur_time2 = getNow(date,offset=1)\n",
"    print(f\"补充{cur_time}数据\")\n",
"    # NOTE(review): one_cols already excludes the Date column, so this slice also\n",
"    # leaves the first item (the Price column's ID) out of the query — confirm intended.\n",
"    datas = get_data_value(token, one_cols[1:],date=cur_time)\n",
"    if datas is None:\n",
"        # The reply could not be decoded; do not write a row built from nothing.\n",
"        return\n",
"\n",
"    dataItemNo_dataValue = {}\n",
"    for data_value in datas:\n",
"        if \"dataValue\" not in data_value:\n",
"            # Show which item came back without a value; it is stored as an empty cell.\n",
"            print(data_value)\n",
"        dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value.get(\"dataValue\", \"\")\n",
"\n",
"    # The Date cell uses the dashed form, the same one start() writes and the\n",
"    # '%Y-%m-%d' parse in forecast_price expects; the compact form matched neither.\n",
"    append_rows = [cur_time2] + [dataItemNo_dataValue.get(value, \"\") for value in one_cols]\n",
"    print('添加的行:',append_rows)\n",
"    save_xls_2(append_rows)\n",
"\n",
" \n",
" # data_list.append(three_cols)\n",
" # write_xls(data_list)\n",
" \n",
"def save_xls_1(append_rows):\n",
"    \"\"\"Rewrite ``液化气数据.xls`` with the last row of the first sheet replaced.\n",
"\n",
"    Every sheet is copied without its final row; on the first sheet\n",
"    ``append_rows`` is then written into that row position.\n",
"\n",
"    Args:\n",
"        append_rows: cell values, one per sheet column.\n",
"    \"\"\"\n",
"    source_book = xlrd.open_workbook('液化气数据.xls')\n",
"    sheet_names = source_book.sheet_names()\n",
"\n",
"    target_book = xlwt.Workbook()\n",
"    for sheet_idx, sheet_name in enumerate(sheet_names):\n",
"        source_sheet = source_book.sheet_by_index(sheet_idx)\n",
"\n",
"        # All rows except the last one are carried over.\n",
"        kept_rows = source_sheet.nrows - 1\n",
"        n_cols = source_sheet.ncols\n",
"        copied = [\n",
"            [source_sheet.cell_value(r, c) for c in range(n_cols)]\n",
"            for r in range(kept_rows)\n",
"        ]\n",
"\n",
"        target_sheet = target_book.add_sheet(sheet_name)\n",
"        for r, row_cells in enumerate(copied):\n",
"            for c, cell in enumerate(row_cells):\n",
"                target_sheet.write(r, c, cell)\n",
"\n",
"        if sheet_idx == 0:\n",
"            # The new values take the place of the dropped last row.\n",
"            for c in range(n_cols):\n",
"                target_sheet.write(kept_rows, c, append_rows[c])\n",
"\n",
"    target_book.save(\"液化气数据.xls\")\n",
"\n",
"def _date_key(value):\n",
"    \"\"\"Reduce a date-like cell to digits-only text so differently written dates compare equal.\"\"\"\n",
"    if isinstance(value, str):\n",
"        return value.strip().replace(\"-\", \"\").replace(\"/\", \"\")\n",
"    if pd.isna(value):\n",
"        return \"\"\n",
"    if hasattr(value, \"strftime\"):\n",
"        return value.strftime(\"%Y%m%d\")\n",
"    if isinstance(value, float) and value.is_integer():\n",
"        return str(int(value))\n",
"    return str(value)\n",
"\n",
"\n",
"def _xls_cell(value):\n",
"    \"\"\"Convert one DataFrame cell into a value xlwt can store.\"\"\"\n",
"    if isinstance(value, str):\n",
"        return value\n",
"    if pd.isna(value):\n",
"        return \"\"  # missing values become empty cells\n",
"    if hasattr(value, \"strftime\"):\n",
"        return value.strftime(\"%Y-%m-%d\")  # dates are stored as dashed text\n",
"    if hasattr(value, \"item\"):\n",
"        return value.item()  # numpy scalar -> plain Python number\n",
"    return value\n",
"\n",
"\n",
"def _write_frame_as_xls(frame, path):\n",
"    \"\"\"Write ``frame`` (header row plus data rows) to ``path`` in .xls format using xlwt.\"\"\"\n",
"    workbook = xlwt.Workbook()\n",
"    sheet = workbook.add_sheet('Sheet1')\n",
"    for col_index, column_name in enumerate(frame.columns):\n",
"        sheet.write(0, col_index, _xls_cell(column_name))\n",
"    for row_index, row in enumerate(frame.itertuples(index=False, name=None), start=1):\n",
"        for col_index, cell in enumerate(row):\n",
"            sheet.write(row_index, col_index, _xls_cell(cell))\n",
"    workbook.save(path)\n",
"\n",
"\n",
"def save_xls_2(append_rows):\n",
"    \"\"\"Insert or update one row of ``液化气数据.xls``, keyed by its ``Date`` cell.\n",
"\n",
"    Args:\n",
"        append_rows: list of cell values in sheet column order, Date first.\n",
"\n",
"    If a row with the same date already exists it is overwritten, otherwise the\n",
"    row is appended. A missing workbook is created from the single row. Any other\n",
"    failure is printed rather than raised, as before.\n",
"    \"\"\"\n",
"    path = '液化气数据.xls'\n",
"    try:\n",
"        # The first sheet row is the header.\n",
"        df = pd.read_excel(path, sheet_name=0)\n",
"        print(df.columns)\n",
"\n",
"        # One row whose columns line up with the sheet. Passing the list with\n",
"        # index=df.columns built a single column and made ['Date'] a KeyError.\n",
"        new_row = pd.DataFrame([append_rows], columns=df.columns)\n",
"        new_date = new_row['Date'].iloc[0]\n",
"        new_key = _date_key(new_date)\n",
"\n",
"        # object dtype lets empty-string cells sit in otherwise numeric columns.\n",
"        df = df.astype(object)\n",
"        date_mask = (df['Date'].map(_date_key) == new_key) & bool(new_key)\n",
"\n",
"        if date_mask.any():\n",
"            for row_label in df.index[date_mask]:\n",
"                df.loc[row_label] = list(append_rows)\n",
"            print(f\"更新 {new_date} 数据\")\n",
"        else:\n",
"            df = pd.concat([df, new_row.astype(object)], ignore_index=True)\n",
"            print(df.head())\n",
"            print(df.tail())\n",
"            print(f\"插入 {new_date} 新数据\")\n",
"\n",
"        # Written with xlwt: pandas refuses the openpyxl engine for a '.xls'\n",
"        # path, and the xlrd-based readers in this notebook need real .xls.\n",
"        _write_frame_as_xls(df, path)\n",
"\n",
"    except FileNotFoundError:\n",
"        # No workbook yet: create one containing just this row.\n",
"        _write_frame_as_xls(pd.DataFrame([append_rows]), path)\n",
"    except Exception as e:\n",
"        print(f\"保存数据时发生错误: {str(e)}\")\n",
" \n",
" \n",
"def check_data(dataItemNo, date=''):\n",
"    \"\"\"Query one or more data items for a single day, for manual verification.\n",
"\n",
"    Args:\n",
"        dataItemNo: a data item code, or a list of codes.\n",
"        date: day to query in any form ``getNow`` accepts; the empty string\n",
"            (default) means today.\n",
"\n",
"    Returns:\n",
"        The queried data, or None when login fails or nothing is returned.\n",
"    \"\"\"\n",
"    token = get_head_auth()\n",
"    if not token:\n",
"        return None\n",
"\n",
"    # The other callers pass a list of codes, so a single code is wrapped.\n",
"    # NOTE(review): assumes the API requires a list here — confirm.\n",
"    item_codes = [dataItemNo] if isinstance(dataItemNo, str) else list(dataItemNo)\n",
"\n",
"    # get_data_value requires the date; omitting it raised TypeError.\n",
"    datas = get_data_value(token, item_codes, getNow(date)[0])\n",
"    if not datas:\n",
"        return None\n",
"    return datas\n",
"\n",
"\n",
"def save_xls(append_rows):\n",
"    \"\"\"Rewrite ``液化气数据.xls`` with one extra row at the end of the first sheet.\n",
"\n",
"    Every sheet is copied cell by cell into a new workbook; on the first sheet\n",
"    ``append_rows`` is written directly below the existing rows.\n",
"\n",
"    Args:\n",
"        append_rows: cell values, one per sheet column.\n",
"    \"\"\"\n",
"    source_book = xlrd.open_workbook('液化气数据.xls')\n",
"    sheet_names = source_book.sheet_names()\n",
"\n",
"    target_book = xlwt.Workbook()\n",
"    for sheet_idx, sheet_name in enumerate(sheet_names):\n",
"        source_sheet = source_book.sheet_by_index(sheet_idx)\n",
"\n",
"        n_rows = source_sheet.nrows\n",
"        n_cols = source_sheet.ncols\n",
"        copied = [\n",
"            [source_sheet.cell_value(r, c) for c in range(n_cols)]\n",
"            for r in range(n_rows)\n",
"        ]\n",
"\n",
"        target_sheet = target_book.add_sheet(sheet_name)\n",
"        for r, row_cells in enumerate(copied):\n",
"            for c, cell in enumerate(row_cells):\n",
"                target_sheet.write(r, c, cell)\n",
"\n",
"        if sheet_idx == 0:\n",
"            # The new row goes right after the last existing one.\n",
"            for c in range(n_cols):\n",
"                target_sheet.write(n_rows, c, append_rows[c])\n",
"\n",
"    target_book.save(\"液化气数据.xls\")\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"\n",
"# if __name__ == \"__main__\":\n",
"# print('运行中')\n",
"# # 需要单独运行放开\n",
"# # start()\n",
"# # start_1(date='2025-01-22')\n",
"# # start_1()\n",
"\n",
"# # 每天定时12点运行\n",
"# while True:\n",
"# try:\n",
"# # 获取当前时间\n",
"# current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n",
"# current_time_1 = time.strftime(\"%H:%M:%S\", time.localtime())\n",
"# # print(current_time_1)\n",
"\n",
"# # 判断当前时间是否为执行任务的时间点\n",
"# if current_time == \"09:15:00\":\n",
"# print(\"执行定时任务\")\n",
"# start()\n",
"\n",
"# # 休眠1秒钟避免过多占用CPU资源\n",
"# time.sleep(1)\n",
"\n",
"# elif current_time_1 == \"20:00:00\":\n",
"# print(\"更新数据\")\n",
"# start_1()\n",
"# time.sleep(1)\n",
"# except:\n",
"# print('执行错误')\n",
"# time.sleep(1)\n",
"\n",
"\n",
"# # 检测数据准确性, 需要检测放开\n",
"# # check_data(\"100028098|LISTING_PRICE\")\n",
"# # check_data(\"9137070016544622XB|DAY_Yield\")\n"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"获取到的数据项ID['YHQMXBB|C01100008|STRIKE_PRICE', 'C01100008|CORTED_VALUE', 'C01100008|AUCTION_MAX_PRICE', 'C01100008|AMOUNT', 'ICE_CL0_LAST_YEDAY_PRICE', '100028046|LISTING_PRICE', 'C01100008|PLAN_SALE', '91370200163576944B|C01100008|STRIKE_PRICE', '9137078672073757X8|C01100008|STRIKE_PRICE', '91370500674526498A|C01100008|STRIKE_PRICE', '91370305773165341A|C01100008|STRIKE_PRICE', '91370521164880008P|C01100008|STRIKE_PRICE', '91370321164425136B|C01100008|STRIKE_PRICE', 'SD|GC|ZDW|LIST_PRICE', '370500|ISOBUTANE|LIST_PRICE', 'SD|YT|SG|LIST_PRICE', '91110000710926094P|C01100008|SUPPLY_MERE', '91110000710932515R|C01100008|SUPPLY_MERE', '91370500674526498A|C01100008|SUPPLY_MERE', '91370321164425136B|C01100008|SUPPLY_MERE', 'C01100008|OTHER|SUPPLY_MERE', 'SD|WJH|DEMANDS', 'C01100008|SUY_DED_DAP', 'C01100008|EFFECTIVE_STOCK', '912102117169477344|C01100008|STRIKE_PRICE', '91110304102767480H|C01100008|STRIKE_PRICE', '91130193670310403L|C01100008|STRIKE_PRICE', 'HD|LPG|IMPORT_PRICE', 'SD|WJH|SALES_PRICE']\n",
"获取的token: eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhcGlfZGV2IiwidGgiOiI4YTQ1NzdkYmQ5MTk2NzU3NThkNTc5OTlhMWU4OTFmZSIsImx0IjoiYXBpIiwiaXNzIjoiIiwidG0iOiJQQyIsImV4cCI6MTc0MjMzMDEyMSwianRpIjoiMmI5ZmUzNTA5YjNmNGU4OTkzMjRiNzU1MzQ4ODlkNTQifQ.nezcKMQq4GnNoHKwvIOEe-1pK0Oz3LliiM8yYjOMG8c\n",
"补充20250228数据\n",
"数据项查询参数search_data\n",
"{'data': {'date': '20250228', 'dataItemNoList': ['C01100008|CORTED_VALUE', 'C01100008|AUCTION_MAX_PRICE', 'C01100008|AMOUNT', 'ICE_CL0_LAST_YEDAY_PRICE', '100028046|LISTING_PRICE', 'C01100008|PLAN_SALE', '91370200163576944B|C01100008|STRIKE_PRICE', '9137078672073757X8|C01100008|STRIKE_PRICE', '91370500674526498A|C01100008|STRIKE_PRICE', '91370305773165341A|C01100008|STRIKE_PRICE', '91370521164880008P|C01100008|STRIKE_PRICE', '91370321164425136B|C01100008|STRIKE_PRICE', 'SD|GC|ZDW|LIST_PRICE', '370500|ISOBUTANE|LIST_PRICE', 'SD|YT|SG|LIST_PRICE', '91110000710926094P|C01100008|SUPPLY_MERE', '91110000710932515R|C01100008|SUPPLY_MERE', '91370500674526498A|C01100008|SUPPLY_MERE', '91370321164425136B|C01100008|SUPPLY_MERE', 'C01100008|OTHER|SUPPLY_MERE', 'SD|WJH|DEMANDS', 'C01100008|SUY_DED_DAP', 'C01100008|EFFECTIVE_STOCK', '912102117169477344|C01100008|STRIKE_PRICE', '91110304102767480H|C01100008|STRIKE_PRICE', '91130193670310403L|C01100008|STRIKE_PRICE', 'HD|LPG|IMPORT_PRICE', 'SD|WJH|SALES_PRICE']}, 'funcModule': '数据项', 'funcOperation': '查询'}\n",
"数据项查询结果search_res\n",
"{\"confirmFlg\":false,\"data\":[{\"dataDate\":\"20250228\",\"dataItemNo\":\"100028046|LISTING_PRICE\",\"dataValue\":8441.00000000},{\"dataDate\":\"20250228\",\"dataItemNo\":\"370500|ISOBUTANE|LIST_PRICE\",\"dataValue\":5380.00000000},{\"dataDate\":\"20250228\",\"dataItemNo\":\"91110000710926094P|C01100008|SUPPLY_MERE\",\"dataValue\":1300.00000000},{\"dataDate\":\"20250228\",\"dataItemNo\":\"91110000710932515R|C01100008|SUPPLY_MERE\"},{\"dataDate\":\"20250228\",\"dataItemNo\":\"91110304102767480H|C01100008|STRIKE_PRICE\",\"dataValue\":5150.00000000},{\"dataDate\":\"20250228\",\"dataItemNo\":\"91130193670310403L|C01100008|STRIKE_PRICE\",\"dataValue\":5150.00000000},{\"dataDate\":\"20250228\",\"dataItemNo\":\"912102117169477344|C01100008|STRIKE_PRICE\",\"dataValue\":4670.00000000},{\"dataDate\":\"20250228\",\"dataItemNo\":\"91370200163576944B|C01100008|STRIKE_PRICE\",\"dataValue\":5300.00000000},{\"dataDate\":\"20250228\",\"dataItemNo\":\"91370305773165341A|C01100008|STRIKE_PRICE\",\"dataValue\":5600.00000000},{\"dataDate\":\"20250228\",\"dataItemNo\":\"91370321164425136B|C01100008|STRIKE_PRICE\",\"dataValue\":5500.00000000},{\"dataDate\":\"20250228\",\"dataItemNo\":\"91370321164425136B|C01100008|SUPPLY_MERE\",\"dataValue\":200.00000000},{\"dataDate\":\"20250228\",\"dataItemNo\":\"91370500674526498A|C01100008|STRIKE_PRICE\",\"dataValue\":5488.00000000},{\"dataDate\":\"20250228\",\"dataItemNo\":\"91370500674526498A|C01100008|SUPPLY_MERE\",\"dataValue\":175.00000000},{\"dataDate\":\"20250228\",\"dataItemNo\":\"91370521164880008P|C01100008|STRIKE_PRICE\",\"dataValue\":5455.00000000},{\"dataDate\":\"20250228\",\"dataItemNo\":\"C01100008|AMOUNT\",\"dataValue\":342.72000000},{\"dataDate\":\"20250228\",\"dataItemNo\":\"C01100008|AUCTION_MAX_PRICE\",\"dataValue\":5500.00000000},{\"dataDate\":\"20250228\",\"dataItemNo\":\"C01100008|CORTED_VALUE\",\"dataValue\":5500.00000000},{\"dataDate\":\"20250228\",\"dataItemNo\":\"C01100008|EFFECTIVE_STOCK\",\"dataValue\":-550.20000000},{\
"dataDate\":\"20250228\",\"dataItemNo\":\"C01100008|OTHER|SUPPLY_MERE\",\"dataValue\":5000.00000000},{\"dataDate\":\"20250228\",\"dataItemNo\":\"C01100008|PLAN_SALE\",\"dataValue\":500.00000000},{\"dataDate\":\"20250228\",\"dataItemNo\":\"C01100008|SUY_DED_DAP\",\"dataValue\":-50.00000000},{\"dataDate\":\"20250228\",\"dataItemNo\":\"HD|LPG|IMPORT_PRICE\",\"dataValue\":5400.00000000},{\"dataDate\":\"20250228\",\"dataItemNo\":\"ICE_CL0_LAST_YEDAY_PRICE\",\"dataValue\":73.35000000},{\"dataDate\":\"20250228\",\"dataItemNo\":\"SD|GC|ZDW|LIST_PRICE\",\"dataValue\":5250.00000000},{\"dataDate\":\"20250228\",\"dataItemNo\":\"SD|WJH|DEMANDS\",\"dataValue\":8500.00000000},{\"dataDate\":\"20250228\",\"dataItemNo\":\"SD|WJH|SALES_PRICE\",\"dataValue\":8400.00000000},{\"dataDate\":\"20250228\",\"dataItemNo\":\"SD|YT|SG|LIST_PRICE\",\"dataValue\":6050.00000000}],\"status\":true}\n",
"数据项查询结果: [{'dataDate': '20250228', 'dataItemNo': '100028046|LISTING_PRICE', 'dataValue': 8441.0}, {'dataDate': '20250228', 'dataItemNo': '370500|ISOBUTANE|LIST_PRICE', 'dataValue': 5380.0}, {'dataDate': '20250228', 'dataItemNo': '91110000710926094P|C01100008|SUPPLY_MERE', 'dataValue': 1300.0}, {'dataDate': '20250228', 'dataItemNo': '91110000710932515R|C01100008|SUPPLY_MERE'}, {'dataDate': '20250228', 'dataItemNo': '91110304102767480H|C01100008|STRIKE_PRICE', 'dataValue': 5150.0}, {'dataDate': '20250228', 'dataItemNo': '91130193670310403L|C01100008|STRIKE_PRICE', 'dataValue': 5150.0}, {'dataDate': '20250228', 'dataItemNo': '912102117169477344|C01100008|STRIKE_PRICE', 'dataValue': 4670.0}, {'dataDate': '20250228', 'dataItemNo': '91370200163576944B|C01100008|STRIKE_PRICE', 'dataValue': 5300.0}, {'dataDate': '20250228', 'dataItemNo': '91370305773165341A|C01100008|STRIKE_PRICE', 'dataValue': 5600.0}, {'dataDate': '20250228', 'dataItemNo': '91370321164425136B|C01100008|STRIKE_PRICE', 'dataValue': 5500.0}, {'dataDate': '20250228', 'dataItemNo': '91370321164425136B|C01100008|SUPPLY_MERE', 'dataValue': 200.0}, {'dataDate': '20250228', 'dataItemNo': '91370500674526498A|C01100008|STRIKE_PRICE', 'dataValue': 5488.0}, {'dataDate': '20250228', 'dataItemNo': '91370500674526498A|C01100008|SUPPLY_MERE', 'dataValue': 175.0}, {'dataDate': '20250228', 'dataItemNo': '91370521164880008P|C01100008|STRIKE_PRICE', 'dataValue': 5455.0}, {'dataDate': '20250228', 'dataItemNo': 'C01100008|AMOUNT', 'dataValue': 342.72}, {'dataDate': '20250228', 'dataItemNo': 'C01100008|AUCTION_MAX_PRICE', 'dataValue': 5500.0}, {'dataDate': '20250228', 'dataItemNo': 'C01100008|CORTED_VALUE', 'dataValue': 5500.0}, {'dataDate': '20250228', 'dataItemNo': 'C01100008|EFFECTIVE_STOCK', 'dataValue': -550.2}, {'dataDate': '20250228', 'dataItemNo': 'C01100008|OTHER|SUPPLY_MERE', 'dataValue': 5000.0}, {'dataDate': '20250228', 'dataItemNo': 'C01100008|PLAN_SALE', 'dataValue': 500.0}, {'dataDate': '20250228', 'dataItemNo': 
'C01100008|SUY_DED_DAP', 'dataValue': -50.0}, {'dataDate': '20250228', 'dataItemNo': 'HD|LPG|IMPORT_PRICE', 'dataValue': 5400.0}, {'dataDate': '20250228', 'dataItemNo': 'ICE_CL0_LAST_YEDAY_PRICE', 'dataValue': 73.35}, {'dataDate': '20250228', 'dataItemNo': 'SD|GC|ZDW|LIST_PRICE', 'dataValue': 5250.0}, {'dataDate': '20250228', 'dataItemNo': 'SD|WJH|DEMANDS', 'dataValue': 8500.0}, {'dataDate': '20250228', 'dataItemNo': 'SD|WJH|SALES_PRICE', 'dataValue': 8400.0}, {'dataDate': '20250228', 'dataItemNo': 'SD|YT|SG|LIST_PRICE', 'dataValue': 6050.0}]\n",
"{'dataDate': '20250228', 'dataItemNo': '91110000710932515R|C01100008|SUPPLY_MERE'}\n",
"添加的行: ['20250228', '', 5500.0, 5500.0, 342.72, 73.35, 8441.0, 500.0, 5300.0, '', 5488.0, 5600.0, 5455.0, 5500.0, 5250.0, 5380.0, 6050.0, 1300.0, '', 175.0, 200.0, 5000.0, 8500.0, -50.0, -550.2, 4670.0, 5150.0, 5150.0, 5400.0, 8400.0]\n",
"Index(['Date', 'Price', '修正价', '竞拍最高价', '液化石油气|发货量', '昨日布伦特价格', '昨日92#汽油价格',\n",
" '计划出货量', '青岛石化', '中化工-昌邑', '海科瑞林', '鑫泰石化|液化石油气|成交价', '垦利价格', '汇丰价格',\n",
" '正丁烷', '异丁烷价格', '顺酐', '中石化供应量', '中化工供应量', '海科供应量', '汇丰供应量', '京博和其他供应量',\n",
" '烷基化需求量', '昨日烷基化价差', '我司库存', '东北-大连石化', '华北-燕山石化', '华北-石家庄炼化',\n",
" '昨日原料气价格', '烷基化油销售价格'],\n",
" dtype='object')\n",
"保存数据时发生错误: 'Date'\n"
]
}
],
"source": [
"# Backfill window: start_1 is called once for each day in [start_date, end_date).\n",
"start_date = datetime(2025, 3, 1)\n",
"end_date = datetime(2025, 3, 2)\n",
"\n",
"# Walk the window with a separate cursor so start_date keeps its configured\n",
"# value and re-running this cell repeats the same backfill.\n",
"current_day = start_date\n",
"while current_day < end_date:\n",
"    # start(current_day)\n",
"    # time.sleep(1)\n",
"    start_1(current_day)\n",
"    current_day += timedelta(days=1)\n",
"    # Pause between days to avoid hammering the API.\n",
"    time.sleep(5)\n",
"\n",
"# print(price_list)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}