{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "WARNING:tensorflow:From C:\\Users\\EDY\\AppData\\Roaming\\Python\\Python311\\site-packages\\keras\\src\\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.\n", "\n" ] }, { "data": { "text/html": [ " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import requests\n", "import json\n", "import xlrd\n", "import xlwt\n", "from datetime import datetime, timedelta\n", "import time\n", "import pandas as pd\n", "\n", "# 变量定义\n", "login_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n", "search_url = \"http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos\"\n", "queryDataListItemNos_url = \"http://10.200.32.39/jingbo-api//api/warehouse/dwDataItem/queryDataListItemNos\"\n", "\n", "login_push_url = \"http://10.200.32.39/jingbo-api/api/server/login\"\n", "upload_url = \"http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList\"\n", "\n", "login_data = {\n", " \"data\": {\n", " \"account\": \"api_dev\",\n", " \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n", " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n", " \"terminal\": \"API\"\n", " },\n", " \"funcModule\": \"API\",\n", " \"funcOperation\": \"获取token\"\n", "}\n", "\n", "login_push_data = {\n", " \"data\": {\n", " \"account\": \"api_dev\",\n", " \"password\": \"ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=\",\n", " \"tenantHashCode\": \"8a4577dbd919675758d57999a1e891fe\",\n", " \"terminal\": \"API\"\n", " },\n", " \"funcModule\": \"API\",\n", " \"funcOperation\": \"获取token\"\n", "}\n", "\n", "read_file_path_name = \"沥青数据项.xlsx\"\n", "one_cols = []\n", "two_cols = []\n", "\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import seaborn as sn\n", "import random\n", "import time\n", "\n", "\n", "\n", "\n", "from plotly import __version__\n", "from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot\n", "\n", "from sklearn import preprocessing\n", "\n", "from pandas import Series,DataFrame\n", "\n", "import matplotlib.pyplot as plt\n", "\n", "import sklearn.datasets as datasets\n", "\n", "#导入机器学习算法模型\n", "from sklearn.linear_model import Lasso\n", "from xgboost import XGBRegressor\n", "\n", "import statsmodels.api as sm\n", "# from keras.preprocessing.sequence import TimeseriesGenerator\n", "from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator\n", "\n", "import plotly.express as px\n", "import plotly.graph_objects as go\n", "\n", "import xgboost as xgb\n", "from xgboost import plot_importance, plot_tree\n", "from sklearn.metrics import mean_absolute_error\n", "from statsmodels.tools.eval_measures import mse,rmse\n", "from sklearn.model_selection import GridSearchCV\n", "from xgboost import XGBRegressor\n", "import warnings\n", "import pickle\n", "\n", "from sklearn.metrics import mean_squared_error\n", "\n", "#切割训练数据和样本数据\n", "from sklearn.model_selection import train_test_split\n", "\n", "#用于模型评分\n", "from sklearn.metrics import r2_score\n", "\n", "le = preprocessing.LabelEncoder()\n", "\n", "# print(__version__) # requires version >= 1.9.0\n", "\n", "\n", "import cufflinks as cf\n", "cf.go_offline()\n", "\n", "random.seed(100)\n", "\n", "%matplotlib inline\n", "\n", "# 数据获取\n", "\n", "def get_head_auth():\n", " login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5))\n", " text = json.loads(login_res.text)\n", " if text[\"status\"]:\n", " token = text[\"data\"][\"accessToken\"]\n", " return token\n", " else:\n", " print(\"获取认证失败\")\n", " return None\n", "\n", "\n", "def get_data_value(token, dataItemNoList,date=''):\n", "\n", " search_data = {\n", " \"data\": {\n", " \"date\": getNow(date)[0],\n", " \"dataItemNoList\": dataItemNoList\n", " },\n", " \"funcModule\": \"数据项\",\n", " \"funcOperation\": \"查询\"\n", " }\n", " headers = {\"Authorization\": token}\n", " search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5))\n", " search_value = json.loads(search_res.text)[\"data\"]\n", " if search_value:\n", " return search_value\n", " else:\n", " print(\"今天没有新数据\")\n", " return None\n", "\n", "\n", "# xls文件处理\n", "\n", "\n", "\n", "def getNow(date='', offset=0):\n", " \"\"\"生成指定日期的两种格式字符串\n", " Args:\n", " date: 支持多种输入类型:\n", " - datetime对象\n", " - 字符串格式(支持'%Y-%m-%d'和'%Y%m%d')\n", " - 空字符串表示当前日期\n", " offset: 日期偏移天数\n", " Returns:\n", " tuple: (紧凑日期字符串, 标准日期字符串)\n", " \"\"\"\n", " # 日期解析逻辑\n", " from datetime import datetime,timedelta\n", " if isinstance(date, datetime):\n", " now = date\n", " else:\n", " now = datetime.now()\n", " if date:\n", " # 尝试多种日期格式解析\n", " for fmt in ('%Y-%m-%d', '%Y%m%d', '%Y/%m/%d'):\n", " try:\n", " now = datetime.strptime(str(date), fmt)\n", " break\n", " except ValueError:\n", " continue\n", " else:\n", " raise ValueError(f\"无法解析的日期格式: {date}\")\n", "\n", " # 应用日期偏移\n", " now = now - timedelta(days=offset)\n", " \n", " # 统一格式化输出\n", " date_str = now.strftime(\"%Y-%m-%d\")\n", " compact_date = date_str.replace(\"-\", \"\")\n", " return compact_date, date_str\n", "\n", "\n", "\n", "# def get_cur_time(date = ''):\n", "# if date == '':\n", "# now = datetime.now()\n", "# else:\n", "# now = date\n", "# year = now.year\n", "# month = now.month\n", "# day = now.day\n", "\n", "# if month < 10:\n", "# month = \"0\" + str(month)\n", "# if day < 10:\n", "# day = \"0\" + str(day)\n", "# cur_time = str(year) + str(month) + str(day)\n", "# cur_time2 = str(year) + \"-\" + str(month) + \"-\" + str(day)\n", "# # cur_time = '20231007'\n", "# # cur_time2 = '2023-10-07'\n", "# return cur_time, cur_time2\n", "\n", "\n", "def get_head_push_auth():\n", " login_res = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5))\n", " text = json.loads(login_res.text)\n", " if text[\"status\"]:\n", " token = text[\"data\"][\"accessToken\"]\n", " return token\n", " else:\n", " print(\"获取认证失败\")\n", " return None\n", "\n", "def upload_data_to_system(token_push,date):\n", " data = {\n", " \"funcModule\": \"数据表信息列表\",\n", " \"funcOperation\": \"新增\",\n", " \"data\": [\n", " {\"dataItemNo\": \"C01100036|Forecast_Price|ACN\",\n", " \"dataDate\": getNow(date)[0],\n", " \"dataStatus\": \"add\",\n", " \"dataValue\": forecast_price()\n", " }\n", "\n", " ]\n", " }\n", " headers = {\"Authorization\": token_push}\n", " res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))\n", " print(res.text)\n", "\n", "def forecast_price():\n", " df_test = pd.read_excel('沥青数据项.xlsx')\n", " df_test.drop([0],inplace=True)\n", " df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", " '京博签单量','京博库存量','京博产量','加权平均成交价']] = df_test[['汽油执行价','柴油执行价','齐鲁石化销量',\n", " '齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", " '京博签单量','京博库存量','京博产量','加权平均成交价']].astype('float')\n", " # df_test['日期']=pd.to_datetime(df_test['日期'], format='%d/%m/%Y',infer_datetime_format=True)\n", " df_test['日期']=pd.to_datetime(df_test['日期'], format='%Y-%m-%d',infer_datetime_format=True)\n", "\n", " #查看每个特征缺失值数量\n", " MisVal_Check=df_test.isnull().sum().sort_values(ascending=False)\n", " #去掉缺失值百分比>0.4的特征,去掉这些特征后的新表格命名为df_test_1\n", " df_MisVal_Check = pd.DataFrame(MisVal_Check,)#\n", " df_MisVal_Check_1=df_MisVal_Check.reset_index()\n", " df_MisVal_Check_1.columns=['Variable_Name','Missing_Number'] \n", " df_MisVal_Check_1['Missing_Number']=df_MisVal_Check_1['Missing_Number']/len(df_test)\n", " df_test_1=df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number']>0.4].Variable_Name,axis = 1)\n", "\n", " #将缺失值补为前一个或者后一个数值\n", " df_test_1=df_test_1.fillna(df_test.ffill())\n", " df_test_1=df_test_1.fillna(df_test_1.bfill())\n", "\n", " # 选择用于模型训练的列名称\n", " col_for_training = df_test_1.columns\n", " import joblib\n", " Best_model_DalyLGPrice = joblib.load(\"日度价格预测_最佳模型.pkl\")\n", " # 最新的一天为最后一行的数据\n", " df_test_1_Day = df_test_1.tail(1)\n", " # 移除不需要的列\n", " df_test_1_Day.index = df_test_1_Day[\"日期\"]\n", " df_test_1_Day = df_test_1_Day.drop([\"日期\"], axis= 1)\n", " df_test_1_Day=df_test_1_Day.drop('京博指导价',axis=1)\n", " df_test_1_Day=df_test_1_Day.dropna()\n", "\n", " # df_test_1_Day\n", " #预测今日价格,显示至小数点后两位\n", " Ypredict_Today=Best_model_DalyLGPrice.predict(df_test_1_Day)\n", "\n", " df_test_1_Day['日度预测价格']=Ypredict_Today\n", " print(df_test_1_Day['日度预测价格'])\n", " a = df_test_1_Day['日度预测价格']\n", " a = a[0]\n", " a = float(a)\n", " a = round(a,2)\n", " return a\n", "def optimize_Model():\n", " from sklearn.model_selection import train_test_split\n", " from sklearn.impute import SimpleImputer\n", " from sklearn.preprocessing import OrdinalEncoder\n", " from sklearn.feature_selection import SelectFromModel\n", " from sklearn.metrics import mean_squared_error, r2_score\n", "\n", " pd.set_option('display.max_rows',40) \n", " pd.set_option('display.max_columns',40) \n", " df_test = pd.read_excel('沥青数据项.xlsx')\n", " df_test.drop([0],inplace=True)\n", " df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", " '京博签单量','京博库存量','京博产量','加权平均成交价']] = df_test[['汽油执行价','柴油执行价','齐鲁石化销量','齐鲁石化产量','齐鲁石化成交价','齐鲁石化库存','科力达销量',\n", " '科力达产量','科力达成交价','科力达库存','弘润销量','弘润产量','弘润成交价','弘润库存','市场成交价','京博指导价',\n", " '布伦特上周收盘价','布伦特昨日收盘价','布伦特收盘价','上期所沥青主力合约','隆重资讯沥青日开工率','隆重资讯沥青月库存',\n", " '隆重资讯沥青月产量','隆重资讯沥青表观消费量','隆重资讯社会库存率','厂区库容','京博提货量','即期成本','异地库库存',\n", " '京博签单量','京博库存量','京博产量','加权平均成交价']].astype('float')\n", " # df_test = pd.read_csv('定价模型数据收集20190901-20230615.csv',encoding = 'gbk',engine = 'python')\n", " # df_test['日期']=pd.to_datetime(df_test['日期'], format='%m/%d/%Y',infer_datetime_format=True)\n", " df_test['日期']=pd.to_datetime(df_test['日期'], format='%Y-%m-%d',infer_datetime_format=True)\n", " # df_test.tail(3)\n", " MisVal_Check=df_test.isnull().sum().sort_values(ascending=False)\n", " #去掉缺失值百分比>0.4的特征,去掉这些特征后的新表格命名为df_test_1\n", " df_MisVal_Check = pd.DataFrame(MisVal_Check,)#\n", " df_MisVal_Check_1=df_MisVal_Check.reset_index()\n", " df_MisVal_Check_1.columns=['Variable_Name','Missing_Number'] \n", " df_MisVal_Check_1['Missing_Number']=df_MisVal_Check_1['Missing_Number']/len(df_test)\n", " df_test_1=df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number']>0.4].Variable_Name,axis = 1)\n", " #将缺失值补为前一个或者后一个数值\n", " df_test_1=df_test_1.fillna(df_test.ffill())\n", " df_test_1=df_test_1.fillna(df_test_1.bfill())\n", " df_test_1[\"日期\"] = pd.to_datetime(df_test_1[\"日期\"])\n", " df_test_1.index = df_test_1[\"日期\"]\n", " df_test_1 = df_test_1.drop([\"日期\"], axis= 1)\n", " dataset1=df_test_1.drop('京博指导价',axis=1)#.astype(float)\n", "\n", " y=df_test_1['京博指导价']\n", "\n", " x=dataset1 \n", "\n", " train = x\n", " target = y\n", "\n", " #切割数据样本集合测试集\n", " X_train,x_test,y_train,y_true = train_test_split(train,target,test_size=0.2,random_state=0)\n", " \n", " \n", " from sklearn.linear_model import Lasso\n", " from xgboost import XGBRegressor\n", "\n", " import statsmodels.api as sm\n", " # from keras.preprocessing.sequence import TimeseriesGenerator\n", " from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator\n", "\n", " import plotly.express as px\n", " import plotly.graph_objects as go\n", "\n", " import xgboost as xgb\n", " from xgboost import plot_importance, plot_tree\n", " from sklearn.metrics import mean_absolute_error\n", " from statsmodels.tools.eval_measures import mse,rmse\n", " from sklearn.model_selection import GridSearchCV\n", " from xgboost import XGBRegressor\n", " import warnings\n", " import pickle\n", "\n", " from sklearn.metrics import mean_squared_error\n", "\n", " #切割训练数据和样本数据\n", " from sklearn.model_selection import train_test_split\n", "\n", " #用于模型评分\n", " from sklearn.metrics import r2_score\n", "\n", " #模型缩写\n", " Lasso = Lasso(random_state=0)\n", " XGBR = XGBRegressor(random_state=0)\n", " Lasso.fit(X_train,y_train)\n", " XGBR.fit(X_train,y_train)\n", " y_pre_Lasso = Lasso.predict(x_test)\n", " y_pre_XGBR = XGBR.predict(x_test)\n", "\n", " #计算Lasso、XGBR、RandomForestR、AdaBoostR、GradientBoostingR、BaggingRegressor各模型的R²\n", " Lasso_score = r2_score(y_true,y_pre_Lasso)\n", " XGBR_score=r2_score(y_true,y_pre_XGBR)\n", "\n", " #计算Lasso、XGBR的MSE和RMSE\n", " Lasso_MSE=mean_squared_error(y_true, y_pre_Lasso)\n", " XGBR_MSE=mean_squared_error(y_true, y_pre_XGBR)\n", "\n", " Lasso_RMSE=np.sqrt(Lasso_MSE)\n", " XGBR_RMSE=np.sqrt(XGBR_MSE)\n", " model_results = pd.DataFrame([['Lasso', Lasso_RMSE, Lasso_score],\n", " ['XgBoost', XGBR_RMSE, XGBR_score]],\n", " columns = ['模型(Model)','均方根误差(RMSE)', 'R^2 score'])\n", " model_results1=model_results.set_index('模型(Model)')\n", "\n", " def plot_feature_importance(importance,names,model_type):\n", " feature_importance = np.array(importance)\n", " feature_names = np.array(names)\n", "\n", " data={'feature_names':feature_names,'feature_importance':feature_importance}\n", " fi_df = pd.DataFrame(data)\n", "\n", " fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True)\n", "\n", " plt.figure(figsize=(10,8))\n", " sn.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names'])\n", "\n", " plt.title(model_type + \" \"+'FEATURE IMPORTANCE')\n", " plt.xlabel('FEATURE IMPORTANCE')\n", " plt.ylabel('FEATURE NAMES')\n", " from pylab import mpl\n", " %pylab\n", " mpl.rcParams['font.sans-serif'] = ['SimHei']\n", " from xgboost import XGBRegressor\n", " from sklearn.model_selection import GridSearchCV\n", "\n", " estimator = XGBRegressor(random_state=0,\n", " nthread=4,\n", " seed=0\n", " )\n", " parameters = {\n", " 'max_depth': range (2, 11, 2), # 树的最大深度\n", " 'n_estimators': range (50, 101, 10), # 迭代次数\n", " 'learning_rate': [0.01, 0.03, 0.1, 0.3, 0.5, 1]\n", " }\n", "\n", " grid_search_XGB = GridSearchCV(\n", " estimator=estimator,\n", " param_grid=parameters,\n", " # n_jobs = 10,\n", " cv = 3,\n", " verbose=True\n", " )\n", "\n", " grid_search_XGB.fit(X_train, y_train)\n", " print(\"Best score: %0.3f\" % grid_search_XGB.best_score_)\n", " print(\"Best parameters set:\")\n", " best_parameters = grid_search_XGB.best_estimator_.get_params()\n", " for param_name in sorted(parameters.keys()):\n", " print(\"\\t%s: %r\" % (param_name, best_parameters[param_name]))\n", " y_pred = grid_search_XGB.predict(x_test)\n", "\n", " op_XGBR_score = r2_score(y_true,y_pred)\n", " op_XGBR_MSE= mean_squared_error(y_true, y_pred)\n", " op_XGBR_RMSE= np.sqrt(op_XGBR_MSE)\n", "\n", " model_results2 = pd.DataFrame([['Optimized_Xgboost', op_XGBR_RMSE, op_XGBR_score]],\n", " columns = ['模型(Model)', '均方根误差(RMSE)', 'R^2 score'])\n", " model_results2=model_results2.set_index('模型(Model)')\n", "\n", " # results = model_results1.append(model_results2, ignore_index = False)\n", " results = pd.concat([model_results1,model_results2],ignore_index=True)\n", " results\n", " import pickle\n", "\n", " Pkl_Filename = \"日度价格预测_最佳模型.pkl\" \n", "\n", " with open(Pkl_Filename, 'wb') as file: \n", " pickle.dump(grid_search_XGB, file)\n", "\n", "def read_xls_data():\n", " \"\"\"获取特征项ID\"\"\"\n", " global one_cols, two_cols\n", " # 使用pandas读取Excel文件\n", " df = pd.read_excel(read_file_path_name, header=None) # 不自动识别列名\n", " # 获取第二行数据(索引为1)\n", " one_cols = df.iloc[1].tolist()[1:]\n", " print(f'获取到的数据项ID{one_cols}')\n", "\n", "# def read_xls_data():\n", "# global one_cols, two_cols\n", "# # 打开 XLS 文件\n", "# workbook = xlrd.open_workbook(read_file_path_name)\n", "\n", "# # 获取所有表格名称\n", "# # sheet_names = workbook.sheet_names()\n", "\n", "# # 选择第一个表格\n", "# sheet = workbook.sheet_by_index(0)\n", "\n", "# # 获取行数和列数\n", "# num_rows = sheet.nrows\n", "# # num_cols = sheet.ncols\n", "\n", "# # 遍历每一行,获取单元格数据\n", "# # for i in range(num_rows):\n", "# # row_data = sheet.row_values(i)\n", "# # one_cols.append(row_data)\n", "# # two_cols.append(row_data[1])\n", "\n", "# row_data = sheet.row_values(1)\n", "# one_cols = row_data\n", "\n", "# # 关闭 XLS 文件\n", "# # workbook.close()\n", "\n", "def start_3(date=None,token=None,token_push=None):\n", " '''预测上传数据'''\n", " read_xls_data()\n", "\n", " if date==None:\n", " date = datetime.now()\n", " if token==None:\n", " token = get_head_auth()\n", " if token_push==None:\n", " token = get_head_auth()\n", "\n", " datas = get_data_value(token, one_cols[1:],date)\n", " if not datas:\n", " return\n", "\n", " # data_list = [two_cols, one_cols]\n", " append_rows = [get_cur_time(date)[1]]\n", " dataItemNo_dataValue = {}\n", " for data_value in datas:\n", " if \"dataValue\" not in data_value:\n", " print(data_value)\n", " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", " else:\n", " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", " \n", " for value in one_cols[1:]:\n", " if value in dataItemNo_dataValue:\n", " append_rows.append(dataItemNo_dataValue[value])\n", " else:\n", " append_rows.append(\"\")\n", " save_xls(append_rows)\n", "\n", " # 获取当月的数据写入到指定文件\n", " # optimize_Model()\n", " # upload_data_to_system(token_push,date)\n", " # data_list.append(three_cols)\n", " # write_xls(data_list)\n", "\n", "def check_data(dataItemNo):\n", " token = get_head_auth()\n", " if not token:\n", " return\n", "\n", " datas = get_data_value(token, dataItemNo)\n", " if not datas:\n", " return\n", "\n", "def save_xls(append_rows):\n", "\n", " # 打开xls文件\n", " workbook = xlrd.open_workbook('沥青数据项.xls')\n", "\n", " # 获取所有sheet的个数\n", " sheet_count = len(workbook.sheet_names())\n", "\n", " # 获取所有sheet的名称\n", " sheet_names = workbook.sheet_names()\n", "\n", " new_workbook = xlwt.Workbook()\n", " for i in range(sheet_count):\n", " # 获取当前sheet\n", " sheet = workbook.sheet_by_index(i)\n", "\n", " # 获取sheet的行数和列数\n", " row_count = sheet.nrows\n", " col_count = sheet.ncols\n", " # 获取原有数据\n", " data = []\n", " for row in range(row_count):\n", " row_data = []\n", " for col in range(col_count):\n", " row_data.append(sheet.cell_value(row, col))\n", " data.append(row_data)\n", " # 创建xlwt的Workbook对象\n", " # 创建sheet\n", " new_sheet = new_workbook.add_sheet(sheet_names[i])\n", "\n", " # 将原有的数据写入新的sheet\n", " for row in range(row_count):\n", " for col in range(col_count):\n", " new_sheet.write(row, col, data[row][col])\n", "\n", " if i == 0:\n", " # 在新的sheet中添加数据\n", " for col in range(col_count):\n", " new_sheet.write(row_count, col, append_rows[col])\n", "\n", " # 保存新的xls文件\n", " new_workbook.save(\"沥青数据项.xlsx\")\n", "\n", "def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEnd):\n", "\n", " search_data = {\n", " \"funcModule\": \"数据项\",\n", " \"funcOperation\": \"查询\",\n", " \"data\": {\n", " \"dateStart\": dateStart,\n", " \"dateEnd\": dateEnd,\n", " \"dataItemNoList\": dataItemNoList # 数据项编码,代表 brent最低价和最高价\n", " }\n", " }\n", "\n", " headers = {\"Authorization\": token}\n", " search_res = requests.post(url=url, headers=headers, json=search_data, timeout=(3, 5))\n", " search_value = json.loads(search_res.text)[\"data\"]\n", " if search_value:\n", " return search_value\n", " else:\n", " return None\n", "\n", "\n", "def save_queryDataListItemNos_xls(data_df,dataItemNoList):\n", " current_year_month = datetime.now().strftime('%Y-%m')\n", " grouped = data_df.groupby(\"dataDate\")\n", "\n", " # 使用openpyxl打开xlsx文件\n", " from openpyxl import load_workbook\n", " workbook = load_workbook('沥青数据项.xlsx')\n", "\n", " # 创建新工作簿\n", " new_workbook = load_workbook('沥青数据项.xlsx')\n", " \n", " for sheetname in workbook.sheetnames:\n", " sheet = workbook[sheetname]\n", " new_sheet = new_workbook[sheetname]\n", " \n", " current_year_month_row = 0\n", " # 查找当前月份数据起始行\n", " for row_idx, row in enumerate(sheet.iter_rows(values_only=True), 1):\n", " if str(row[0]).startswith(current_year_month):\n", " current_year_month_row += 1\n", "\n", " # 追加新数据\n", " if sheetname == workbook.sheetnames[0]:\n", " start_row = sheet.max_row - current_year_month_row + 1\n", " for row_idx, (date, group) in enumerate(grouped, start=start_row):\n", " new_sheet.cell(row=row_idx, column=1, value=date)\n", " for j, dataItemNo in enumerate(dataItemNoList, start=2):\n", " if group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values:\n", " new_sheet.cell(row=row_idx, column=j, \n", " value=group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values[0])\n", "\n", " # 保存修改后的xlsx文件\n", " new_workbook.save(\"沥青数据项.xlsx\")\n", "\n", "\n", "# def save_queryDataListItemNos_xls(data_df,dataItemNoList):\n", "# from datetime import datetime, timedelta\n", "# current_year_month = datetime.now().strftime('%Y-%m')\n", "# grouped = data_df.groupby(\"dataDate\")\n", "\n", "# # 打开xls文件\n", "# workbook = xlrd.open_workbook('沥青数据项.xlsx')\n", "\n", "# # 获取所有sheet的个数\n", "# sheet_count = len(workbook.sheet_names())\n", "\n", "# # 获取所有sheet的名称\n", "# sheet_names = workbook.sheet_names()\n", "\n", "# new_workbook = xlwt.Workbook()\n", "# for i in range(sheet_count):\n", "# # 获取当前sheet\n", "# sheet = workbook.sheet_by_index(i)\n", "\n", "# # 获取sheet的行数和列数\n", "# row_count = sheet.nrows\n", "# col_count = sheet.ncols\n", "# # 获取原有数据\n", "# data = []\n", "# for row in range(row_count):\n", "# row_data = []\n", "# for col in range(col_count):\n", "# row_data.append(sheet.cell_value(row, col))\n", "# data.append(row_data)\n", "\n", "# # 创建xlwt的Workbook对象\n", "# # 创建sheet\n", "# new_sheet = new_workbook.add_sheet(sheet_names[i])\n", "\n", "\n", "# current_year_month_row = 0\n", "# # 将原有的数据写入新的sheet\n", "# for row in range(row_count):\n", "# for col in range(col_count):\n", "# col0 = data[row][0]\n", "# # print(\"col0\",col0[:7])\n", "# if col0[:7] == current_year_month:\n", "# current_year_month_row += 1\n", "# break\n", "# new_sheet.write(row, col, data[row][col])\n", "\n", "\n", "# # print(\"current_year_month_row\",current_year_month_row)\n", "# if i == 0:\n", "# rowFlag = 0\n", "# # 查看每组数据\n", "# for date, group in grouped:\n", "# new_sheet.write(row_count + rowFlag - current_year_month_row, 0, date)\n", "# for j in range(len(dataItemNoList)):\n", "# dataItemNo = dataItemNoList[j]\n", "\n", "# # for dataItemNo in dataItemNoList:\n", "# if group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values:\n", "\n", "# new_sheet.write(row_count + rowFlag - current_year_month_row, j + 1, group[group[\"dataItemNo\"] == dataItemNo][\"dataValue\"].values[0])\n", "\n", "# rowFlag += 1\n", "\n", "\n", "# # 保存新的xls文件\n", "# new_workbook.save(\"沥青数据项.xlsx\")\n", "\n", "def queryDataListItemNos(token=None):\n", " from datetime import datetime, timedelta\n", " df = pd.read_excel('沥青数据项.xlsx')\n", " dataItemNoList = df.iloc[0].tolist()[1:]\n", " \n", " if token is None:\n", " token = get_head_auth()\n", "\n", " if not token:\n", " print('token获取失败')\n", " return\n", "\n", " # 获取当前日期\n", " current_date = datetime.now()\n", "\n", " # 获取当月1日\n", " first_day_of_month = current_date.replace(day=1)\n", "\n", " # 格式化为 YYYYMMDD 格式\n", " dateEnd = current_date.strftime('%Y%m%d')\n", " dateStart = first_day_of_month.strftime('%Y%m%d')\n", "\n", " search_value = get_queryDataListItemNos_value(token, queryDataListItemNos_url, dataItemNoList, dateStart, dateEnd)\n", "\n", "\n", " data_df = pd.DataFrame(search_value)\n", "\n", " data_df[\"dataDate\"] = pd.to_datetime(data_df[\"dataDate\"])\n", " data_df[\"dataDate\"] = data_df[\"dataDate\"].dt.strftime('%Y-%m-%d')\n", " save_queryDataListItemNos_xls(data_df,dataItemNoList)\n", " print('当月数据更新完成')\n", "\n", "\n", "def save_xls_1(append_rows):\n", "\n", " # 打开xls文件\n", " workbook = xlrd.open_workbook('沥青数据项.xlsx')\n", "\n", " # 获取所有sheet的个数\n", " sheet_count = len(workbook.sheet_names())\n", "\n", " # 获取所有sheet的名称\n", " sheet_names = workbook.sheet_names()\n", "\n", " new_workbook = xlwt.Workbook()\n", " for i in range(sheet_count):\n", " # 获取当前sheet\n", " sheet = workbook.sheet_by_index(i)\n", "\n", " # 获取sheet的行数和列数\n", " row_count = sheet.nrows - 1\n", " col_count = sheet.ncols\n", " # 获取原有数据\n", " data = []\n", " for row in range(row_count):\n", " row_data = []\n", " for col in range(col_count):\n", " row_data.append(sheet.cell_value(row, col))\n", " data.append(row_data)\n", " # 创建xlwt的Workbook对象\n", " # 创建sheet\n", " new_sheet = new_workbook.add_sheet(sheet_names[i])\n", "\n", " # 将原有的数据写入新的sheet\n", " for row in range(row_count):\n", " for col in range(col_count):\n", " new_sheet.write(row, col, data[row][col])\n", "\n", " if i == 0:\n", " # 在新的sheet中添加数据\n", " for col in range(col_count):\n", " new_sheet.write(row_count, col, append_rows[col])\n", "\n", " # 保存新的xls文件\n", " new_workbook.save(\"沥青数据项.xlsx\") \n", "\n", "\n", "\n", "def start(date=''):\n", " \"\"\"获取当日数据\"\"\"\n", " read_xls_data()\n", " token = get_head_auth()\n", " if not token:\n", " return\n", " \n", " cur_time,cur_time2 = getNow(date)\n", " print(f\"获取{cur_time}数据\")\n", " datas = get_data_value(token, one_cols,date=cur_time)\n", " print(len(datas))\n", " print(datas)\n", " if not datas:\n", " return\n", "\n", " append_rows = [cur_time2]\n", " dataItemNo_dataValue = {}\n", " for data_value in datas:\n", " if \"dataValue\" not in data_value:\n", " print(data_value)\n", " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", " else:\n", " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", " \n", " for value in one_cols:\n", " if value in dataItemNo_dataValue:\n", " append_rows.append(dataItemNo_dataValue[value])\n", " else:\n", " append_rows.append(\"\")\n", " \n", " print('添加的行:',len(append_rows),append_rows)\n", " save_xls_2(append_rows)\n", "\n", "\n", "def save_xls_2(append_rows):\n", " \"\"\"保存或更新数据到Excel文件\n", " 参数:\n", " append_rows (list): 需要追加/更新的数据行,格式为[日期, 数据项1, 数据项2,...]\n", " \"\"\"\n", " # try:\n", " # 读取现有数据(假设第一行为列名)\n", " df = pd.read_excel('沥青数据项.xlsx', sheet_name=0)\n", " print('文件中的数据列数:',len(df.columns),df.columns)\n", " # 转换append_rows为DataFrame\n", " if len(append_rows) != len(df.columns):\n", " # 去除第二个元素 ,不知道什么原因多一个空数据\n", " append_rows.pop(1)\n", " append_rows = pd.DataFrame([append_rows],columns=df.columns)\n", " # 创建新数据行\n", " new_date = append_rows['日期'].values[0]\n", " \n", " dates = df['日期'].to_list()\n", " # 判断日期是否存在\n", " if new_date in dates:\n", " # 找到日期所在行的索引\n", " date_mask = df['日期'] == new_date\n", " # 存在则更新数据\n", " df.loc[date_mask] = append_rows.values\n", " print(f\"更新 {new_date} 数据\")\n", " else:\n", " # 不存在则追加数据\n", " df = pd.concat([df, append_rows], ignore_index=True)\n", " print(df.head())\n", " print(df.tail())\n", " print(f\"插入 {new_date} 新数据\")\n", " \n", " # 保存更新后的数据\n", " df.to_excel('沥青数据项.xlsx', index=False, engine='openpyxl')\n", " \n", " # except FileNotFoundError:\n", " # # 如果文件不存在则创建新文件\n", " # pd.DataFrame([append_rows]).to_excel('沥青数据项.xlsx', index=False, engine='openpyxl')\n", " # except Exception as e:\n", " # print(f\"保存数据时发生错误: {str(e)}\")\n", "\n", "\n", "def main(start_date=None,token=None,token_push=None):\n", " from datatime import datetime\n", " if start_date is None:\n", " start_date = datetime.now()\n", " if token is None:\n", " token = get_head_auth()\n", " if token_push is None:\n", " token_push = get_head_push_auth()\n", " date = start_date.strftime('%Y%m%d')\n", " print(date)\n", "# start(date)\n", " # 更新当月数据\n", " queryDataListItemNos(token)\n", " # 训练模型\n", " optimize_Model()\n", " # # 预测&上传预测结果\n", " upload_data_to_system(token_push,start_date)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "ename": "AttributeError", "evalue": "module 'datetime' has no attribute 'now'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[5], line 20\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# if __name__ == \"__main__\":\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m# print(\"运行中ing...\")\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;66;03m# # 每天定时12点运行\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 18\u001b[0m \u001b[38;5;66;03m# except :\u001b[39;00m\n\u001b[0;32m 19\u001b[0m \u001b[38;5;66;03m# print(f\"{current_time}任务失败\")\u001b[39;00m\n\u001b[1;32m---> 20\u001b[0m main()\n", "Cell \u001b[1;32mIn[1], line 848\u001b[0m, in \u001b[0;36mmain\u001b[1;34m(start_date, token, token_push)\u001b[0m\n\u001b[0;32m 846\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mmain\u001b[39m(start_date\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,token\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,token_push\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m 847\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m start_date \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m--> 848\u001b[0m start_date \u001b[38;5;241m=\u001b[39m datetime\u001b[38;5;241m.\u001b[39mnow()\n\u001b[0;32m 849\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m token \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 850\u001b[0m token \u001b[38;5;241m=\u001b[39m get_head_auth()\n", "\u001b[1;31mAttributeError\u001b[0m: module 'datetime' has no attribute 'now'" ] } ], "source": [ "\n", "# if __name__ == \"__main__\":\n", "# print(\"运行中ing...\")\n", "# # 每天定时12点运行\n", "# while True:\n", "# # 获取当前时间\n", "# current_time = time.strftime(\"%H:%M:%S\", time.localtime())\n", "# # print(current_time)\n", "\n", "# # 判断当前时间是否为执行任务的时间点\n", "# try:\n", "# if current_time == \"09:13:00\":\n", "# print(\"执行定时任务\")\n", "# main()\n", "# elif current_time == \"09:18:00\":\n", "# print(\"执行定时任务\")\n", "# main()\n", "# time.sleep(1)\n", "# except :\n", "# print(f\"{current_time}任务失败\")\n", "main()\n", "main()\n", " # 检测数据准确性, 需要检测放开\n", " # check_data(\"100028098|LISTING_PRICE\")\n", " # check_data(\"9137070016544622XB|DAY_Yield\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# start_date = datetime(2025, 5, 16)\n", "# end_date = datetime(2025, 5, 19)\n", "# token = get_head_auth()\n", "\n", "# token_push = get_head_push_auth()\n", "\n", "# while start_date < end_date:\n", "# main(start_date,token,token_push)\n", "# start_date += timedelta(days=1)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "### 代码备份:\n", "\n", "\n", "class codeBackup:\n", "\n", " \n", "\n", "\n", " def write_xls(data,date):\n", " # 创建一个Workbook对象\n", " workbook = xlwt.Workbook()\n", "\n", " # 创建一个Sheet对象,可指定名称\n", " sheet = workbook.load('Sheet1')\n", "\n", " # 写入数据行\n", " for row_index, row_data in enumerate(data):\n", " for col_index, cell_data in enumerate(row_data):\n", " sheet.write(row_index, col_index, cell_data)\n", "\n", " # 保存Workbook到文件\n", " workbook.save(get_cur_time(date)[0] + '.xls')\n", "\n", "\n", " def start():\n", " '''预测上传数据'''\n", " read_xls_data()\n", "\n", " token = get_head_auth()\n", " if not token:\n", " return\n", " token_push = get_head_push_auth()\n", " if not token_push:\n", " return\n", "\n", " datas = get_data_value(token, one_cols[1:])\n", " if not datas:\n", " return\n", "\n", " # data_list = [two_cols, one_cols]\n", " append_rows = [get_cur_time()[1]]\n", " dataItemNo_dataValue = {}\n", " for data_value in datas:\n", " if \"dataValue\" not in data_value:\n", " print(data_value)\n", " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", " else:\n", " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", " \n", " for value in one_cols[1:]:\n", " if value in dataItemNo_dataValue:\n", " append_rows.append(dataItemNo_dataValue[value])\n", " else:\n", " append_rows.append(\"\")\n", " save_xls(append_rows)\n", " \n", " # 获取当月的数据写入到指定文件\n", " queryDataListItemNos(token)\n", " optimize_Model()\n", " upload_data_to_system(token_push)\n", " # data_list.append(three_cols)\n", " # write_xls(data_list)\n", "\n", "\n", "\n", "\n", " def start_1():\n", " '''更新数据'''\n", " print(\"更新当天数据\")\n", " read_xls_data()\n", "\n", " token = get_head_auth()\n", " if not token:\n", " return\n", " \n", "\n", " datas = get_data_value(token, one_cols[1:])\n", " if not datas:\n", " return\n", "\n", " # data_list = [two_cols, one_cols]\n", " append_rows = [get_cur_time()[1]]\n", " dataItemNo_dataValue = {}\n", " for data_value in datas:\n", " if \"dataValue\" not in data_value:\n", " print(data_value)\n", " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = \"\"\n", " else:\n", " dataItemNo_dataValue[data_value[\"dataItemNo\"]] = data_value[\"dataValue\"]\n", " \n", " for value in one_cols[1:]:\n", " if value in dataItemNo_dataValue:\n", " append_rows.append(dataItemNo_dataValue[value])\n", " else:\n", " append_rows.append(\"\")\n", " print(\"当天数据为:\",append_rows)\n", " save_xls_1(append_rows)\n", "\n", " \n", " # data_list.append(three_cols)\n", " # write_xls(data_list)\n" ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.7" } }, "nbformat": 4, "nbformat_minor": 4 }