液化气预测调试

This commit is contained in:
workpc 2025-07-08 15:54:05 +08:00
parent d529d0cee6
commit 0ca7553951
3 changed files with 189 additions and 174 deletions

View File

@ -1,3 +1,30 @@
from statsmodels.tools.eval_measures import mse, rmse
from pandas import Series, DataFrame
import cufflinks as cf
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import pickle
import warnings
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error
from xgboost import plot_importance, plot_tree
import xgboost as xgb
import plotly.graph_objects as go
import plotly.express as px
import statsmodels.api as sm
from xgboost import XGBRegressor
from sklearn.linear_model import Lasso
import sklearn.datasets as datasets
from sklearn import preprocessing
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly import __version__
import time
import random
import seaborn as sn
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests import requests
import json import json
@ -38,60 +65,24 @@ read_file_path_name = "液化气数据.xlsx"
one_cols = [] one_cols = []
two_cols = [] two_cols = []
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
import random
import time
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from sklearn import preprocessing
from pandas import Series,DataFrame
import matplotlib.pyplot as plt
import sklearn.datasets as datasets
# 导入机器学习算法模型 # 导入机器学习算法模型
from sklearn.linear_model import Lasso
from xgboost import XGBRegressor
import statsmodels.api as sm
try: try:
from keras.preprocessing.sequence import TimeseriesGenerator from keras.preprocessing.sequence import TimeseriesGenerator
except: except:
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
import plotly.express as px
import plotly.graph_objects as go
import xgboost as xgb
from xgboost import plot_importance, plot_tree
from sklearn.metrics import mean_absolute_error
from statsmodels.tools.eval_measures import mse,rmse
from sklearn.model_selection import GridSearchCV
from xgboost import XGBRegressor
import warnings
import pickle
from sklearn.metrics import mean_squared_error
# 切割训练数据和样本数据 # 切割训练数据和样本数据
from sklearn.model_selection import train_test_split
# 用于模型评分 # 用于模型评分
from sklearn.metrics import r2_score
le = preprocessing.LabelEncoder() le = preprocessing.LabelEncoder()
# print(__version__) # requires version >= 1.9.0 # print(__version__) # requires version >= 1.9.0
import cufflinks as cf
cf.go_offline() cf.go_offline()
random.seed(100) random.seed(100)
@ -121,9 +112,9 @@ def get_data_value(token, dataItemNoList,date):
"funcOperation": "查询" "funcOperation": "查询"
} }
headers = {"Authorization": token} headers = {"Authorization": token}
search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5)) search_res = requests.post(
url=search_url, headers=headers, json=search_data, timeout=(3, 5))
print('数据项查询参数search_data') print('数据项查询参数search_data')
print(search_data) print(search_data)
print('数据项查询结果search_res') print('数据项查询结果search_res')
@ -145,7 +136,8 @@ def get_data_value(token, dataItemNoList,date):
def get_head_push_auth(): def get_head_push_auth():
login_res = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5)) login_res = requests.post(
url=login_push_url, json=login_push_data, timeout=(3, 5))
text = json.loads(login_res.text) text = json.loads(login_res.text)
if text["status"]: if text["status"]:
token = text["data"]["accessToken"] token = text["data"]["accessToken"]
@ -155,7 +147,6 @@ def get_head_push_auth():
return None return None
def upload_data_to_system(token_push, date): def upload_data_to_system(token_push, date):
data = { data = {
"funcModule": "数据表信息列表", "funcModule": "数据表信息列表",
@ -170,22 +161,25 @@ def upload_data_to_system(token_push,date):
] ]
} }
headers = {"Authorization": token_push} headers = {"Authorization": token_push}
res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5)) res = requests.post(url=upload_url, headers=headers,
json=data, timeout=(3, 5))
print(res.text) print(res.text)
print('预测值:', data['data'][0]['dataValue']) print('预测值:', data['data'][0]['dataValue'])
price_list = [] price_list = []
def forecast_price(): def forecast_price():
# df_test = pd.read_csv('定价模型数据收集0212.csv') # df_test = pd.read_csv('定价模型数据收集0212.csv')
df_test = pd.read_excel('液化气数据.xlsx') df_test = pd.read_excel('液化气数据.xlsx')
df_test.drop([0], inplace=True) df_test.drop([0], inplace=True)
try: try:
df_test['Date']=pd.to_datetime(df_test['Date'], format='%m/%d/%Y',infer_datetime_format=True) df_test['Date'] = pd.to_datetime(
df_test['Date'], format='%m/%d/%Y', infer_datetime_format=True)
except: except:
df_test['Date']=pd.to_datetime(df_test['Date'], format=r'%Y-%m-%d',infer_datetime_format=True) df_test['Date'] = pd.to_datetime(
df_test['Date'], format=r'%Y-%m-%d', infer_datetime_format=True)
df_test_1 = df_test df_test_1 = df_test
df_test_1 = df_test_1.fillna(df_test.ffill()) df_test_1 = df_test_1.fillna(df_test.ffill())
@ -194,9 +188,6 @@ def forecast_price():
# 选择用于模型训练的列名称 # 选择用于模型训练的列名称
col_for_training = df_test_1.columns col_for_training = df_test_1.columns
import joblib import joblib
Best_model_DalyLGPrice = joblib.load("日度价格预测_液化气最佳模型.pkl") Best_model_DalyLGPrice = joblib.load("日度价格预测_液化气最佳模型.pkl")
# 最新的一天为最后一行的数据 # 最新的一天为最后一行的数据
@ -221,6 +212,8 @@ def forecast_price():
a = round(a, 2) a = round(a, 2)
price_list.append(a) price_list.append(a)
return a return a
def optimize_Model(): def optimize_Model():
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer from sklearn.impute import SimpleImputer
@ -234,10 +227,11 @@ def optimize_Model():
df_test = pd.read_excel('液化气数据.xlsx') df_test = pd.read_excel('液化气数据.xlsx')
df_test.drop([0], inplace=True) df_test.drop([0], inplace=True)
try: try:
df_test['Date']=pd.to_datetime(df_test['Date'], format='%m/%d/%Y',infer_datetime_format=True) df_test['Date'] = pd.to_datetime(
df_test['Date'], format='%m/%d/%Y', infer_datetime_format=True)
except: except:
df_test['Date']=pd.to_datetime(df_test['Date'], format=r'%Y-%m-%d',infer_datetime_format=True) df_test['Date'] = pd.to_datetime(
df_test['Date'], format=r'%Y-%m-%d', infer_datetime_format=True)
# 将缺失值补为前一个或者后一个数值 # 将缺失值补为前一个或者后一个数值
df_test_1 = df_test df_test_1 = df_test
@ -248,7 +242,6 @@ def optimize_Model():
df_test_1 = df_test_1.drop(["Date"], axis=1) df_test_1 = df_test_1.drop(["Date"], axis=1)
df_test_1 = df_test_1.astype('float') df_test_1 = df_test_1.astype('float')
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from pandas import Series, DataFrame from pandas import Series, DataFrame
@ -297,7 +290,8 @@ def optimize_Model():
target = y target = y
# 切割数据样本集合测试集 # 切割数据样本集合测试集
X_train,x_test,y_train,y_true = train_test_split(train,target,test_size=0.2,random_state=0) X_train, x_test, y_train, y_true = train_test_split(
train, target, test_size=0.2, random_state=0)
# 模型缩写 # 模型缩写
Lasso = Lasso(random_state=0) Lasso = Lasso(random_state=0)
@ -328,14 +322,17 @@ def optimize_Model():
model_results1 model_results1
# 定义plot_feature_importance函数该函数用于计算特征重要性。此部分代码无需调整 # 定义plot_feature_importance函数该函数用于计算特征重要性。此部分代码无需调整
def plot_feature_importance(importance, names, model_type): def plot_feature_importance(importance, names, model_type):
feature_importance = np.array(importance) feature_importance = np.array(importance)
feature_names = np.array(names) feature_names = np.array(names)
data={'feature_names':feature_names,'feature_importance':feature_importance} data = {'feature_names': feature_names,
'feature_importance': feature_importance}
fi_df = pd.DataFrame(data) fi_df = pd.DataFrame(data)
fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True) fi_df.sort_values(by=['feature_importance'],
ascending=False, inplace=True)
plt.figure(figsize=(10, 8)) plt.figure(figsize=(10, 8))
sn.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names']) sn.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names'])
@ -345,7 +342,7 @@ def optimize_Model():
plt.ylabel('FEATURE NAMES') plt.ylabel('FEATURE NAMES')
from pylab import mpl from pylab import mpl
mpl.rcParams['font.sans-serif'] = ['SimHei'] mpl.rcParams['font.sans-serif'] = ['SimHei']
## Xgboost 模型参数优化-初步 # Xgboost 模型参数优化-初步
# 参考: https://juejin.im/post/6844903661013827598 # 参考: https://juejin.im/post/6844903661013827598
# 每次调参时备选参数数值以同数量级的1、3、10设置即可比如设置1、3、10或0.1、0.3、1.0或0.01,0.03,0.10即可) # 每次调参时备选参数数值以同数量级的1、3、10设置即可比如设置1、3、10或0.1、0.3、1.0或0.01,0.03,0.10即可)
@ -387,7 +384,8 @@ def optimize_Model():
try: try:
results = model_results1.append(model_results2, ignore_index=False) results = model_results1.append(model_results2, ignore_index=False)
except: except:
results = pd.concat([model_results1,model_results2],ignore_index=True) results = pd.concat(
[model_results1, model_results2], ignore_index=True)
import pickle import pickle
Pkl_Filename = "日度价格预测_液化气最佳模型.pkl" Pkl_Filename = "日度价格预测_液化气最佳模型.pkl"
@ -395,6 +393,7 @@ def optimize_Model():
with open(Pkl_Filename, 'wb') as file: with open(Pkl_Filename, 'wb') as file:
pickle.dump(grid_search_XGB, file) pickle.dump(grid_search_XGB, file)
def read_xls_data(): def read_xls_data():
"""获取特征项ID""" """获取特征项ID"""
global one_cols, two_cols global one_cols, two_cols
@ -425,7 +424,8 @@ def start(date=''):
print(data_value) print(data_value)
dataItemNo_dataValue[data_value["dataItemNo"]] = "" dataItemNo_dataValue[data_value["dataItemNo"]] = ""
else: else:
dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] dataItemNo_dataValue[data_value["dataItemNo"]
] = data_value["dataValue"]
for value in one_cols: for value in one_cols:
if value in dataItemNo_dataValue: if value in dataItemNo_dataValue:
@ -472,6 +472,7 @@ def getNow(date='', offset=0):
compact_date = date_str.replace("-", "") compact_date = date_str.replace("-", "")
return compact_date, date_str return compact_date, date_str
def start_1(date=''): def start_1(date=''):
"""补充昨日数据""" """补充昨日数据"""
read_xls_data() read_xls_data()
@ -493,7 +494,8 @@ def start_1(date=''):
print(data_value) print(data_value)
dataItemNo_dataValue[data_value["dataItemNo"]] = "" dataItemNo_dataValue[data_value["dataItemNo"]] = ""
else: else:
dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] dataItemNo_dataValue[data_value["dataItemNo"]
] = data_value["dataValue"]
for value in one_cols: for value in one_cols:
if value in dataItemNo_dataValue: if value in dataItemNo_dataValue:
@ -537,10 +539,12 @@ def save_xls_2(append_rows):
except FileNotFoundError: except FileNotFoundError:
# 如果文件不存在则创建新文件 # 如果文件不存在则创建新文件
pd.DataFrame([append_rows]).to_excel('液化气数据.xlsx', index=False, engine='openpyxl') pd.DataFrame([append_rows]).to_excel(
'液化气数据.xlsx', index=False, engine='openpyxl')
except Exception as e: except Exception as e:
print(f"保存数据时发生错误: {str(e)}") print(f"保存数据时发生错误: {str(e)}")
def check_data(dataItemNo): def check_data(dataItemNo):
token = get_head_auth() token = get_head_auth()
if not token: if not token:
@ -550,6 +554,7 @@ def check_data(dataItemNo):
if not datas: if not datas:
return return
def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEnd): def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEnd):
search_data = { search_data = {
@ -563,13 +568,15 @@ def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEn
} }
headers = {"Authorization": token} headers = {"Authorization": token}
search_res = requests.post(url=url, headers=headers, json=search_data, timeout=(3, 5)) search_res = requests.post(
url=url, headers=headers, json=search_data, timeout=(3, 5))
search_value = json.loads(search_res.text)["data"] search_value = json.loads(search_res.text)["data"]
if search_value: if search_value:
return search_value return search_value
else: else:
return None return None
def save_queryDataListItemNos_xls(data_df, dataItemNoList): def save_queryDataListItemNos_xls(data_df, dataItemNoList):
from datetime import datetime, timedelta from datetime import datetime, timedelta
current_year_month = datetime.now().strftime('%Y-%m') current_year_month = datetime.now().strftime('%Y-%m')
@ -624,7 +631,9 @@ def queryDataListItemNos(date=None,token=None):
# 格式化为 YYYYMMDD 格式 # 格式化为 YYYYMMDD 格式
dateEnd = current_date.strftime('%Y%m%d') dateEnd = current_date.strftime('%Y%m%d')
dateStart = first_day_of_month.strftime('%Y%m%d') dateStart = first_day_of_month.strftime('%Y%m%d')
search_value = get_queryDataListItemNos_value(token, queryDataListItemNos_url, dataItemNoList, dateStart, dateEnd) # dateStart = '20250604'
search_value = get_queryDataListItemNos_value(
token, queryDataListItemNos_url, dataItemNoList, dateStart, dateEnd)
data_df = pd.DataFrame(search_value) data_df = pd.DataFrame(search_value)
data_df["dataDate"] = pd.to_datetime(data_df["dataDate"]) data_df["dataDate"] = pd.to_datetime(data_df["dataDate"])
data_df["dataDate"] = data_df["dataDate"].dt.strftime('%Y-%m-%d') data_df["dataDate"] = data_df["dataDate"].dt.strftime('%Y-%m-%d')
@ -632,8 +641,6 @@ def queryDataListItemNos(date=None,token=None):
print('当月数据更新完成') print('当月数据更新完成')
def main(start_date=None, token=None, token_push=None): def main(start_date=None, token=None, token_push=None):
from datetime import datetime, timedelta from datetime import datetime, timedelta
if start_date is None: if start_date is None:
@ -654,10 +661,18 @@ def main(start_date=None,token=None,token_push=None):
# start(date) # start(date)
# 训练模型 # 训练模型
optimize_Model() optimize_Model()
# # 预测&上传预测结果 # 预测&上传预测结果
upload_data_to_system(token_push, start_date) upload_data_to_system(token_push, start_date)
# forecast_price()
if __name__ == "__main__": if __name__ == "__main__":
print("运行中ing...") print("运行中ing...")
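# Backfill helper (disabled): iterate over every calendar day from 2025-07-01 to 2025-07-08 (freq='D' includes weekends; use freq='B' for business days only)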
# for i_time in pd.date_range('2025-7-1', '2025-7-8', freq='D'):
# # try:
# print(i_time)
# main(start_date=i_time)
# except Exception as e:
# continue
main() main()