液化气预测调试
This commit is contained in:
parent
d529d0cee6
commit
0ca7553951
Binary file not shown.
Binary file not shown.
@ -1,3 +1,30 @@
|
|||||||
|
from statsmodels.tools.eval_measures import mse, rmse
|
||||||
|
from pandas import Series, DataFrame
|
||||||
|
import cufflinks as cf
|
||||||
|
from sklearn.metrics import r2_score
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.metrics import mean_squared_error
|
||||||
|
import pickle
|
||||||
|
import warnings
|
||||||
|
from sklearn.model_selection import GridSearchCV
|
||||||
|
from sklearn.metrics import mean_absolute_error
|
||||||
|
from xgboost import plot_importance, plot_tree
|
||||||
|
import xgboost as xgb
|
||||||
|
import plotly.graph_objects as go
|
||||||
|
import plotly.express as px
|
||||||
|
import statsmodels.api as sm
|
||||||
|
from xgboost import XGBRegressor
|
||||||
|
from sklearn.linear_model import Lasso
|
||||||
|
import sklearn.datasets as datasets
|
||||||
|
from sklearn import preprocessing
|
||||||
|
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
|
||||||
|
from plotly import __version__
|
||||||
|
import time
|
||||||
|
import random
|
||||||
|
import seaborn as sn
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
import requests
|
import requests
|
||||||
import json
|
import json
|
||||||
|
|
||||||
@ -38,60 +65,24 @@ read_file_path_name = "液化气数据.xlsx"
|
|||||||
one_cols = []
|
one_cols = []
|
||||||
two_cols = []
|
two_cols = []
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
import seaborn as sn
|
|
||||||
import random
|
|
||||||
import time
|
|
||||||
|
|
||||||
from plotly import __version__
|
|
||||||
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
|
|
||||||
|
|
||||||
from sklearn import preprocessing
|
|
||||||
|
|
||||||
from pandas import Series,DataFrame
|
|
||||||
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
|
|
||||||
import sklearn.datasets as datasets
|
|
||||||
|
|
||||||
# 导入机器学习算法模型
|
# 导入机器学习算法模型
|
||||||
from sklearn.linear_model import Lasso
|
|
||||||
from xgboost import XGBRegressor
|
|
||||||
|
|
||||||
import statsmodels.api as sm
|
|
||||||
try:
|
try:
|
||||||
from keras.preprocessing.sequence import TimeseriesGenerator
|
from keras.preprocessing.sequence import TimeseriesGenerator
|
||||||
except:
|
except:
|
||||||
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
|
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
|
||||||
|
|
||||||
import plotly.express as px
|
|
||||||
import plotly.graph_objects as go
|
|
||||||
|
|
||||||
import xgboost as xgb
|
|
||||||
from xgboost import plot_importance, plot_tree
|
|
||||||
from sklearn.metrics import mean_absolute_error
|
|
||||||
from statsmodels.tools.eval_measures import mse,rmse
|
|
||||||
from sklearn.model_selection import GridSearchCV
|
|
||||||
from xgboost import XGBRegressor
|
|
||||||
import warnings
|
|
||||||
import pickle
|
|
||||||
|
|
||||||
from sklearn.metrics import mean_squared_error
|
|
||||||
|
|
||||||
# 切割训练数据和样本数据
|
# 切割训练数据和样本数据
|
||||||
from sklearn.model_selection import train_test_split
|
|
||||||
|
|
||||||
# 用于模型评分
|
# 用于模型评分
|
||||||
from sklearn.metrics import r2_score
|
|
||||||
|
|
||||||
le = preprocessing.LabelEncoder()
|
le = preprocessing.LabelEncoder()
|
||||||
|
|
||||||
# print(__version__) # requires version >= 1.9.0
|
# print(__version__) # requires version >= 1.9.0
|
||||||
|
|
||||||
|
|
||||||
import cufflinks as cf
|
|
||||||
cf.go_offline()
|
cf.go_offline()
|
||||||
|
|
||||||
random.seed(100)
|
random.seed(100)
|
||||||
@ -121,9 +112,9 @@ def get_data_value(token, dataItemNoList,date):
|
|||||||
"funcOperation": "查询"
|
"funcOperation": "查询"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
headers = {"Authorization": token}
|
headers = {"Authorization": token}
|
||||||
search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5))
|
search_res = requests.post(
|
||||||
|
url=search_url, headers=headers, json=search_data, timeout=(3, 5))
|
||||||
print('数据项查询参数search_data:')
|
print('数据项查询参数search_data:')
|
||||||
print(search_data)
|
print(search_data)
|
||||||
print('数据项查询结果search_res:')
|
print('数据项查询结果search_res:')
|
||||||
@ -145,7 +136,8 @@ def get_data_value(token, dataItemNoList,date):
|
|||||||
|
|
||||||
|
|
||||||
def get_head_push_auth():
|
def get_head_push_auth():
|
||||||
login_res = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5))
|
login_res = requests.post(
|
||||||
|
url=login_push_url, json=login_push_data, timeout=(3, 5))
|
||||||
text = json.loads(login_res.text)
|
text = json.loads(login_res.text)
|
||||||
if text["status"]:
|
if text["status"]:
|
||||||
token = text["data"]["accessToken"]
|
token = text["data"]["accessToken"]
|
||||||
@ -155,7 +147,6 @@ def get_head_push_auth():
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def upload_data_to_system(token_push, date):
|
def upload_data_to_system(token_push, date):
|
||||||
data = {
|
data = {
|
||||||
"funcModule": "数据表信息列表",
|
"funcModule": "数据表信息列表",
|
||||||
@ -170,22 +161,25 @@ def upload_data_to_system(token_push,date):
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
headers = {"Authorization": token_push}
|
headers = {"Authorization": token_push}
|
||||||
res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))
|
res = requests.post(url=upload_url, headers=headers,
|
||||||
|
json=data, timeout=(3, 5))
|
||||||
print(res.text)
|
print(res.text)
|
||||||
print('预测值:', data['data'][0]['dataValue'])
|
print('预测值:', data['data'][0]['dataValue'])
|
||||||
|
|
||||||
|
|
||||||
price_list = []
|
price_list = []
|
||||||
|
|
||||||
|
|
||||||
def forecast_price():
|
def forecast_price():
|
||||||
# df_test = pd.read_csv('定价模型数据收集0212.csv')
|
# df_test = pd.read_csv('定价模型数据收集0212.csv')
|
||||||
df_test = pd.read_excel('液化气数据.xlsx')
|
df_test = pd.read_excel('液化气数据.xlsx')
|
||||||
df_test.drop([0], inplace=True)
|
df_test.drop([0], inplace=True)
|
||||||
try:
|
try:
|
||||||
df_test['Date']=pd.to_datetime(df_test['Date'], format='%m/%d/%Y',infer_datetime_format=True)
|
df_test['Date'] = pd.to_datetime(
|
||||||
|
df_test['Date'], format='%m/%d/%Y', infer_datetime_format=True)
|
||||||
except:
|
except:
|
||||||
df_test['Date']=pd.to_datetime(df_test['Date'], format=r'%Y-%m-%d',infer_datetime_format=True)
|
df_test['Date'] = pd.to_datetime(
|
||||||
|
df_test['Date'], format=r'%Y-%m-%d', infer_datetime_format=True)
|
||||||
|
|
||||||
df_test_1 = df_test
|
df_test_1 = df_test
|
||||||
df_test_1 = df_test_1.fillna(df_test.ffill())
|
df_test_1 = df_test_1.fillna(df_test.ffill())
|
||||||
@ -194,9 +188,6 @@ def forecast_price():
|
|||||||
# 选择用于模型训练的列名称
|
# 选择用于模型训练的列名称
|
||||||
col_for_training = df_test_1.columns
|
col_for_training = df_test_1.columns
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
import joblib
|
import joblib
|
||||||
Best_model_DalyLGPrice = joblib.load("日度价格预测_液化气最佳模型.pkl")
|
Best_model_DalyLGPrice = joblib.load("日度价格预测_液化气最佳模型.pkl")
|
||||||
# 最新的一天为最后一行的数据
|
# 最新的一天为最后一行的数据
|
||||||
@ -221,6 +212,8 @@ def forecast_price():
|
|||||||
a = round(a, 2)
|
a = round(a, 2)
|
||||||
price_list.append(a)
|
price_list.append(a)
|
||||||
return a
|
return a
|
||||||
|
|
||||||
|
|
||||||
def optimize_Model():
|
def optimize_Model():
|
||||||
from sklearn.model_selection import train_test_split
|
from sklearn.model_selection import train_test_split
|
||||||
from sklearn.impute import SimpleImputer
|
from sklearn.impute import SimpleImputer
|
||||||
@ -234,10 +227,11 @@ def optimize_Model():
|
|||||||
df_test = pd.read_excel('液化气数据.xlsx')
|
df_test = pd.read_excel('液化气数据.xlsx')
|
||||||
df_test.drop([0], inplace=True)
|
df_test.drop([0], inplace=True)
|
||||||
try:
|
try:
|
||||||
df_test['Date']=pd.to_datetime(df_test['Date'], format='%m/%d/%Y',infer_datetime_format=True)
|
df_test['Date'] = pd.to_datetime(
|
||||||
|
df_test['Date'], format='%m/%d/%Y', infer_datetime_format=True)
|
||||||
except:
|
except:
|
||||||
df_test['Date']=pd.to_datetime(df_test['Date'], format=r'%Y-%m-%d',infer_datetime_format=True)
|
df_test['Date'] = pd.to_datetime(
|
||||||
|
df_test['Date'], format=r'%Y-%m-%d', infer_datetime_format=True)
|
||||||
|
|
||||||
# 将缺失值补为前一个或者后一个数值
|
# 将缺失值补为前一个或者后一个数值
|
||||||
df_test_1 = df_test
|
df_test_1 = df_test
|
||||||
@ -248,7 +242,6 @@ def optimize_Model():
|
|||||||
df_test_1 = df_test_1.drop(["Date"], axis=1)
|
df_test_1 = df_test_1.drop(["Date"], axis=1)
|
||||||
df_test_1 = df_test_1.astype('float')
|
df_test_1 = df_test_1.astype('float')
|
||||||
|
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from pandas import Series, DataFrame
|
from pandas import Series, DataFrame
|
||||||
@ -297,7 +290,8 @@ def optimize_Model():
|
|||||||
target = y
|
target = y
|
||||||
|
|
||||||
# 切割数据样本集合测试集
|
# 切割数据样本集合测试集
|
||||||
X_train,x_test,y_train,y_true = train_test_split(train,target,test_size=0.2,random_state=0)
|
X_train, x_test, y_train, y_true = train_test_split(
|
||||||
|
train, target, test_size=0.2, random_state=0)
|
||||||
|
|
||||||
# 模型缩写
|
# 模型缩写
|
||||||
Lasso = Lasso(random_state=0)
|
Lasso = Lasso(random_state=0)
|
||||||
@ -328,14 +322,17 @@ def optimize_Model():
|
|||||||
|
|
||||||
model_results1
|
model_results1
|
||||||
# 定义plot_feature_importance函数,该函数用于计算特征重要性。此部分代码无需调整
|
# 定义plot_feature_importance函数,该函数用于计算特征重要性。此部分代码无需调整
|
||||||
|
|
||||||
def plot_feature_importance(importance, names, model_type):
|
def plot_feature_importance(importance, names, model_type):
|
||||||
feature_importance = np.array(importance)
|
feature_importance = np.array(importance)
|
||||||
feature_names = np.array(names)
|
feature_names = np.array(names)
|
||||||
|
|
||||||
data={'feature_names':feature_names,'feature_importance':feature_importance}
|
data = {'feature_names': feature_names,
|
||||||
|
'feature_importance': feature_importance}
|
||||||
fi_df = pd.DataFrame(data)
|
fi_df = pd.DataFrame(data)
|
||||||
|
|
||||||
fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True)
|
fi_df.sort_values(by=['feature_importance'],
|
||||||
|
ascending=False, inplace=True)
|
||||||
|
|
||||||
plt.figure(figsize=(10, 8))
|
plt.figure(figsize=(10, 8))
|
||||||
sn.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names'])
|
sn.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names'])
|
||||||
@ -345,7 +342,7 @@ def optimize_Model():
|
|||||||
plt.ylabel('FEATURE NAMES')
|
plt.ylabel('FEATURE NAMES')
|
||||||
from pylab import mpl
|
from pylab import mpl
|
||||||
mpl.rcParams['font.sans-serif'] = ['SimHei']
|
mpl.rcParams['font.sans-serif'] = ['SimHei']
|
||||||
## Xgboost 模型参数优化-初步
|
# Xgboost 模型参数优化-初步
|
||||||
# 参考: https://juejin.im/post/6844903661013827598
|
# 参考: https://juejin.im/post/6844903661013827598
|
||||||
# 每次调参时,备选参数数值以同数量级的1、3、10设置即可(比如设置1、3、10,或0.1、0.3、1.0,或0.01,0.03,0.10即可)
|
# 每次调参时,备选参数数值以同数量级的1、3、10设置即可(比如设置1、3、10,或0.1、0.3、1.0,或0.01,0.03,0.10即可)
|
||||||
|
|
||||||
@ -387,7 +384,8 @@ def optimize_Model():
|
|||||||
try:
|
try:
|
||||||
results = model_results1.append(model_results2, ignore_index=False)
|
results = model_results1.append(model_results2, ignore_index=False)
|
||||||
except:
|
except:
|
||||||
results = pd.concat([model_results1,model_results2],ignore_index=True)
|
results = pd.concat(
|
||||||
|
[model_results1, model_results2], ignore_index=True)
|
||||||
import pickle
|
import pickle
|
||||||
|
|
||||||
Pkl_Filename = "日度价格预测_液化气最佳模型.pkl"
|
Pkl_Filename = "日度价格预测_液化气最佳模型.pkl"
|
||||||
@ -395,6 +393,7 @@ def optimize_Model():
|
|||||||
with open(Pkl_Filename, 'wb') as file:
|
with open(Pkl_Filename, 'wb') as file:
|
||||||
pickle.dump(grid_search_XGB, file)
|
pickle.dump(grid_search_XGB, file)
|
||||||
|
|
||||||
|
|
||||||
def read_xls_data():
|
def read_xls_data():
|
||||||
"""获取特征项ID"""
|
"""获取特征项ID"""
|
||||||
global one_cols, two_cols
|
global one_cols, two_cols
|
||||||
@ -425,7 +424,8 @@ def start(date=''):
|
|||||||
print(data_value)
|
print(data_value)
|
||||||
dataItemNo_dataValue[data_value["dataItemNo"]] = ""
|
dataItemNo_dataValue[data_value["dataItemNo"]] = ""
|
||||||
else:
|
else:
|
||||||
dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"]
|
dataItemNo_dataValue[data_value["dataItemNo"]
|
||||||
|
] = data_value["dataValue"]
|
||||||
|
|
||||||
for value in one_cols:
|
for value in one_cols:
|
||||||
if value in dataItemNo_dataValue:
|
if value in dataItemNo_dataValue:
|
||||||
@ -472,6 +472,7 @@ def getNow(date='', offset=0):
|
|||||||
compact_date = date_str.replace("-", "")
|
compact_date = date_str.replace("-", "")
|
||||||
return compact_date, date_str
|
return compact_date, date_str
|
||||||
|
|
||||||
|
|
||||||
def start_1(date=''):
|
def start_1(date=''):
|
||||||
"""补充昨日数据"""
|
"""补充昨日数据"""
|
||||||
read_xls_data()
|
read_xls_data()
|
||||||
@ -493,7 +494,8 @@ def start_1(date=''):
|
|||||||
print(data_value)
|
print(data_value)
|
||||||
dataItemNo_dataValue[data_value["dataItemNo"]] = ""
|
dataItemNo_dataValue[data_value["dataItemNo"]] = ""
|
||||||
else:
|
else:
|
||||||
dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"]
|
dataItemNo_dataValue[data_value["dataItemNo"]
|
||||||
|
] = data_value["dataValue"]
|
||||||
|
|
||||||
for value in one_cols:
|
for value in one_cols:
|
||||||
if value in dataItemNo_dataValue:
|
if value in dataItemNo_dataValue:
|
||||||
@ -537,10 +539,12 @@ def save_xls_2(append_rows):
|
|||||||
|
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
# 如果文件不存在则创建新文件
|
# 如果文件不存在则创建新文件
|
||||||
pd.DataFrame([append_rows]).to_excel('液化气数据.xlsx', index=False, engine='openpyxl')
|
pd.DataFrame([append_rows]).to_excel(
|
||||||
|
'液化气数据.xlsx', index=False, engine='openpyxl')
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"保存数据时发生错误: {str(e)}")
|
print(f"保存数据时发生错误: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
def check_data(dataItemNo):
|
def check_data(dataItemNo):
|
||||||
token = get_head_auth()
|
token = get_head_auth()
|
||||||
if not token:
|
if not token:
|
||||||
@ -550,6 +554,7 @@ def check_data(dataItemNo):
|
|||||||
if not datas:
|
if not datas:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEnd):
|
def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEnd):
|
||||||
|
|
||||||
search_data = {
|
search_data = {
|
||||||
@ -563,13 +568,15 @@ def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEn
|
|||||||
}
|
}
|
||||||
|
|
||||||
headers = {"Authorization": token}
|
headers = {"Authorization": token}
|
||||||
search_res = requests.post(url=url, headers=headers, json=search_data, timeout=(3, 5))
|
search_res = requests.post(
|
||||||
|
url=url, headers=headers, json=search_data, timeout=(3, 5))
|
||||||
search_value = json.loads(search_res.text)["data"]
|
search_value = json.loads(search_res.text)["data"]
|
||||||
if search_value:
|
if search_value:
|
||||||
return search_value
|
return search_value
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def save_queryDataListItemNos_xls(data_df, dataItemNoList):
|
def save_queryDataListItemNos_xls(data_df, dataItemNoList):
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
current_year_month = datetime.now().strftime('%Y-%m')
|
current_year_month = datetime.now().strftime('%Y-%m')
|
||||||
@ -624,7 +631,9 @@ def queryDataListItemNos(date=None,token=None):
|
|||||||
# 格式化为 YYYYMMDD 格式
|
# 格式化为 YYYYMMDD 格式
|
||||||
dateEnd = current_date.strftime('%Y%m%d')
|
dateEnd = current_date.strftime('%Y%m%d')
|
||||||
dateStart = first_day_of_month.strftime('%Y%m%d')
|
dateStart = first_day_of_month.strftime('%Y%m%d')
|
||||||
search_value = get_queryDataListItemNos_value(token, queryDataListItemNos_url, dataItemNoList, dateStart, dateEnd)
|
# dateStart = '20250604'
|
||||||
|
search_value = get_queryDataListItemNos_value(
|
||||||
|
token, queryDataListItemNos_url, dataItemNoList, dateStart, dateEnd)
|
||||||
data_df = pd.DataFrame(search_value)
|
data_df = pd.DataFrame(search_value)
|
||||||
data_df["dataDate"] = pd.to_datetime(data_df["dataDate"])
|
data_df["dataDate"] = pd.to_datetime(data_df["dataDate"])
|
||||||
data_df["dataDate"] = data_df["dataDate"].dt.strftime('%Y-%m-%d')
|
data_df["dataDate"] = data_df["dataDate"].dt.strftime('%Y-%m-%d')
|
||||||
@ -632,8 +641,6 @@ def queryDataListItemNos(date=None,token=None):
|
|||||||
print('当月数据更新完成')
|
print('当月数据更新完成')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def main(start_date=None, token=None, token_push=None):
|
def main(start_date=None, token=None, token_push=None):
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
if start_date is None:
|
if start_date is None:
|
||||||
@ -654,10 +661,18 @@ def main(start_date=None,token=None,token_push=None):
|
|||||||
# start(date)
|
# start(date)
|
||||||
# 训练模型
|
# 训练模型
|
||||||
optimize_Model()
|
optimize_Model()
|
||||||
# # 预测&上传预测结果
|
# 预测&上传预测结果
|
||||||
upload_data_to_system(token_push, start_date)
|
upload_data_to_system(token_push, start_date)
|
||||||
|
# forecast_price()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
print("运行中ing...")
|
print("运行中ing...")
|
||||||
|
# 遍历2024-11-25 到 2024-12-3 之间的工作日日期
|
||||||
|
# for i_time in pd.date_range('2025-7-1', '2025-7-8', freq='D'):
|
||||||
|
# # try:
|
||||||
|
# print(i_time)
|
||||||
|
# main(start_date=i_time)
|
||||||
|
# except Exception as e:
|
||||||
|
# continue
|
||||||
main()
|
main()
|
||||||
|
Loading…
Reference in New Issue
Block a user