丙烯py: add current-month data maintenance to the propylene script
parent 91387a7241
commit d529d0cee6
BIN aisenzhecode/聚合级丙烯/丙烯基础数据收集表.xlsx (Normal file)
Binary file not shown.
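Note: the substance of this commit is a month-to-date refresh of the collection workbook. The new queryDataListItemNos routine (in a later hunk of the .py diff below) derives a YYYYMMDD window running from the first of the current month to today and queries the warehouse API for exactly that range. A minimal standalone sketch of that window arithmetic; the helper name is hypothetical, the diff inlines the same few lines:

    from datetime import datetime

    def month_to_date_window(now=None):
        # Return (dateStart, dateEnd) as YYYYMMDD strings, first of month to today.
        now = now or datetime.now()
        date_start = now.replace(day=1).strftime('%Y%m%d')  # first day of this month
        date_end = now.strftime('%Y%m%d')                   # today
        return date_start, date_end

    # e.g. a run on 2024-11-15 yields ('20241101', '20241115')
    print(month_to_date_window(datetime(2024, 11, 15)))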
@@ -1,3 +1,31 @@
+from statsmodels.tools.eval_measures import mse, rmse
+from pandas import Series, DataFrame
+import cufflinks as cf
+from sklearn.metrics import r2_score
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import mean_squared_error
+import pickle
+import warnings
+from sklearn.model_selection import GridSearchCV
+from sklearn.metrics import mean_absolute_error
+from xgboost import plot_importance, plot_tree
+import xgboost as xgb
+import plotly.graph_objects as go
+import plotly.express as px
+from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
+import statsmodels.api as sm
+import datetime
+from xgboost import XGBRegressor
+from sklearn.linear_model import Lasso
+import sklearn.datasets as datasets
+from sklearn import preprocessing
+from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
+from plotly import __version__
+import random
+import seaborn as sn
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
 import requests
 import json
 import xlrd
@@ -7,6 +35,7 @@ import time
 # Variable definitions
 login_url = "http://10.200.32.39/jingbo-api/api/server/login"
 search_url = "http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos"
+queryDataListItemNos_url = "http://10.200.32.39/jingbo-api//api/warehouse/dwDataItem/queryDataListItemNos"

 login_push_url = "http://10.200.32.39/jingbo-api/api/server/login"
 upload_url = "http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList"
@@ -37,67 +66,24 @@ read_file_path_name = "丙烯基础数据收集表.xls"
 one_cols = []
 two_cols = []

-import pandas as pd
-import numpy as np
-import matplotlib.pyplot as plt
-import seaborn as sn
-import random
-import time
+# Import the machine-learning models


+# Split the training and sample data

+# For model scoring
-from plotly import __version__
-from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

-from sklearn import preprocessing

-from pandas import Series,DataFrame

-import matplotlib.pyplot as plt

-import sklearn.datasets as datasets

-#Import the machine-learning models
-from sklearn.linear_model import Lasso
-from xgboost import XGBRegressor

-import datetime
-import statsmodels.api as sm
-from keras.preprocessing.sequence import TimeseriesGenerator

-import plotly.express as px
-import plotly.graph_objects as go

-import xgboost as xgb
-from xgboost import plot_importance, plot_tree
-from sklearn.metrics import mean_absolute_error
-from statsmodels.tools.eval_measures import mse,rmse
-from sklearn.model_selection import GridSearchCV
-from xgboost import XGBRegressor
-import warnings
-import pickle

-from sklearn.metrics import mean_squared_error

-#Split the training and sample data
-from sklearn.model_selection import train_test_split

-#For model scoring
-from sklearn.metrics import r2_score

 le = preprocessing.LabelEncoder()

 # print(__version__) # requires version >= 1.9.0


-import cufflinks as cf
 cf.go_offline()

 random.seed(100)



 # Data fetching

 def get_head_auth():
@@ -121,7 +107,8 @@ def get_data_value(token, dataItemNoList):
         "funcOperation": "查询"
     }
     headers = {"Authorization": token}
-    search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5))
+    search_res = requests.post(
+        url=search_url, headers=headers, json=search_data, timeout=(3, 5))
     search_value = json.loads(search_res.text)["data"]
     if search_value:
         return search_value
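The only change here is reflowing the requests.post call, but the kept timeout=(3, 5) is worth a note: in requests, a two-element tuple sets the connect and read timeouts separately, so a call gives up after 3 s if the host is unreachable and after 5 s if it accepts the connection but never answers. A small illustration against a placeholder URL (not part of the commit):

    import requests

    try:
        requests.post('http://example.invalid/api', json={'ping': 1},
                      timeout=(3, 5))  # 3 s to connect, 5 s to read
    except requests.exceptions.ConnectTimeout:
        print('no TCP connection within 3 seconds')
    except requests.exceptions.ReadTimeout:
        print('connected, but no response within 5 seconds')
    except requests.exceptions.ConnectionError as exc:
        print('connection failed:', exc)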
@@ -166,7 +153,8 @@ def get_cur_time():


 def get_head_push_auth():
-    login_res = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5))
+    login_res = requests.post(
+        url=login_push_url, json=login_push_data, timeout=(3, 5))
     text = json.loads(login_res.text)
     if text["status"]:
         token = text["data"]["accessToken"]
@@ -176,7 +164,6 @@ def get_head_push_auth():
         return None


-
 def upload_data_to_system(token_push):
     data = {
         "funcModule": "数据表信息列表",
@@ -185,17 +172,18 @@ def upload_data_to_system(token_push):
             {"dataItemNo": "C01100007|Forecast_Price|ACN",
              "dataDate": get_cur_time()[0],
              "dataStatus": "add",
              # "dataValue": 7100
              "dataValue": forecast_price()
              }

         ]
     }
     headers = {"Authorization": token_push}
-    res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))
+    res = requests.post(url=upload_url, headers=headers,
+                        json=data, timeout=(3, 5))
     print(res.text)


 # def upload_data_to_system(token):
 #     data = {
 #         "funcModule": "数据表信息列表",
@@ -213,92 +201,95 @@ def upload_data_to_system(token_push):
 #     res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))
 #     print(res.text)



 def forecast_price():
     # df_test = pd.read_csv('定价模型数据收集0212.csv')
-    df_test = pd.read_excel('丙烯基础数据收集表.xls')
-    df_test.drop([0],inplace=True)
-    df_test['Date']=pd.to_datetime(df_test['Date'], format='%m/%d/%Y',infer_datetime_format=True)
+    df_test = pd.read_excel('丙烯基础数据收集表.xlsx')
+    df_test.drop([0], inplace=True)
+    df_test['Date'] = pd.to_datetime(
+        df_test['Date'], format='%Y-%m-%d', infer_datetime_format=True)

-    #Fill missing values with the previous or next value
+    # Fill missing values with the previous or next value
     df_test_1 = df_test
-    df_test_1=df_test_1.fillna(df_test.ffill())
-    df_test_1=df_test_1.fillna(df_test_1.bfill())
+    df_test_1 = df_test_1.fillna(df_test.ffill())
+    df_test_1 = df_test_1.fillna(df_test_1.bfill())

     # Select the columns used for model training
     col_for_training = df_test_1.columns



     import joblib
     Best_model_DalyLGPrice = joblib.load("日度价格预测_丙烯最佳模型.pkl")
     # The latest day is the last row of data

     df_test_1_Day = df_test_1.tail(1)
     # Drop the unneeded columns
     df_test_1_Day.index = df_test_1_Day["Date"]
-    df_test_1_Day = df_test_1_Day.drop(["Date"], axis= 1)
-    df_test_1_Day=df_test_1_Day.drop('Price',axis=1)
-    df_test_1_Day=df_test_1_Day.dropna()
+    df_test_1_Day = df_test_1_Day.drop(["Date"], axis=1)
+    df_test_1_Day = df_test_1_Day.drop('Price', axis=1)
+    df_test_1_Day = df_test_1_Day.dropna()
+    # Convert the data types
+    df_test_1_Day = df_test_1_Day.astype(float)

     # df_test_1_Day
-    #Predict today's price, shown to two decimal places
-    Ypredict_Today=Best_model_DalyLGPrice.predict(df_test_1_Day)
+    # Predict today's price, shown to two decimal places
+    Ypredict_Today = Best_model_DalyLGPrice.predict(df_test_1_Day)

-    df_test_1_Day['日度预测价格']=Ypredict_Today
+    df_test_1_Day['日度预测价格'] = Ypredict_Today
     print(df_test_1_Day['日度预测价格'])
     a = df_test_1_Day['日度预测价格']
     a = a[0]
     a = float(a)
-    a = round(a,2)
+    a = round(a, 2)
     return a


 def optimize_Model():
     from sklearn.model_selection import train_test_split
     from sklearn.impute import SimpleImputer
     from sklearn.preprocessing import OrdinalEncoder
     from sklearn.feature_selection import SelectFromModel
     from sklearn.metrics import mean_squared_error, r2_score
     import pandas as pd

-    pd.set_option('display.max_rows',40)
-    pd.set_option('display.max_columns',40)
-    df_test = pd.read_excel('丙烯基础数据收集表.xls')
-    df_test.drop([0],inplace=True)
-    df_test['Date']=pd.to_datetime(df_test['Date'], format='%m/%d/%Y',infer_datetime_format=True)
-    #Check the number of missing values per feature
-    MisVal_Check=df_test.isnull().sum().sort_values(ascending=False)
-    #Drop features whose missing-value ratio is > 0.4; the trimmed table is named df_test_1
-    df_MisVal_Check = pd.DataFrame(MisVal_Check,)#
-    df_MisVal_Check_1=df_MisVal_Check.reset_index()
-    df_MisVal_Check_1.columns=['Variable_Name','Missing_Number']
-    df_MisVal_Check_1['Missing_Number']=df_MisVal_Check_1['Missing_Number']/len(df_test)
-    df_test_1=df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number']>0.4].Variable_Name,axis = 1)
-    #Fill missing values with the previous or next value
+    pd.set_option('display.max_rows', 40)
+    pd.set_option('display.max_columns', 40)
+    df_test = pd.read_excel('丙烯基础数据收集表.xlsx')
+    df_test.drop([0], inplace=True)
+    df_test['Date'] = pd.to_datetime(
+        df_test['Date'], format='%Y-%m-%d', infer_datetime_format=True)
+    # Check the number of missing values per feature
+    MisVal_Check = df_test.isnull().sum().sort_values(ascending=False)
+    # Drop features whose missing-value ratio is > 0.4; the trimmed table is named df_test_1
+    df_MisVal_Check = pd.DataFrame(MisVal_Check,)
+    df_MisVal_Check_1 = df_MisVal_Check.reset_index()
+    df_MisVal_Check_1.columns = ['Variable_Name', 'Missing_Number']
+    df_MisVal_Check_1['Missing_Number'] = df_MisVal_Check_1['Missing_Number'] / \
+        len(df_test)
+    df_test_1 = df_test.drop(
+        df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number'] > 0.4].Variable_Name, axis=1)
+    # Fill missing values with the previous or next value
     df_test_1 = df_test
-    df_test_1=df_test_1.fillna(df_test.ffill())
-    df_test_1=df_test_1.fillna(df_test_1.bfill())
+    df_test_1 = df_test_1.fillna(df_test.ffill())
+    df_test_1 = df_test_1.fillna(df_test_1.bfill())
     df_test_1["Date"] = pd.to_datetime(df_test_1["Date"])
     df_test_1.index = df_test_1["Date"]
-    df_test_1 = df_test_1.drop(["Date"], axis= 1)
+    df_test_1 = df_test_1.drop(["Date"], axis=1)
     df_test_1 = df_test_1.astype('float')
     import numpy as np
     import pandas as pd
-    from pandas import Series,DataFrame
+    from pandas import Series, DataFrame

     import matplotlib.pyplot as plt

     import sklearn.datasets as datasets

-    #Import the machine-learning models
+    # Import the machine-learning models
     from sklearn.linear_model import Lasso
     from xgboost import XGBRegressor

     from datetime import datetime
     import statsmodels.api as sm
-    from keras.preprocessing.sequence import TimeseriesGenerator
+    from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

     import plotly.express as px
     import plotly.graph_objects as go
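Two substantive changes hide among the formatting fixes above: the workbook moves from .xls to .xlsx, and the Date parse format flips from '%m/%d/%Y' to '%Y-%m-%d', matching the 'YYYY-MM-DD' strings that the new maintenance routine writes into the sheet. A quick check of the new format (infer_datetime_format is deprecated in pandas 2.x and is redundant once an explicit format is given):

    import pandas as pd

    dates = pd.Series(['2024-11-01', '2024-11-04', '2024-11-05'])
    print(pd.to_datetime(dates, format='%Y-%m-%d'))
    # 0   2024-11-01
    # 1   2024-11-04
    # 2   2024-11-05
    # dtype: datetime64[ns]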
@@ -306,7 +297,7 @@ def optimize_Model():
     import xgboost as xgb
     from xgboost import plot_importance, plot_tree
     from sklearn.metrics import mean_absolute_error
-    from statsmodels.tools.eval_measures import mse,rmse
+    from statsmodels.tools.eval_measures import mse, rmse
     from sklearn.model_selection import GridSearchCV
     from xgboost import XGBRegressor
     import warnings
@@ -314,63 +305,67 @@ def optimize_Model():

     from sklearn.metrics import mean_squared_error

-    #Split the training and sample data
+    # Split the training and sample data
     from sklearn.model_selection import train_test_split

-    #For model scoring
+    # For model scoring
     from sklearn.metrics import r2_score

-    dataset1=df_test_1.drop('Price',axis=1)#.astype(float)
+    dataset1 = df_test_1.drop('Price', axis=1)  # .astype(float)

-    y=df_test_1['Price']
+    y = df_test_1['Price']

-    x=dataset1
+    x = dataset1

     train = x
     target = y

-    #Split the samples into training and test sets
-    X_train,x_test,y_train,y_true = train_test_split(train,target,test_size=0.2,random_state=0)
+    # Split the samples into training and test sets
+    X_train, x_test, y_train, y_true = train_test_split(
+        train, target, test_size=0.2, random_state=0)

-    #Model shorthand
+    # Model shorthand
     Lasso = Lasso(random_state=0)
     XGBR = XGBRegressor(random_state=0)
-    #Train the models
-    Lasso.fit(X_train,y_train)
-    XGBR.fit(X_train,y_train)
-    #Model fitting
+    # Train the models
+    Lasso.fit(X_train, y_train)
+    XGBR.fit(X_train, y_train)
+    # Model fitting
     y_pre_Lasso = Lasso.predict(x_test)
     y_pre_XGBR = XGBR.predict(x_test)

-    #Compute R² for the Lasso, XGBR, RandomForestR, AdaBoostR, GradientBoostingR and BaggingRegressor models
-    Lasso_score = r2_score(y_true,y_pre_Lasso)
-    XGBR_score=r2_score(y_true,y_pre_XGBR)
+    # Compute R² for the Lasso, XGBR, RandomForestR, AdaBoostR, GradientBoostingR and BaggingRegressor models
+    Lasso_score = r2_score(y_true, y_pre_Lasso)
+    XGBR_score = r2_score(y_true, y_pre_XGBR)

-    #Compute MSE and RMSE for Lasso and XGBR
-    Lasso_MSE=mean_squared_error(y_true, y_pre_Lasso)
-    XGBR_MSE=mean_squared_error(y_true, y_pre_XGBR)
+    # Compute MSE and RMSE for Lasso and XGBR
+    Lasso_MSE = mean_squared_error(y_true, y_pre_Lasso)
+    XGBR_MSE = mean_squared_error(y_true, y_pre_XGBR)

-    Lasso_RMSE=np.sqrt(Lasso_MSE)
-    XGBR_RMSE=np.sqrt(XGBR_MSE)
+    Lasso_RMSE = np.sqrt(Lasso_MSE)
+    XGBR_RMSE = np.sqrt(XGBR_MSE)
     # Combine the error metrics of the different models into one table
     model_results = pd.DataFrame([['Lasso', Lasso_RMSE, Lasso_score],
                                   ['XgBoost', XGBR_RMSE, XGBR_score]],
-                                 columns = ['模型(Model)','均方根误差(RMSE)', 'R^2 score'])
+                                 columns=['模型(Model)', '均方根误差(RMSE)', 'R^2 score'])
-    #Set the model-name (Model) column as the index
-    model_results1=model_results.set_index('模型(Model)')
+    # Set the model-name (Model) column as the index
+    model_results1 = model_results.set_index('模型(Model)')

     model_results1
-    #Define plot_feature_importance, which computes feature importance; this part needs no changes
-    def plot_feature_importance(importance,names,model_type):
+    # Define plot_feature_importance, which computes feature importance; this part needs no changes
+
+    def plot_feature_importance(importance, names, model_type):
         feature_importance = np.array(importance)
         feature_names = np.array(names)

-        data={'feature_names':feature_names,'feature_importance':feature_importance}
+        data = {'feature_names': feature_names,
+                'feature_importance': feature_importance}
         fi_df = pd.DataFrame(data)

-        fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True)
+        fi_df.sort_values(by=['feature_importance'],
+                          ascending=False, inplace=True)

-        plt.figure(figsize=(10,8))
+        plt.figure(figsize=(10, 8))
         sn.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names'])

         plt.title(model_type + " "+'FEATURE IMPORTANCE')
@@ -378,55 +373,145 @@ def optimize_Model():
         plt.ylabel('FEATURE NAMES')
         from pylab import mpl
         mpl.rcParams['font.sans-serif'] = ['SimHei']
-    ## Xgboost parameter tuning - first pass
-    #Reference: https://juejin.im/post/6844903661013827598
-    #When tuning, candidate values of 1, 3 and 10 within one order of magnitude are enough (e.g. 1, 3, 10, or 0.1, 0.3, 1.0, or 0.01, 0.03, 0.10)
+    # Xgboost parameter tuning - first pass
+    # Reference: https://juejin.im/post/6844903661013827598
+    # When tuning, candidate values of 1, 3 and 10 within one order of magnitude are enough (e.g. 1, 3, 10, or 0.1, 0.3, 1.0, or 0.01, 0.03, 0.10)

     from xgboost import XGBRegressor
     from sklearn.model_selection import GridSearchCV

     estimator = XGBRegressor(random_state=0,
                              nthread=4,
                              seed=0
                              )
     parameters = {
-        'max_depth': range (2, 11, 2),  # maximum tree depth
-        'n_estimators': range (50, 101, 10),  # number of boosting rounds
+        'max_depth': range(2, 11, 2),  # maximum tree depth
+        'n_estimators': range(50, 101, 10),  # number of boosting rounds
         'learning_rate': [0.01, 0.03, 0.1, 0.3, 0.5, 1]
     }

     grid_search_XGB = GridSearchCV(
         estimator=estimator,
         param_grid=parameters,
         # n_jobs = 10,
-        cv = 3,
+        cv=3,
         verbose=True
     )

     grid_search_XGB.fit(X_train, y_train)
-    #If this step errors out, the search may exceed what the hardware can support; comment out the "n_jobs=10" line
+    # If this step errors out, the search may exceed what the hardware can support; comment out the "n_jobs=10" line

     best_parameters = grid_search_XGB.best_estimator_.get_params()
     y_pred = grid_search_XGB.predict(x_test)

-    op_XGBR_score = r2_score(y_true,y_pred)
-    op_XGBR_MSE= mean_squared_error(y_true, y_pred)
-    op_XGBR_RMSE= np.sqrt(op_XGBR_MSE)
+    op_XGBR_score = r2_score(y_true, y_pred)
+    op_XGBR_MSE = mean_squared_error(y_true, y_pred)
+    op_XGBR_RMSE = np.sqrt(op_XGBR_MSE)

     model_results2 = pd.DataFrame([['Optimized_Xgboost', op_XGBR_RMSE, op_XGBR_score]],
-                                  columns = ['模型(Model)', '均方根误差(RMSE)', 'R^2 score'])
-    model_results2=model_results2.set_index('模型(Model)')
+                                  columns=['模型(Model)', '均方根误差(RMSE)', 'R^2 score'])
+    model_results2 = model_results2.set_index('模型(Model)')

-    results = model_results1.append(model_results2, ignore_index = False)
+    results = pd.concat([model_results1, model_results2], ignore_index=False)
     import pickle

     Pkl_Filename = "日度价格预测_丙烯最佳模型.pkl"

     with open(Pkl_Filename, 'wb') as file:
         pickle.dump(grid_search_XGB, file)


+def queryDataListItemNos(token=None):
+    df = pd.read_excel('丙烯基础数据收集表.xlsx')
+    dataItemNoList = df.iloc[0].tolist()[1:]
+
+    if token is None:
+        token = get_head_auth()
+
+    if not token:
+        print('token获取失败')
+        return
+
+    # Get the current date
+    from datetime import datetime, timedelta
+    current_date = datetime.now()
+
+    # Get the first day of the current month
+    first_day_of_month = current_date.replace(day=1)
+
+    # Format as YYYYMMDD
+    dateEnd = current_date.strftime('%Y%m%d')
+    dateStart = first_day_of_month.strftime('%Y%m%d')
+    # dateStart = '20241026'
+
+    search_value = get_queryDataListItemNos_value(
+        token, queryDataListItemNos_url, dataItemNoList, dateStart, dateEnd)
+
+    data_df = pd.DataFrame(search_value)
+
+    data_df["dataDate"] = pd.to_datetime(data_df["dataDate"])
+    data_df["dataDate"] = data_df["dataDate"].dt.strftime('%Y-%m-%d')
+    save_queryDataListItemNos_xls(data_df, dataItemNoList)
+    print('当月数据更新完成')
+
+
+def save_queryDataListItemNos_xls(data_df, dataItemNoList):
+    from datetime import datetime, timedelta
+    current_year_month = datetime.now().strftime('%Y-%m')
+    grouped = data_df.groupby("dataDate")
+
+    # Open the xlsx file with openpyxl
+    from openpyxl import load_workbook
+    workbook = load_workbook('丙烯基础数据收集表.xlsx')
+
+    # Create a new workbook
+    new_workbook = load_workbook('丙烯基础数据收集表.xlsx')
+
+    for sheetname in workbook.sheetnames:
+        sheet = workbook[sheetname]
+        new_sheet = new_workbook[sheetname]
+
+        current_year_month_row = 0
+        # Count the rows that already hold current-month data
+        for row_idx, row in enumerate(sheet.iter_rows(values_only=True), 1):
+            if str(row[0]).startswith(current_year_month):
+                current_year_month_row += 1
+
+        # Append the new data
+        if sheetname == workbook.sheetnames[0]:
+            start_row = sheet.max_row - current_year_month_row + 1
+            for row_idx, (date, group) in enumerate(grouped, start=start_row):
+                new_sheet.cell(row=row_idx, column=1, value=date)
+                for j, dataItemNo in enumerate(dataItemNoList, start=2):
+                    if group[group["dataItemNo"] == dataItemNo]["dataValue"].values:
+                        new_sheet.cell(row=row_idx, column=j,
+                                       value=group[group["dataItemNo"] == dataItemNo]["dataValue"].values[0])
+
+    # Save the modified xlsx file
+    new_workbook.save("丙烯基础数据收集表.xlsx")
+
+
+def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEnd):
+
+    search_data = {
+        "funcModule": "数据项",
+        "funcOperation": "查询",
+        "data": {
+            "dateStart": dateStart,
+            "dateEnd": dateEnd,
+            "dataItemNoList": dataItemNoList  # data item codes, representing the Brent low and high prices
+        }
+    }
+
+    headers = {"Authorization": token}
+    search_res = requests.post(
+        url=url, headers=headers, json=search_data, timeout=(3, 5))
+    search_value = json.loads(search_res.text)["data"]
+    if search_value:
+        return search_value
+    else:
+        return None
+
+
 def read_xls_data():
     global one_cols, two_cols
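Two details in this hunk deserve a note. First, results switches from DataFrame.append to pd.concat; DataFrame.append was removed in pandas 2.0, so this keeps optimize_Model running on current pandas. Second, save_queryDataListItemNos_xls refreshes the current month in place rather than appending blindly: it counts the rows whose date cell starts with the current 'YYYY-MM' and rewinds to max_row - count + 1, so a re-run overwrites this month's rows instead of duplicating them. A standalone sketch of that row arithmetic, with sample data standing in for the real workbook:

    from openpyxl import Workbook

    wb = Workbook()
    sheet = wb.active
    for d, v in [('2024-10-30', 1), ('2024-10-31', 2),
                 ('2024-11-01', 3), ('2024-11-04', 4)]:
        sheet.append([d, v])

    count = sum(1 for row in sheet.iter_rows(values_only=True)
                if str(row[0]).startswith('2024-11'))
    start_row = sheet.max_row - count + 1  # first current-month row to overwrite
    print(count, start_row)  # 2 current-month rows, rewriting from row 3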
@@ -456,8 +541,6 @@ def read_xls_data():
     # workbook.close()


-
-
 def start():
     read_xls_data()

@@ -480,8 +563,9 @@ def start():
             print(data_value)
             dataItemNo_dataValue[data_value["dataItemNo"]] = ""
         else:
-            dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"]
+            dataItemNo_dataValue[data_value["dataItemNo"]
+                                 ] = data_value["dataValue"]

     for value in one_cols[1:]:
         if value in dataItemNo_dataValue:
             append_rows.append(dataItemNo_dataValue[value])
@@ -500,7 +584,6 @@ def start_1():
     token = get_head_auth()
     if not token:
         return

-
     datas = get_data_value(token, one_cols[1:])
     # if not datas:
@@ -514,7 +597,8 @@ def start_1():
             print(data_value)
             dataItemNo_dataValue[data_value["dataItemNo"]] = ""
         else:
-            dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"]
+            dataItemNo_dataValue[data_value["dataItemNo"]
+                                 ] = data_value["dataValue"]

     for value in one_cols[1:]:
         if value in dataItemNo_dataValue:
@@ -523,10 +607,10 @@ def start_1():
             append_rows.append("")
     save_xls_1(append_rows)


     # data_list.append(three_cols)
     # write_xls(data_list)


 def save_xls_1(append_rows):

     # Open the xls file
@@ -568,11 +652,9 @@ def save_xls_1(append_rows):
             new_sheet.write(row_count, col, append_rows[col])

     # Save the new xls file
     new_workbook.save("丙烯基础数据收集表.xls")


-
-
 def check_data(dataItemNo):
     token = get_head_auth()
     if not token:
@@ -628,5 +710,8 @@ def save_xls(append_rows):


 if __name__ == "__main__":
-    start()
+    # start()
+    queryDataListItemNos()
+    optimize_Model()
+    forecast_price()
+    upload_data_to_system(token_push=get_head_push_auth())