线上数据维护
This commit is contained in:
parent
0ca7553951
commit
34770b83a3
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
@ -1,8 +1,34 @@
|
||||
from statsmodels.tools.eval_measures import mse, rmse
|
||||
from pandas import Series, DataFrame
|
||||
import cufflinks as cf
|
||||
from sklearn.metrics import r2_score
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import mean_squared_error
|
||||
import pickle
|
||||
import warnings
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
from sklearn.metrics import mean_absolute_error
|
||||
from xgboost import plot_importance, plot_tree
|
||||
import xgboost as xgb
|
||||
import plotly.graph_objects as go
|
||||
import plotly.express as px
|
||||
import statsmodels.api as sm
|
||||
from xgboost import XGBRegressor
|
||||
from sklearn.linear_model import Lasso
|
||||
import sklearn.datasets as datasets
|
||||
from sklearn import preprocessing
|
||||
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
|
||||
from plotly import __version__
|
||||
import random
|
||||
import seaborn as sn
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import requests
|
||||
import json
|
||||
import xlrd
|
||||
import xlwt
|
||||
from datetime import datetime,timedelta
|
||||
from datetime import datetime, timedelta
|
||||
import time
|
||||
# 变量定义
|
||||
login_url = "http://10.200.32.39/jingbo-api/api/server/login"
|
||||
@ -38,69 +64,29 @@ read_file_path_name = "纯苯数据项.xls"
|
||||
one_cols = []
|
||||
two_cols = []
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sn
|
||||
import random
|
||||
import time
|
||||
|
||||
# 导入机器学习算法模型
|
||||
|
||||
|
||||
|
||||
from plotly import __version__
|
||||
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
|
||||
|
||||
from sklearn import preprocessing
|
||||
|
||||
from pandas import Series,DataFrame
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
import sklearn.datasets as datasets
|
||||
|
||||
#导入机器学习算法模型
|
||||
from sklearn.linear_model import Lasso
|
||||
from xgboost import XGBRegressor
|
||||
|
||||
import statsmodels.api as sm
|
||||
try:
|
||||
from keras.preprocessing.sequence import TimeseriesGenerator
|
||||
except:
|
||||
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
|
||||
|
||||
import plotly.express as px
|
||||
import plotly.graph_objects as go
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import plot_importance, plot_tree
|
||||
from sklearn.metrics import mean_absolute_error
|
||||
from statsmodels.tools.eval_measures import mse,rmse
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
from xgboost import XGBRegressor
|
||||
import warnings
|
||||
import pickle
|
||||
# 切割训练数据和样本数据
|
||||
|
||||
from sklearn.metrics import mean_squared_error
|
||||
|
||||
#切割训练数据和样本数据
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
#用于模型评分
|
||||
from sklearn.metrics import r2_score
|
||||
# 用于模型评分
|
||||
|
||||
le = preprocessing.LabelEncoder()
|
||||
|
||||
# print(__version__) # requires version >= 1.9.0
|
||||
|
||||
|
||||
import cufflinks as cf
|
||||
cf.go_offline()
|
||||
|
||||
random.seed(100)
|
||||
|
||||
|
||||
|
||||
# 数据获取
|
||||
|
||||
def get_head_auth():
|
||||
@ -114,7 +100,7 @@ def get_head_auth():
|
||||
return None
|
||||
|
||||
|
||||
def get_data_value(token, dataItemNoList,date):
|
||||
def get_data_value(token, dataItemNoList, date):
|
||||
search_data = {
|
||||
"data": {
|
||||
"date": getNow(date)[0],
|
||||
@ -124,7 +110,8 @@ def get_data_value(token, dataItemNoList,date):
|
||||
"funcOperation": "查询"
|
||||
}
|
||||
headers = {"Authorization": token}
|
||||
search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5))
|
||||
search_res = requests.post(
|
||||
url=search_url, headers=headers, json=search_data, timeout=(3, 5))
|
||||
search_value = json.loads(search_res.text)["data"]
|
||||
if search_value:
|
||||
return search_value
|
||||
@ -136,10 +123,7 @@ def get_data_value(token, dataItemNoList,date):
|
||||
# xls文件处理
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def write_xls(data,date):
|
||||
def write_xls(data, date):
|
||||
# 创建一个Workbook对象
|
||||
workbook = xlwt.Workbook()
|
||||
|
||||
@ -155,7 +139,6 @@ def write_xls(data,date):
|
||||
workbook.save(get_cur_time(date)[0] + '.xls')
|
||||
|
||||
|
||||
|
||||
def getNow(date='', offset=0):
|
||||
"""生成指定日期的两种格式字符串
|
||||
Args:
|
||||
@ -168,7 +151,7 @@ def getNow(date='', offset=0):
|
||||
tuple: (紧凑日期字符串, 标准日期字符串)
|
||||
"""
|
||||
# 日期解析逻辑
|
||||
from datetime import datetime,timedelta
|
||||
from datetime import datetime, timedelta
|
||||
if isinstance(date, datetime):
|
||||
now = date
|
||||
else:
|
||||
@ -186,7 +169,7 @@ def getNow(date='', offset=0):
|
||||
|
||||
# 应用日期偏移
|
||||
now = now - timedelta(days=offset)
|
||||
|
||||
|
||||
# 统一格式化输出
|
||||
date_str = now.strftime("%Y-%m-%d")
|
||||
compact_date = date_str.replace("-", "")
|
||||
@ -215,7 +198,8 @@ def get_cur_time(date=''):
|
||||
|
||||
|
||||
def get_head_push_auth():
|
||||
login_res = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5))
|
||||
login_res = requests.post(
|
||||
url=login_push_url, json=login_push_data, timeout=(3, 5))
|
||||
text = json.loads(login_res.text)
|
||||
if text["status"]:
|
||||
token = text["data"]["accessToken"]
|
||||
@ -225,8 +209,7 @@ def get_head_push_auth():
|
||||
return None
|
||||
|
||||
|
||||
|
||||
def upload_data_to_system(token_push,date):
|
||||
def upload_data_to_system(token_push, date):
|
||||
datavalue = forecast_price()
|
||||
data = {
|
||||
"funcModule": "数据表信息列表",
|
||||
@ -242,85 +225,85 @@ def upload_data_to_system(token_push,date):
|
||||
}
|
||||
print(data)
|
||||
headers = {"Authorization": token_push}
|
||||
res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))
|
||||
res = requests.post(url=upload_url, headers=headers,
|
||||
json=data, timeout=(3, 5))
|
||||
print(res.text)
|
||||
|
||||
|
||||
|
||||
|
||||
def forecast_price():
|
||||
# df_test = pd.read_csv('定价模型数据收集0212.csv')
|
||||
df_test = pd.read_excel('纯苯数据项.xls',sheet_name='Sheet1')
|
||||
df_test.drop([0],inplace=True)
|
||||
df_test = pd.read_excel('纯苯数据项.xls', sheet_name='Sheet1')
|
||||
df_test.drop([0], inplace=True)
|
||||
# df_test['Date']=pd.to_datetime(df_test['Date'], format='%m/%d/%Y',infer_datetime_format=True)
|
||||
df_test['Date']=pd.to_datetime(df_test['Date'], format=r'%Y-%m-%d',infer_datetime_format=True)
|
||||
|
||||
df_test['Date'] = pd.to_datetime(
|
||||
df_test['Date'], format=r'%Y-%m-%d', infer_datetime_format=True)
|
||||
|
||||
df_test_1 = df_test
|
||||
df_test_1=df_test_1.fillna(df_test.ffill())
|
||||
df_test_1=df_test_1.fillna(df_test_1.bfill())
|
||||
df_test_1 = df_test_1.fillna(df_test.ffill())
|
||||
df_test_1 = df_test_1.fillna(df_test_1.bfill())
|
||||
|
||||
# 选择用于模型训练的列名称
|
||||
col_for_training = df_test_1.columns
|
||||
|
||||
|
||||
import joblib
|
||||
Best_model_DalyLGPrice = joblib.load("日度价格预测_最佳模型.pkl")
|
||||
# 最新的一天为最后一行的数据
|
||||
|
||||
|
||||
df_test_1_Day = df_test_1.tail(1)
|
||||
# 移除不需要的列
|
||||
df_test_1_Day.index = df_test_1_Day["Date"]
|
||||
df_test_1_Day = df_test_1_Day.drop(["Date"], axis= 1)
|
||||
df_test_1_Day=df_test_1_Day.drop('Price',axis=1)
|
||||
df_test_1_Day=df_test_1_Day.dropna()
|
||||
df_test_1_Day = df_test_1_Day.drop(["Date"], axis=1)
|
||||
df_test_1_Day = df_test_1_Day.drop('Price', axis=1)
|
||||
df_test_1_Day = df_test_1_Day.dropna()
|
||||
|
||||
for col in df_test_1_Day.columns:
|
||||
df_test_1_Day[col] = pd.to_numeric(df_test_1_Day[col],errors='coerce')
|
||||
#预测今日价格,显示至小数点后两位
|
||||
Ypredict_Today=Best_model_DalyLGPrice.predict(df_test_1_Day)
|
||||
df_test_1_Day[col] = pd.to_numeric(df_test_1_Day[col], errors='coerce')
|
||||
# 预测今日价格,显示至小数点后两位
|
||||
Ypredict_Today = Best_model_DalyLGPrice.predict(df_test_1_Day)
|
||||
|
||||
df_test_1_Day['日度预测价格']=Ypredict_Today
|
||||
df_test_1_Day['日度预测价格'] = Ypredict_Today
|
||||
print(df_test_1_Day['日度预测价格'])
|
||||
a = df_test_1_Day['日度预测价格']
|
||||
a = a[0]
|
||||
a = float(a)
|
||||
a = round(a,2)
|
||||
a = round(a, 2)
|
||||
return a
|
||||
|
||||
|
||||
def optimize_Model():
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.impute import SimpleImputer
|
||||
from sklearn.impute import SimpleImputer
|
||||
from sklearn.preprocessing import OrdinalEncoder
|
||||
from sklearn.feature_selection import SelectFromModel
|
||||
from sklearn.metrics import mean_squared_error, r2_score
|
||||
import pandas as pd
|
||||
|
||||
pd.set_option('display.max_rows',40)
|
||||
pd.set_option('display.max_columns',40)
|
||||
pd.set_option('display.max_rows', 40)
|
||||
pd.set_option('display.max_columns', 40)
|
||||
df_test = pd.read_excel('纯苯数据项.xls')
|
||||
df_test.drop([0],inplace=True)
|
||||
df_test.drop([0], inplace=True)
|
||||
# df_test['Date']=pd.to_datetime(df_test['Date'], format='%m/%d/%Y',infer_datetime_format=True)
|
||||
df_test['Date']=pd.to_datetime(df_test['Date'], format='%Y-%m-%d',infer_datetime_format=True)
|
||||
df_test['Date'] = pd.to_datetime(
|
||||
df_test['Date'], format='%Y-%m-%d', infer_datetime_format=True)
|
||||
|
||||
|
||||
#将缺失值补为前一个或者后一个数值
|
||||
# 将缺失值补为前一个或者后一个数值
|
||||
df_test_1 = df_test
|
||||
df_test_1=df_test_1.fillna(df_test.ffill())
|
||||
df_test_1=df_test_1.fillna(df_test_1.bfill())
|
||||
df_test_1 = df_test_1.fillna(df_test.ffill())
|
||||
df_test_1 = df_test_1.fillna(df_test_1.bfill())
|
||||
df_test_1["Date"] = pd.to_datetime(df_test_1["Date"])
|
||||
df_test_1.index = df_test_1["Date"]
|
||||
df_test_1 = df_test_1.drop(["Date"], axis= 1)
|
||||
df_test_1 = df_test_1.drop(["Date"], axis=1)
|
||||
df_test_1 = df_test_1.astype('float')
|
||||
|
||||
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pandas import Series,DataFrame
|
||||
from pandas import Series, DataFrame
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
import sklearn.datasets as datasets
|
||||
|
||||
#导入机器学习算法模型
|
||||
# 导入机器学习算法模型
|
||||
from sklearn.linear_model import Lasso
|
||||
from xgboost import XGBRegressor
|
||||
|
||||
@ -336,7 +319,7 @@ def optimize_Model():
|
||||
import xgboost as xgb
|
||||
from xgboost import plot_importance, plot_tree
|
||||
from sklearn.metrics import mean_absolute_error
|
||||
from statsmodels.tools.eval_measures import mse,rmse
|
||||
from statsmodels.tools.eval_measures import mse, rmse
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
from xgboost import XGBRegressor
|
||||
import warnings
|
||||
@ -344,63 +327,67 @@ def optimize_Model():
|
||||
|
||||
from sklearn.metrics import mean_squared_error
|
||||
|
||||
#切割训练数据和样本数据
|
||||
# 切割训练数据和样本数据
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
#用于模型评分
|
||||
# 用于模型评分
|
||||
from sklearn.metrics import r2_score
|
||||
|
||||
dataset1=df_test_1.drop('Price',axis=1)#.astype(float)
|
||||
dataset1 = df_test_1.drop('Price', axis=1) # .astype(float)
|
||||
|
||||
y=df_test_1['Price']
|
||||
y = df_test_1['Price']
|
||||
|
||||
x=dataset1
|
||||
x = dataset1
|
||||
|
||||
train = x
|
||||
target = y
|
||||
|
||||
#切割数据样本集合测试集
|
||||
X_train,x_test,y_train,y_true = train_test_split(train,target,test_size=0.2,random_state=0)
|
||||
# 切割数据样本集合测试集
|
||||
X_train, x_test, y_train, y_true = train_test_split(
|
||||
train, target, test_size=0.2, random_state=0)
|
||||
|
||||
#模型缩写
|
||||
Lasso = Lasso(random_state=0)
|
||||
# 模型缩写
|
||||
Lasso = Lasso(random_state=0)
|
||||
XGBR = XGBRegressor(random_state=0)
|
||||
#训练模型
|
||||
Lasso.fit(X_train,y_train)
|
||||
XGBR.fit(X_train,y_train)
|
||||
#模型拟合
|
||||
# 训练模型
|
||||
Lasso.fit(X_train, y_train)
|
||||
XGBR.fit(X_train, y_train)
|
||||
# 模型拟合
|
||||
y_pre_Lasso = Lasso.predict(x_test)
|
||||
y_pre_XGBR = XGBR.predict(x_test)
|
||||
|
||||
#计算Lasso、XGBR、RandomForestR、AdaBoostR、GradientBoostingR、BaggingRegressor各模型的R²
|
||||
Lasso_score = r2_score(y_true,y_pre_Lasso)
|
||||
XGBR_score=r2_score(y_true,y_pre_XGBR)
|
||||
# 计算Lasso、XGBR、RandomForestR、AdaBoostR、GradientBoostingR、BaggingRegressor各模型的R²
|
||||
Lasso_score = r2_score(y_true, y_pre_Lasso)
|
||||
XGBR_score = r2_score(y_true, y_pre_XGBR)
|
||||
|
||||
#计算Lasso、XGBR的MSE和RMSE
|
||||
Lasso_MSE=mean_squared_error(y_true, y_pre_Lasso)
|
||||
XGBR_MSE=mean_squared_error(y_true, y_pre_XGBR)
|
||||
# 计算Lasso、XGBR的MSE和RMSE
|
||||
Lasso_MSE = mean_squared_error(y_true, y_pre_Lasso)
|
||||
XGBR_MSE = mean_squared_error(y_true, y_pre_XGBR)
|
||||
|
||||
Lasso_RMSE=np.sqrt(Lasso_MSE)
|
||||
XGBR_RMSE=np.sqrt(XGBR_MSE)
|
||||
Lasso_RMSE = np.sqrt(Lasso_MSE)
|
||||
XGBR_RMSE = np.sqrt(XGBR_MSE)
|
||||
# 将不同模型的不同误差值整合成一个表格
|
||||
model_results = pd.DataFrame([['Lasso', Lasso_RMSE, Lasso_score],
|
||||
['XgBoost', XGBR_RMSE, XGBR_score]],
|
||||
columns = ['模型(Model)','均方根误差(RMSE)', 'R^2 score'])
|
||||
#将模型名称(Model)列设置为索引
|
||||
model_results1=model_results.set_index('模型(Model)')
|
||||
columns=['模型(Model)', '均方根误差(RMSE)', 'R^2 score'])
|
||||
# 将模型名称(Model)列设置为索引
|
||||
model_results1 = model_results.set_index('模型(Model)')
|
||||
|
||||
model_results1
|
||||
#定义plot_feature_importance函数,该函数用于计算特征重要性。此部分代码无需调整
|
||||
def plot_feature_importance(importance,names,model_type):
|
||||
# 定义plot_feature_importance函数,该函数用于计算特征重要性。此部分代码无需调整
|
||||
|
||||
def plot_feature_importance(importance, names, model_type):
|
||||
feature_importance = np.array(importance)
|
||||
feature_names = np.array(names)
|
||||
|
||||
data={'feature_names':feature_names,'feature_importance':feature_importance}
|
||||
data = {'feature_names': feature_names,
|
||||
'feature_importance': feature_importance}
|
||||
fi_df = pd.DataFrame(data)
|
||||
|
||||
fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True)
|
||||
fi_df.sort_values(by=['feature_importance'],
|
||||
ascending=False, inplace=True)
|
||||
|
||||
plt.figure(figsize=(10,8))
|
||||
plt.figure(figsize=(10, 8))
|
||||
sn.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names'])
|
||||
|
||||
plt.title(model_type + " "+'FEATURE IMPORTANCE')
|
||||
@ -408,60 +395,56 @@ def optimize_Model():
|
||||
plt.ylabel('FEATURE NAMES')
|
||||
from pylab import mpl
|
||||
mpl.rcParams['font.sans-serif'] = ['SimHei']
|
||||
## Xgboost 模型参数优化-初步
|
||||
#参考: https://juejin.im/post/6844903661013827598
|
||||
#每次调参时,备选参数数值以同数量级的1、3、10设置即可(比如设置1、3、10,或0.1、0.3、1.0,或0.01,0.03,0.10即可)
|
||||
# Xgboost 模型参数优化-初步
|
||||
# 参考: https://juejin.im/post/6844903661013827598
|
||||
# 每次调参时,备选参数数值以同数量级的1、3、10设置即可(比如设置1、3、10,或0.1、0.3、1.0,或0.01,0.03,0.10即可)
|
||||
|
||||
from xgboost import XGBRegressor
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
|
||||
estimator = XGBRegressor(random_state=0,
|
||||
nthread=4,
|
||||
seed=0
|
||||
)
|
||||
nthread=4,
|
||||
seed=0
|
||||
)
|
||||
parameters = {
|
||||
'max_depth': range (2, 11, 2), # 树的最大深度
|
||||
'n_estimators': range (50, 101, 10), # 迭代次数
|
||||
'max_depth': range(2, 11, 2), # 树的最大深度
|
||||
'n_estimators': range(50, 101, 10), # 迭代次数
|
||||
'learning_rate': [0.01, 0.03, 0.1, 0.3, 0.5, 1]
|
||||
}
|
||||
|
||||
grid_search_XGB = GridSearchCV(
|
||||
estimator=estimator,
|
||||
param_grid=parameters,
|
||||
# n_jobs = 10,
|
||||
cv = 3,
|
||||
# n_jobs = 10,
|
||||
cv=3,
|
||||
verbose=True
|
||||
)
|
||||
|
||||
grid_search_XGB.fit(X_train, y_train)
|
||||
#如果电脑在此步骤报错,可能是因为计算量太大,超过硬件可支持程度,可注释掉“n_jobs=10”一行
|
||||
# 如果电脑在此步骤报错,可能是因为计算量太大,超过硬件可支持程度,可注释掉“n_jobs=10”一行
|
||||
|
||||
best_parameters = grid_search_XGB.best_estimator_.get_params()
|
||||
y_pred = grid_search_XGB.predict(x_test)
|
||||
|
||||
op_XGBR_score = r2_score(y_true,y_pred)
|
||||
op_XGBR_MSE= mean_squared_error(y_true, y_pred)
|
||||
op_XGBR_RMSE= np.sqrt(op_XGBR_MSE)
|
||||
op_XGBR_score = r2_score(y_true, y_pred)
|
||||
op_XGBR_MSE = mean_squared_error(y_true, y_pred)
|
||||
op_XGBR_RMSE = np.sqrt(op_XGBR_MSE)
|
||||
|
||||
model_results2 = pd.DataFrame([['Optimized_Xgboost', op_XGBR_RMSE, op_XGBR_score]],
|
||||
columns = ['模型(Model)', '均方根误差(RMSE)', 'R^2 score'])
|
||||
model_results2=model_results2.set_index('模型(Model)')
|
||||
columns=['模型(Model)', '均方根误差(RMSE)', 'R^2 score'])
|
||||
model_results2 = model_results2.set_index('模型(Model)')
|
||||
|
||||
# results = model_results1.append(model_results2, ignore_index = False)
|
||||
results = pd.concat([model_results1,model_results2],ignore_index=True)
|
||||
|
||||
|
||||
results = pd.concat([model_results1, model_results2], ignore_index=True)
|
||||
|
||||
import pickle
|
||||
|
||||
Pkl_Filename = "日度价格预测_最佳模型.pkl"
|
||||
Pkl_Filename = "日度价格预测_最佳模型.pkl"
|
||||
|
||||
with open(Pkl_Filename, 'wb') as file:
|
||||
pickle.dump(grid_search_XGB, file)
|
||||
with open(Pkl_Filename, 'wb') as file:
|
||||
pickle.dump(grid_search_XGB, file)
|
||||
|
||||
|
||||
|
||||
|
||||
def read_xls_data():
|
||||
global one_cols, two_cols
|
||||
# 打开 XLS 文件
|
||||
@ -492,9 +475,7 @@ def read_xls_data():
|
||||
# workbook.close()
|
||||
|
||||
|
||||
|
||||
|
||||
def start(date=None,token=None,token_push=None):
|
||||
def start(date=None, token=None, token_push=None):
|
||||
read_xls_data()
|
||||
if date == None:
|
||||
date = getNow()[0]
|
||||
@ -502,7 +483,7 @@ def start(date=None,token=None,token_push=None):
|
||||
token = get_head_auth()
|
||||
token_push = get_head_push_auth()
|
||||
|
||||
datas = get_data_value(token, one_cols,date)
|
||||
datas = get_data_value(token, one_cols, date)
|
||||
if not datas:
|
||||
print("今天没有新数据")
|
||||
return
|
||||
@ -516,21 +497,22 @@ def start(date=None,token=None,token_push=None):
|
||||
print(data_value)
|
||||
dataItemNo_dataValue[data_value["dataItemNo"]] = ""
|
||||
else:
|
||||
dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"]
|
||||
|
||||
dataItemNo_dataValue[data_value["dataItemNo"]
|
||||
] = data_value["dataValue"]
|
||||
|
||||
for value in one_cols:
|
||||
if value in dataItemNo_dataValue:
|
||||
append_rows.append(dataItemNo_dataValue[value])
|
||||
else:
|
||||
append_rows.append("")
|
||||
save_xls(append_rows)
|
||||
|
||||
|
||||
# 获取当月的数据写入到指定文件,如果是补充数据,不需要执行
|
||||
queryDataListItemNos()
|
||||
queryDataListItemNos(date=date)
|
||||
# 模型训练
|
||||
optimize_Model()
|
||||
# 上传预测数据
|
||||
upload_data_to_system(token_push,date)
|
||||
upload_data_to_system(token_push, date)
|
||||
# data_list.append(three_cols)
|
||||
# write_xls(data_list)
|
||||
|
||||
@ -542,9 +524,8 @@ def start_1(date=None):
|
||||
token = get_head_auth()
|
||||
if not token:
|
||||
return
|
||||
|
||||
|
||||
datas = get_data_value(token, one_cols,date=date)
|
||||
datas = get_data_value(token, one_cols, date=date)
|
||||
# if not datas:
|
||||
# return
|
||||
|
||||
@ -556,8 +537,9 @@ def start_1(date=None):
|
||||
print(data_value)
|
||||
dataItemNo_dataValue[data_value["dataItemNo"]] = ""
|
||||
else:
|
||||
dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"]
|
||||
|
||||
dataItemNo_dataValue[data_value["dataItemNo"]
|
||||
] = data_value["dataValue"]
|
||||
|
||||
for value in one_cols:
|
||||
if value in dataItemNo_dataValue:
|
||||
append_rows.append(dataItemNo_dataValue[value])
|
||||
@ -565,10 +547,10 @@ def start_1(date=None):
|
||||
append_rows.append("")
|
||||
save_xls_1(append_rows)
|
||||
|
||||
|
||||
# data_list.append(three_cols)
|
||||
# write_xls(data_list)
|
||||
|
||||
|
||||
|
||||
def save_xls_1(append_rows):
|
||||
|
||||
# 打开xls文件
|
||||
@ -610,11 +592,9 @@ def save_xls_1(append_rows):
|
||||
new_sheet.write(row_count, col, append_rows[col])
|
||||
|
||||
# 保存新的xls文件
|
||||
new_workbook.save("纯苯数据项.xls")
|
||||
new_workbook.save("纯苯数据项.xls")
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def check_data(dataItemNo):
|
||||
token = get_head_auth()
|
||||
if not token:
|
||||
@ -669,8 +649,6 @@ def save_xls(append_rows):
|
||||
new_workbook.save("纯苯数据项.xls")
|
||||
|
||||
|
||||
|
||||
|
||||
def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEnd):
|
||||
|
||||
search_data = {
|
||||
@ -684,7 +662,8 @@ def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEn
|
||||
}
|
||||
|
||||
headers = {"Authorization": token}
|
||||
search_res = requests.post(url=url, headers=headers, json=search_data, timeout=(3, 5))
|
||||
search_res = requests.post(
|
||||
url=url, headers=headers, json=search_data, timeout=(3, 5))
|
||||
search_value = json.loads(search_res.text)["data"]
|
||||
if search_value:
|
||||
return search_value
|
||||
@ -692,9 +671,8 @@ def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEn
|
||||
return None
|
||||
|
||||
|
||||
|
||||
def save_queryDataListItemNos_xls(data_df,dataItemNoList):
|
||||
from datetime import datetime,timedelta
|
||||
def save_queryDataListItemNos_xls(data_df, dataItemNoList):
|
||||
from datetime import datetime, timedelta
|
||||
current_year_month = datetime.now().strftime('%Y-%m')
|
||||
grouped = data_df.groupby("dataDate")
|
||||
|
||||
@ -727,7 +705,6 @@ def save_queryDataListItemNos_xls(data_df,dataItemNoList):
|
||||
# 创建sheet
|
||||
new_sheet = new_workbook.add_sheet(sheet_names[i])
|
||||
|
||||
|
||||
current_year_month_row = 0
|
||||
# 将原有的数据写入新的sheet
|
||||
for row in range(row_count):
|
||||
@ -739,29 +716,28 @@ def save_queryDataListItemNos_xls(data_df,dataItemNoList):
|
||||
break
|
||||
new_sheet.write(row, col, data[row][col])
|
||||
|
||||
|
||||
# print("current_year_month_row",current_year_month_row)
|
||||
if i == 0:
|
||||
rowFlag = 0
|
||||
# 查看每组数据
|
||||
for date, group in grouped:
|
||||
new_sheet.write(row_count + rowFlag - current_year_month_row, 0, date)
|
||||
new_sheet.write(row_count + rowFlag -
|
||||
current_year_month_row, 0, date)
|
||||
for j in range(len(dataItemNoList)):
|
||||
dataItemNo = dataItemNoList[j]
|
||||
|
||||
if group[group["dataItemNo"] == dataItemNo]["dataValue"].values and (not str(group[group["dataItemNo"] == dataItemNo]["dataValue"].values[0]) == 'nan'):
|
||||
|
||||
new_sheet.write(row_count + rowFlag - current_year_month_row, j + 1, group[group["dataItemNo"] == dataItemNo]["dataValue"].values[0])
|
||||
new_sheet.write(row_count + rowFlag - current_year_month_row, j + 1,
|
||||
group[group["dataItemNo"] == dataItemNo]["dataValue"].values[0])
|
||||
|
||||
rowFlag += 1
|
||||
|
||||
|
||||
# 保存新的xls文件
|
||||
new_workbook.save("纯苯数据项.xls")
|
||||
|
||||
|
||||
|
||||
def queryDataListItemNos(date=None,token=None):
|
||||
def queryDataListItemNos(date=None, token=None):
|
||||
from datetime import datetime, timedelta
|
||||
df = pd.read_excel('纯苯数据项.xls')
|
||||
dataItemNoList = df.iloc[0].tolist()[1:]
|
||||
@ -779,16 +755,29 @@ def queryDataListItemNos(date=None,token=None):
|
||||
first_day_of_month = current_date.replace(day=1)
|
||||
# 格式化为 YYYYMMDD 格式
|
||||
dateEnd = current_date.strftime('%Y%m%d')
|
||||
# dateEnd = date.strftime('%Y%m%d')
|
||||
dateStart = first_day_of_month.strftime('%Y%m%d')
|
||||
search_value = get_queryDataListItemNos_value(token, queryDataListItemNos_url, dataItemNoList, dateStart, dateEnd)
|
||||
search_value = get_queryDataListItemNos_value(
|
||||
token, queryDataListItemNos_url, dataItemNoList, dateStart, dateEnd)
|
||||
data_df = pd.DataFrame(search_value)
|
||||
data_df["dataDate"] = pd.to_datetime(data_df["dataDate"])
|
||||
data_df["dataDate"] = data_df["dataDate"].dt.strftime('%Y-%m-%d')
|
||||
save_queryDataListItemNos_xls(data_df,dataItemNoList)
|
||||
save_queryDataListItemNos_xls(data_df, dataItemNoList)
|
||||
print('当月数据更新完成')
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
print('运行中ing')
|
||||
start()
|
||||
# 自定义日期执行预测
|
||||
# start_date = datetime(2025, 7, 6)
|
||||
# end_date = datetime(2025, 7, 7)
|
||||
|
||||
# token = get_head_auth()
|
||||
# token_push = get_head_push_auth()
|
||||
# while start_date < end_date:
|
||||
# print(start_date.strftime('%Y%m%d'))
|
||||
# start(start_date, token, token_push)
|
||||
# time.sleep(2)
|
||||
# # start_1(start_date)
|
||||
# start_date += timedelta(days=1)
|
||||
|
Loading…
Reference in New Issue
Block a user