# PriceForecast/aisenzhecode/沥青/沥青定量每日执行.py
# Daily quantitative forecast job for asphalt prices.
import requests
import json
from datetime import datetime, timedelta
import time
import pandas as pd
# Variable definitions
login_url = "http://10.200.32.39/jingbo-api/api/server/login"
search_url = "http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos"
queryDataListItemNos_url = "http://10.200.32.39/jingbo-api//api/warehouse/dwDataItem/queryDataListItemNos"
login_push_url = "http://10.200.32.39/jingbo-api/api/server/login"
upload_url = "http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList"
login_data = {
    "data": {
        "account": "api_dev",
        "password": "ZTEwYWRjMzk0OWJhNTlhYmJlNTZlMDU3ZjIwZjg4M2U=",
        "tenantHashCode": "8a4577dbd919675758d57999a1e891fe",
        "terminal": "API"
    },
    "funcModule": "API",
    "funcOperation": "获取token"
}
# The push endpoint uses the same credentials, so reuse the same payload
login_push_data = login_data
read_file_path_name = "沥青数据项.xlsx"
one_cols = []
two_cols = []
# Feature columns stored in the spreadsheet; all of them are cast to float on load
FLOAT_COLS = ['汽油执行价', '柴油执行价', '齐鲁石化销量', '齐鲁石化产量', '齐鲁石化成交价',
              '齐鲁石化库存', '科力达销量', '科力达产量', '科力达成交价', '科力达库存',
              '弘润销量', '弘润产量', '弘润成交价', '弘润库存', '市场成交价', '京博指导价',
              '布伦特上周收盘价', '布伦特昨日收盘价', '布伦特收盘价', '上期所沥青主力合约',
              '隆重资讯沥青日开工率', '隆重资讯沥青月库存', '隆重资讯沥青月产量',
              '隆重资讯沥青表观消费量', '隆重资讯社会库存率', '厂区库容', '京博提货量',
              '即期成本', '异地库库存', '京博签单量', '京博库存量', '京博产量', '加权平均成交价']
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
import random
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from sklearn import preprocessing
from pandas import Series, DataFrame
import sklearn.datasets as datasets
# Import the machine-learning models
from sklearn.linear_model import Lasso
from xgboost import XGBRegressor
import statsmodels.api as sm
# from keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
import plotly.express as px
import plotly.graph_objects as go
import xgboost as xgb
from xgboost import plot_importance, plot_tree
from sklearn.metrics import mean_absolute_error
from statsmodels.tools.eval_measures import mse, rmse
from sklearn.model_selection import GridSearchCV
import warnings
import pickle
from sklearn.metrics import mean_squared_error
# Split training and test data
from sklearn.model_selection import train_test_split
# Model scoring
from sklearn.metrics import r2_score

le = preprocessing.LabelEncoder()
# print(__version__)  # requires version >= 1.9.0
import cufflinks as cf
cf.go_offline()
random.seed(100)
# Data retrieval
def get_head_auth():
    login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5))
    text = json.loads(login_res.text)
    if text["status"]:
        token = text["data"]["accessToken"]
        return token
    else:
        print("Authentication failed")
        return None
def get_data_value(token, dataItemNoList, date=''):
    search_data = {
        "data": {
            "date": getNow(date)[0],
            "dataItemNoList": dataItemNoList
        },
        "funcModule": "数据项",
        "funcOperation": "查询"
    }
    headers = {"Authorization": token}
    search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5))
    search_value = json.loads(search_res.text)["data"]
    if search_value:
        return search_value
    else:
        print("No new data today")
        return None
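# Judging by how start() consumes the result, the endpoint appears to return
# {"data": [{"dataItemNo": ..., "dataValue": ...}, ...]}, where "dataValue"
# may be absent for items that have no value on the requested day.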
# xls file handling
def getNow(date='', offset=0):
    """Return two string representations of the given date.

    Args:
        date: accepts several input types:
            - a datetime object
            - a date string ('%Y-%m-%d', '%Y%m%d' or '%Y/%m/%d')
            - an empty string, meaning today
        offset: number of days to subtract from the date
    Returns:
        tuple: (compact date string, standard date string)
    """
    if isinstance(date, datetime):
        now = date
    else:
        now = datetime.now()
        if date:
            # Try several date formats in turn
            for fmt in ('%Y-%m-%d', '%Y%m%d', '%Y/%m/%d'):
                try:
                    now = datetime.strptime(str(date), fmt)
                    break
                except ValueError:
                    continue
            else:
                raise ValueError(f"Unparseable date format: {date}")
    # Apply the day offset
    now = now - timedelta(days=offset)
    # Format both outputs consistently
    date_str = now.strftime("%Y-%m-%d")
    compact_date = date_str.replace("-", "")
    return compact_date, date_str
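# Usage sketch (hypothetical dates, for illustration):
#   getNow('2025-06-04')          -> ('20250604', '2025-06-04')
#   getNow('20250604', offset=1)  -> ('20250603', '2025-06-03')
#   getNow()                      -> today's date in both formats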
def get_head_push_auth():
    login_res = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5))
    text = json.loads(login_res.text)
    if text["status"]:
        token = text["data"]["accessToken"]
        return token
    else:
        print("Authentication failed")
        return None
def upload_data_to_system(token_push, date):
    data = {
        "funcModule": "数据表信息列表",
        "funcOperation": "新增",
        "data": [
            {
                "dataItemNo": "C01100036|Forecast_Price|ACN",
                "dataDate": getNow(date)[0],
                "dataStatus": "add",
                "dataValue": forecast_price()
            }
        ]
    }
    headers = {"Authorization": token_push}
    res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5))
    print(res.text)
def forecast_price():
    df_test = pd.read_excel('沥青数据项.xlsx')
    df_test.drop([0], inplace=True)
    df_test[FLOAT_COLS] = df_test[FLOAT_COLS].astype('float')
    # df_test['日期'] = pd.to_datetime(df_test['日期'], format='%d/%m/%Y')
    df_test['日期'] = pd.to_datetime(df_test['日期'], format='%Y-%m-%d')
    # Count missing values per feature
    MisVal_Check = df_test.isnull().sum().sort_values(ascending=False)
    # Drop features whose missing-value ratio exceeds 0.4; the result is df_test_1
    df_MisVal_Check = pd.DataFrame(MisVal_Check)
    df_MisVal_Check_1 = df_MisVal_Check.reset_index()
    df_MisVal_Check_1.columns = ['Variable_Name', 'Missing_Number']
    df_MisVal_Check_1['Missing_Number'] = df_MisVal_Check_1['Missing_Number'] / len(df_test)
    df_test_1 = df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number'] > 0.4].Variable_Name, axis=1)
    # Fill the remaining gaps with the previous value, then the next value
    df_test_1 = df_test_1.ffill()
    df_test_1 = df_test_1.bfill()
    # Columns used for training
    col_for_training = df_test_1.columns
    import joblib
    Best_model_DalyLGPrice = joblib.load("日度价格预测_最佳模型.pkl")
    # The most recent day is the last row; copy it so the assignment below
    # does not write into a view of df_test_1
    df_test_1_Day = df_test_1.tail(1).copy()
    # Drop the columns the model was not trained on
    df_test_1_Day.index = df_test_1_Day["日期"]
    df_test_1_Day = df_test_1_Day.drop(["日期"], axis=1)
    df_test_1_Day = df_test_1_Day.drop('京博指导价', axis=1)
    df_test_1_Day = df_test_1_Day.dropna()
    # Predict today's price and round to two decimal places
    Ypredict_Today = Best_model_DalyLGPrice.predict(df_test_1_Day)
    df_test_1_Day['日度预测价格'] = Ypredict_Today
    print(df_test_1_Day['日度预测价格'])
    a = round(float(Ypredict_Today[0]), 2)
    return a
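# forecast_price() assumes 日度价格预测_最佳模型.pkl already exists, i.e. that
# optimize_Model() has run at least once; main() calls them in that order.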
def optimize_Model():
    from sklearn.model_selection import train_test_split
    from sklearn.impute import SimpleImputer
    from sklearn.preprocessing import OrdinalEncoder
    from sklearn.feature_selection import SelectFromModel
    from sklearn.metrics import mean_squared_error, r2_score

    pd.set_option('display.max_rows', 40)
    pd.set_option('display.max_columns', 40)
    df_test = pd.read_excel('沥青数据项.xlsx')
    df_test.drop([0], inplace=True)
    df_test[FLOAT_COLS] = df_test[FLOAT_COLS].astype('float')
    # df_test = pd.read_csv('定价模型数据收集20190901-20230615.csv', encoding='gbk', engine='python')
    # df_test['日期'] = pd.to_datetime(df_test['日期'], format='%m/%d/%Y')
    df_test['日期'] = pd.to_datetime(df_test['日期'], format='%Y-%m-%d')
    # df_test.tail(3)
    # Count missing values per feature
    MisVal_Check = df_test.isnull().sum().sort_values(ascending=False)
    # Drop features whose missing-value ratio exceeds 0.4; the result is df_test_1
    df_MisVal_Check = pd.DataFrame(MisVal_Check)
    df_MisVal_Check_1 = df_MisVal_Check.reset_index()
    df_MisVal_Check_1.columns = ['Variable_Name', 'Missing_Number']
    df_MisVal_Check_1['Missing_Number'] = df_MisVal_Check_1['Missing_Number'] / len(df_test)
    df_test_1 = df_test.drop(df_MisVal_Check_1[df_MisVal_Check_1['Missing_Number'] > 0.4].Variable_Name, axis=1)
    # Fill the remaining gaps with the previous value, then the next value
    df_test_1 = df_test_1.ffill()
    df_test_1 = df_test_1.bfill()
    df_test_1["日期"] = pd.to_datetime(df_test_1["日期"])
    df_test_1.index = df_test_1["日期"]
    df_test_1 = df_test_1.drop(["日期"], axis=1)
    # 京博指导价 is the prediction target; everything else is a feature
    dataset1 = df_test_1.drop('京博指导价', axis=1)
    y = df_test_1['京博指导价']
    x = dataset1
    train = x
    target = y
    # Split into training and test sets
    X_train, x_test, y_train, y_true = train_test_split(train, target, test_size=0.2, random_state=0)
    # Baseline models; Lasso, XGBRegressor and the metric functions are already
    # imported at module level. Bind the Lasso instance to a new name so it
    # does not shadow the Lasso class.
    lasso_model = Lasso(random_state=0)
    XGBR = XGBRegressor(random_state=0)
    lasso_model.fit(X_train, y_train)
    XGBR.fit(X_train, y_train)
    y_pre_Lasso = lasso_model.predict(x_test)
    y_pre_XGBR = XGBR.predict(x_test)
    # R² of each baseline model
    Lasso_score = r2_score(y_true, y_pre_Lasso)
    XGBR_score = r2_score(y_true, y_pre_XGBR)
    # MSE and RMSE of Lasso and XGBR
    Lasso_MSE = mean_squared_error(y_true, y_pre_Lasso)
    XGBR_MSE = mean_squared_error(y_true, y_pre_XGBR)
    Lasso_RMSE = np.sqrt(Lasso_MSE)
    XGBR_RMSE = np.sqrt(XGBR_MSE)
    model_results = pd.DataFrame([['Lasso', Lasso_RMSE, Lasso_score],
                                  ['XgBoost', XGBR_RMSE, XGBR_score]],
                                 columns=['模型(Model)', '均方根误差(RMSE)', 'R^2 score'])
    model_results1 = model_results.set_index('模型(Model)')
    def plot_feature_importance(importance, names, model_type):
        feature_importance = np.array(importance)
        feature_names = np.array(names)
        data = {'feature_names': feature_names, 'feature_importance': feature_importance}
        fi_df = pd.DataFrame(data)
        fi_df.sort_values(by=['feature_importance'], ascending=False, inplace=True)
        plt.figure(figsize=(10, 8))
        sn.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names'])
        plt.title(model_type + ' FEATURE IMPORTANCE')
        plt.xlabel('FEATURE IMPORTANCE')
        plt.ylabel('FEATURE NAMES')

    # Use a font that can render Chinese labels in plots
    from pylab import mpl
    mpl.rcParams['font.sans-serif'] = ['SimHei']
    estimator = XGBRegressor(random_state=0,
                             nthread=4,
                             seed=0)
    parameters = {
        'max_depth': range(2, 11, 2),        # maximum tree depth
        'n_estimators': range(50, 101, 10),  # number of boosting rounds
        'learning_rate': [0.01, 0.03, 0.1, 0.3, 0.5, 1]
    }
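    # For scale: the grid spans 5 depths x 6 estimator counts x 6 learning
    # rates = 180 candidates, so cv=3 below means 540 model fits per run.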
    grid_search_XGB = GridSearchCV(
        estimator=estimator,
        param_grid=parameters,
        # n_jobs=10,
        cv=3,
        verbose=True
    )
    grid_search_XGB.fit(X_train, y_train)
    print("Best score: %0.3f" % grid_search_XGB.best_score_)
    print("Best parameters set:")
    best_parameters = grid_search_XGB.best_estimator_.get_params()
    for param_name in sorted(parameters.keys()):
        print("\t%s: %r" % (param_name, best_parameters[param_name]))
    y_pred = grid_search_XGB.predict(x_test)
    op_XGBR_score = r2_score(y_true, y_pred)
    op_XGBR_MSE = mean_squared_error(y_true, y_pred)
    op_XGBR_RMSE = np.sqrt(op_XGBR_MSE)
    model_results2 = pd.DataFrame([['Optimized_Xgboost', op_XGBR_RMSE, op_XGBR_score]],
                                  columns=['模型(Model)', '均方根误差(RMSE)', 'R^2 score'])
    model_results2 = model_results2.set_index('模型(Model)')
    # results = model_results1.append(model_results2, ignore_index=False)
    results = pd.concat([model_results1, model_results2], ignore_index=True)
    print(results)
    # Persist the tuned model for forecast_price() to load
    Pkl_Filename = "日度价格预测_最佳模型.pkl"
    with open(Pkl_Filename, 'wb') as file:
        pickle.dump(grid_search_XGB, file)
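# Note: optimize_Model() saves with pickle while forecast_price() reloads the
# same file via joblib.load(). joblib can generally read plain pickles, but
# using the same library in both directions (e.g. joblib.dump/joblib.load)
# would be the safer pairing.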
def read_xls_data():
    """Read the feature item IDs from the spreadsheet."""
    global one_cols, two_cols
    # Read the Excel file with pandas, without inferring column names
    df = pd.read_excel(read_file_path_name, header=None)
    # The second row (index 1) holds the item IDs; skip the first (date) column
    one_cols = df.iloc[1].tolist()[1:]
    print(f'Data item IDs: {one_cols}')
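# Assumed layout of 沥青数据项.xlsx, inferred from its readers (read_xls_data,
# queryDataListItemNos, forecast_price):
#   row 1:  column names (日期 plus the feature names in FLOAT_COLS)
#   row 2:  the data item ID for each feature column
#   row 3+: one row of values per trading day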
def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEnd):
    search_data = {
        "funcModule": "数据项",
        "funcOperation": "查询",
        "data": {
            "dateStart": dateStart,
            "dateEnd": dateEnd,
            "dataItemNoList": dataItemNoList  # data item codes to query
        }
    }
    headers = {"Authorization": token}
    search_res = requests.post(url=url, headers=headers, json=search_data, timeout=(3, 5))
    search_value = json.loads(search_res.text)["data"]
    if search_value:
        return search_value
    else:
        return None
def save_queryDataListItemNos_xls(data_df, dataItemNoList):
    current_year_month = datetime.now().strftime('%Y-%m')
    grouped = data_df.groupby("dataDate")
    # Open the xlsx file with openpyxl
    from openpyxl import load_workbook
    workbook = load_workbook('沥青数据项.xlsx')
    # Load a second copy of the workbook to write the updates into
    new_workbook = load_workbook('沥青数据项.xlsx')
    for sheetname in workbook.sheetnames:
        sheet = workbook[sheetname]
        new_sheet = new_workbook[sheetname]
        current_year_month_row = 0
        # Count the rows that already belong to the current month
        for row_idx, row in enumerate(sheet.iter_rows(values_only=True), 1):
            if str(row[0]).startswith(current_year_month):
                current_year_month_row += 1
        # Rewrite the current month's rows on the first sheet
        if sheetname == workbook.sheetnames[0]:
            start_row = sheet.max_row - current_year_month_row + 1
            for row_idx, (date, group) in enumerate(grouped, start=start_row):
                new_sheet.cell(row=row_idx, column=1, value=date)
                for j, dataItemNo in enumerate(dataItemNoList, start=2):
                    vals = group[group["dataItemNo"] == dataItemNo]["dataValue"].values
                    if len(vals):
                        new_sheet.cell(row=row_idx, column=j, value=vals[0])
    # Save the modified xlsx file
    new_workbook.save("沥青数据项.xlsx")
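# start_row above targets the first row of the current month: the loop counts
# how many rows this month already occupies, and overwriting begins at
# max_row - current_year_month_row + 1. This assumes the month's rows sit at
# the bottom of the sheet and that the query covers the whole month so far.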
# Legacy implementation based on xlrd/xlwt, kept for reference:
# def save_queryDataListItemNos_xls(data_df, dataItemNoList):
#     current_year_month = datetime.now().strftime('%Y-%m')
#     grouped = data_df.groupby("dataDate")
#     # Open the xls file
#     workbook = xlrd.open_workbook('沥青数据项.xlsx')
#     # Number of sheets
#     sheet_count = len(workbook.sheet_names())
#     # Names of all sheets
#     sheet_names = workbook.sheet_names()
#     new_workbook = xlwt.Workbook()
#     for i in range(sheet_count):
#         # Current sheet
#         sheet = workbook.sheet_by_index(i)
#         # Row and column counts
#         row_count = sheet.nrows
#         col_count = sheet.ncols
#         # Collect the existing data
#         data = []
#         for row in range(row_count):
#             row_data = []
#             for col in range(col_count):
#                 row_data.append(sheet.cell_value(row, col))
#             data.append(row_data)
#         # Create the target sheet in the xlwt workbook
#         new_sheet = new_workbook.add_sheet(sheet_names[i])
#         current_year_month_row = 0
#         # Copy the old rows, skipping those of the current month
#         for row in range(row_count):
#             for col in range(col_count):
#                 col0 = data[row][0]
#                 # print("col0", col0[:7])
#                 if col0[:7] == current_year_month:
#                     current_year_month_row += 1
#                     break
#                 new_sheet.write(row, col, data[row][col])
#         # print("current_year_month_row", current_year_month_row)
#         if i == 0:
#             rowFlag = 0
#             # Write each date group
#             for date, group in grouped:
#                 new_sheet.write(row_count + rowFlag - current_year_month_row, 0, date)
#                 for j in range(len(dataItemNoList)):
#                     dataItemNo = dataItemNoList[j]
#                     if group[group["dataItemNo"] == dataItemNo]["dataValue"].values:
#                         new_sheet.write(row_count + rowFlag - current_year_month_row, j + 1,
#                                         group[group["dataItemNo"] == dataItemNo]["dataValue"].values[0])
#                 rowFlag += 1
#     # Save the new xls file
#     new_workbook.save("沥青数据项.xlsx")
def queryDataListItemNos(token=None):
    df = pd.read_excel('沥青数据项.xlsx')
    dataItemNoList = df.iloc[0].tolist()[1:]
    if token is None:
        token = get_head_auth()
    if not token:
        print('Failed to obtain a token')
        return
    # Current date
    current_date = datetime.now()
    # First day of the current month
    first_day_of_month = current_date.replace(day=1)
    # Format as YYYYMMDD
    dateEnd = current_date.strftime('%Y%m%d')
    dateStart = first_day_of_month.strftime('%Y%m%d')
    search_value = get_queryDataListItemNos_value(token, queryDataListItemNos_url, dataItemNoList, dateStart, dateEnd)
    data_df = pd.DataFrame(search_value)
    data_df["dataDate"] = pd.to_datetime(data_df["dataDate"])
    data_df["dataDate"] = data_df["dataDate"].dt.strftime('%Y-%m-%d')
    save_queryDataListItemNos_xls(data_df, dataItemNoList)
    print('Current-month data update complete')
def save_xls_1(append_rows):
    # Legacy writer: requires the xlrd and xlwt packages, which are not
    # imported at module level (note that xlrd >= 2.0 cannot read .xlsx files)
    import xlrd
    import xlwt
    # Open the xls file
    workbook = xlrd.open_workbook('沥青数据项.xlsx')
    # Number of sheets
    sheet_count = len(workbook.sheet_names())
    # Names of all sheets
    sheet_names = workbook.sheet_names()
    new_workbook = xlwt.Workbook()
    for i in range(sheet_count):
        # Current sheet
        sheet = workbook.sheet_by_index(i)
        # Row and column counts; drop the last row so it gets overwritten
        row_count = sheet.nrows - 1
        col_count = sheet.ncols
        # Collect the existing data
        data = []
        for row in range(row_count):
            row_data = []
            for col in range(col_count):
                row_data.append(sheet.cell_value(row, col))
            data.append(row_data)
        # Create the target sheet in the xlwt workbook
        new_sheet = new_workbook.add_sheet(sheet_names[i])
        # Copy the old rows into the new sheet
        for row in range(row_count):
            for col in range(col_count):
                new_sheet.write(row, col, data[row][col])
        if i == 0:
            # Append the new row to the first sheet
            for col in range(col_count):
                new_sheet.write(row_count, col, append_rows[col])
    # Save the new xls file
    new_workbook.save("沥青数据项.xlsx")
def start(date=''):
    """Fetch and store the data for one day."""
    read_xls_data()
    token = get_head_auth()
    if not token:
        return
    cur_time, cur_time2 = getNow(date)
    print(f"Fetching data for {cur_time}")
    datas = get_data_value(token, one_cols, date=cur_time)
    # Check for None before calling len(), which would otherwise raise
    if not datas:
        return
    print(len(datas))
    print(datas)
    append_rows = [cur_time2]
    dataItemNo_dataValue = {}
    for data_value in datas:
        if "dataValue" not in data_value:
            print(data_value)
            dataItemNo_dataValue[data_value["dataItemNo"]] = ""
        else:
            dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"]
    for value in one_cols:
        if value in dataItemNo_dataValue:
            append_rows.append(dataItemNo_dataValue[value])
        else:
            append_rows.append("")
    print('Row to append:', len(append_rows), append_rows)
    save_xls_2(append_rows)
def save_xls_2(append_rows):
    """Save or update one data row in the Excel file.

    Args:
        append_rows (list): the row to append/update, laid out as
            [date, item 1, item 2, ...]
    """
    # try:
    # Read the existing data (the first row is assumed to hold the column names)
    df = pd.read_excel('沥青数据项.xlsx', sheet_name=0)
    print('Columns in the file:', len(df.columns), df.columns)
    if len(append_rows) != len(df.columns):
        # Drop the second element; for unknown reasons there is one extra empty value
        append_rows.pop(1)
    # Convert append_rows to a DataFrame
    append_rows = pd.DataFrame([append_rows], columns=df.columns)
    # Date of the new row
    new_date = append_rows['日期'].values[0]
    dates = df['日期'].to_list()
    # Does the date already exist?
    if new_date in dates:
        # Update the existing row
        date_mask = df['日期'] == new_date
        df.loc[date_mask] = append_rows.values
        print(f"Updated the row for {new_date}")
    else:
        # Append a new row
        df = pd.concat([df, append_rows], ignore_index=True)
        print(df.head())
        print(df.tail())
        print(f"Inserted a new row for {new_date}")
    # Save the updated data
    df.to_excel('沥青数据项.xlsx', index=False, engine='openpyxl')
    # except FileNotFoundError:
    #     # Create the file if it does not exist yet
    #     pd.DataFrame([append_rows]).to_excel('沥青数据项.xlsx', index=False, engine='openpyxl')
    # except Exception as e:
    #     print(f"Error while saving data: {str(e)}")
def main(start_date=None, token=None, token_push=None):
    if start_date is None:
        start_date = datetime.now()
    if token is None:
        token = get_head_auth()
    if token_push is None:
        token_push = get_head_push_auth()
    date = start_date.strftime('%Y%m%d')
    print(date)
    # start(date)
    # Refresh the current month's data
    queryDataListItemNos(token)
    # Retrain the model
    optimize_Model()
    # Predict and upload the forecast
    upload_data_to_system(token_push, start_date)


if __name__ == "__main__":
    print("Running...")
    main()
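# To backfill a past day, main() also accepts a datetime (hypothetical date):
#   from datetime import datetime
#   main(start_date=datetime(2025, 6, 3))
# Note that queryDataListItemNos() always fetches the current month, so a
# backfilled date only changes the dataDate attached to the uploaded forecast.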